兄弟给了个案例,这是在设计开发环节很可能被忽略的一个问题,如下测试表,c1字段按照char存储,c2字段按照byte存储,
create table test(c1 varchar2(10 char), c2 varchar2(10 byte));
复制
现在要扩容字段,以下两种操作,有什么区别?
alter table test modify c1 varchar2(20);
alter table test modify c2 varchar2(30);
复制
《NLS_LENGTH_SEMANTICS参数引申的问题》文章介绍了字符串类型字段按照char和byte存储的形式,默认按照byte存储,但是上述测试表,一个是按照char,一个是按照byte,其实问题可以翻译成,两个扩容操作,一个是char->byte,一个是byte->byte,有什么区别?
从语义上讲,例如UTF-8字符集,1个中文占用三个字节,GBK字符集,1个中文占用两个字节,如果按照存储中文来计算,
1. UTF-8字符集,
原始:c1 varchar2(10 char)可以存储10个中文,占30个字节。
c2 varchar2(10 byte)可以存储10/3=3个中文,占9个字节。
扩容:c1 varchar2(20)是20个字节,可以存储20/3=6个中文。
c2 varchar2(30)是30个字节,可以存储30/3=10个中文。
2. GBK字符集,
原始:c1 varchar2(10 char)可以存储10个中文,占20个字节。
c2 varchar2(10 byte)可以存储10/2=5个中文,占10个字节。
扩容:c1 varchar2(20)是20个字节,可以存储20/2=10个中文。
c2 varchar2(30)是30个字节,可以存储30/2=15个中文。
因此,不同字符集,char和byte之间的转换,存储中文字符的个数,有可能存储更多,有可能存储更少,还可能是相同的,取决于char和byte的换算关系以及扩容的数值。如果只存储英文字符,从存储容量来说,几乎无影响。
从实际运行上,char->byte和byte->byte,还是有些区别的,主要体现在对性能的影响。
如果是char->byte,除了扩容长度,它还涉及语义类型上的转换,从逻辑上讲,需要判断当前字段存储的值,在进行转换后,是否出现超长的情况,
alter table test modify c1 varchar2(20);
复制
这是上述操作对应的10046,为了扩容字段,在数据字典层面,执行了35条SQL,
select count(*) from ind$ i where i.bo#=:1
select /*+ rule */ bucket_cnt, row_cnt, cache_cnt, null_cnt, timestamp#, sample_size, minimum, maximum, distcnt, lowval, hival, density, col#, spare1, spare2, avgcln from hist_head$ where obj#=:1 and intcol#=:2
select /*+ rule */ bucket, endpoint, col#, epvalue from histgrm$ where obj#=:1 and intcol#=:2 and row#=:3 order by bucket
select obj#,type#,ctime,mtime,stime, status, dataobj#, flags, oid$, spare1, spare2 from obj$ where owner#=:1 and name=:2 and namespace=:3 and remoteowner is null and linkname is null and subname is null
select t.ts#,t.file#,t.block#,nvl(t.bobj#,0),nvl(t.tab#,0),t.intcols,nvl(t.clucols,0),t.audit$,t.flags,t.pctfree$,t.pctused$,t.initrans,t.maxtrans,t.rowcnt,t.blkcnt,t.empcnt,t.avgspc,t.chncnt,t.avgrln,t.analyzetime,t.samplesize,t.cols,t.property,nvl(t.degree,1),nvl(t.instances,1),t.avgspc_flb,t.flbcnt,t.kernelcols,nvl(t.trigflag, 0),nvl(t.spare1,0),nvl(t.spare2,0),t.spare4,t.spare6,ts.cachedblk,ts.cachehit,ts.logicalread from tab$ t, tab_stats$ ts where t.obj#= :1 and t.obj# = ts.obj# (+)
select o.owner#,o.name,o.namespace,o.remoteowner,o.linkname,o.subname from obj$ o where o.obj#=:1
select col#, grantee#, privilege#,max(mod(nvl(option$,0),2)) from objauth$ where obj#=:1 and col# is not null group by privilege#, col#, grantee# order by col#, grantee#
select grantee#,privilege#,nvl(col#,0),max(mod(nvl(option$,0),2))from objauth$ where obj#=:1 group by grantee#,privilege#,nvl(col#,0) order by grantee#
select con#,obj#,rcon#,enabled,nvl(defer,0),spare2,spare3 from cdef$ where robj#=:1
select con#,type#,condlength,intcols,robj#,rcon#,match#,refact,nvl(enabled,0),rowid,cols,nvl(defer,0),mtime,nvl(spare1,0),spare2,spare3 from cdef$ where obj#=:1
select intcol#,nvl(pos#,0),col#,nvl(spare1,0) from ccol$ where con#=:1
select count(*) from ( select 1 from ind$ i, jijoin$ j where i.bo# = tab1obj# and j.obj#=i.obj# and j.tab2obj#=:1 union all select 1 from ind$ i, jijoin$ j where i.bo# = tab2obj# and j.obj#=i.obj# and j.tab1obj#=:1)
select nvl((decode(i.type#, 2, :1, 0) + sum(c.length + decode(sign(c.length - 128), -1, 1, 2))),0), i.obj#, i.initrans, i.type#, decode(i.type#, 4, MOD(i.pctthres$, 256), 0) from icol$ ic,col$ c,ind$ i where i.bo# = :2 and i.obj# in(select i.obj# from ind$ i,icol$ ic where i.bo#=:2 and i.obj#=ic.obj# and ic.intcol#=:3)and i.obj# = ic.obj# and ic.bo# = :2 and ic.intcol# = c.intcol# and c.obj# = :2 group by i.obj#, i.type#, i.initrans, i.pctthres$
select nvl((decode(i.type#, 2, :1, 0) + sum(c.length + decode(sign(c.length - 128), -1, 1, 2))),0), i.obj#, i.initrans, i.type#, decode(i.type#, 4, MOD(i.pctthres$, 256), 0), i.bo# from ind$ i, jijoin$ j, icol$ ic, col$ c where i.obj# = j.obj# and i.bo# = j.tab1obj# and i.obj# = ic.obj# and c.obj# = ic.bo# and c.col# = ic.col# and j.tab2obj#=:2 group by i.obj#, i.type#, i.initrans, i.pctthres$, i.bo#
select 1 from icol$ c,ind$ i where i.bo#=:1 and i.type# = 9 and i.obj#=c.obj# and c.intcol# in (select c1.intcol# from col$ c1 where c1.obj#=:1 and c1.col#=:2)
SELECT /* OPT_DYN_SAMP */ /*+ ALL_ROWS IGNORE_WHERE_CLAUSE NO_PARALLEL(SAMPLESUB) opt_param('parallel_execution_enabled', 'false') NO_PARALLEL_INDEX(SAMPLESUB) NO_SQL_TUNE */ NVL(SUM(C1),0), NVL(SUM(C2),0) FROM (SELECT /*+ IGNORE_WHERE_CLAUSE NO_PARALLEL("TEST") FULL("TEST") NO_PARALLEL_INDEX("TEST") */ 1 AS C1, CASE WHEN LENGTHB("TEST"."C1")>20 THEN 1 ELSE 0 END AS C2 FROM "BISAL"."TEST" "TEST") SAMPLESUB
select /*+ first_rows */ 1 from "BISAL"."TEST" where LENGTHB("C1") > 20
select 1 from icoldep$ i where i.bo#= :1 and i.intcol#= :2 and i.obj# NOT IN (select j.obj# from jijoin$ j)
select 1 from partcol$ c,ind$ i where i.bo#=:1 and i.obj#=c.obj# and c.intcol#=:2
select 1 from subpartcol$ c,ind$ i where i.bo#=:1 and i.obj#=c.obj# and c.intcol#=:2
delete from idl_ub1$ where obj#=:1
delete from idl_char$ where obj#=:1
delete from idl_ub2$ where obj#=:1
delete from idl_sb4$ where obj#=:1
delete from error$ where obj#=:1
select o.owner#, o.name, o.namespace, o.obj#, d.d_timestamp, nvl(d.property,0), o.type#, o.subname, d.d_attrs from dependency$ d, obj$ o where d.p_obj#=:1 and (d.p_timestamp=nvl(:2,d.p_timestamp) or d.property=2) and o.owner#=nvl(:3,o.owner#) and d.d_obj#=o.obj# order by o.obj#
update dependency$ set p_timestamp=:1, p_obj#=:2 where d_obj#=:3 and p_obj#=:4
update dependency$ set d_reason = :1 where d_obj# = :2 and p_obj# = :3
update dependency$ set d_attrs = :1 where d_obj# = :2 and p_obj# = :3
delete from superobj$ where subobj# = :1
update tab$ set ts#=:2,file#=:3,block#=:4,bobj#=decode(:5,0,null,:5),tab#=decode(:6,0,null,:6),intcols=:7,kernelcols=:8,clucols=decode(:9,0,null,:9),audit$=:10,flags=:11,pctfree$=:12,pctused$=:13,initrans=:14,maxtrans=:15,rowcnt=:16,blkcnt=:17,empcnt=:18,avgspc=:19,chncnt=:20,avgrln=:21,analyzetime=:22,samplesize=:23,cols=:24,property=:25,degree=decode(:26,1,null,:26),instances=decode(:27,1,null,:27),dataobj#=:28,avgspc_flb=:29,flbcnt=:30,trigflag=:31,spare1=:32,spare2=decode(:33,0,null,:33),spare4=:34,spare6=:35 where obj#=:1
update col$ set intcol#=:3,segcol#=:4,type#=:5,length=:6,precision#=decode(:5,182/*DTYIYM*/,:7,183/*DTYIDS*/,:7,decode(:7,0,null,:7)),scale=decode(:5,2,decode(:8,-127/*MAXSB1MINAL*/,null,:8),178,:8,179,:8,180,:8,181,:8,182,:8,183,:8,231,:8,null),null$=:9,fixedstorage=:10,segcollength=:11,col#=:12,property=:13,charsetid=:14,charsetform=:15,spare1=:16,spare2=:17,spare3=:18,deflength=decode(:19,0,null,:19),default$=:20 where obj#=:1 and name=:2
update obj$ set obj#=:4, type#=:5,ctime=:6,mtime=:7,stime=:8,status=:9,dataobj#=:10,flags=:11,oid$=:12,spare1=:13, spare2=:14 where owner#=:1 and name=:2 and namespace=:3 and remoteowner is null and linkname is null and subname is null
select count(FA#) from SYS_FBA_TRACKEDTABLES where OBJ# = :1 and bitand(FLAGS, :2)=0
LOCK TABLE "TEST" IN EXCLUSIVE MODE NOWAIT
复制
其中值得关注的,有如下几条,这条SQL会根据c1字段的字节长度是否超过20来设置1或者0,
SELECT /* OPT_DYN_SAMP */
/*+ ALL_ROWS IGNORE_WHERE_CLAUSE
NO_PARALLEL(SAMPLESUB)
opt_param('parallel_execution_enabled', 'false')
NO_PARALLEL_INDEX(SAMPLESUB) NO_SQL_TUNE */
NVL(SUM(C1),0), NVL(SUM(C2),0)
FROM (SELECT /*+ IGNORE_WHERE_CLAUSE NO_PARALLEL("TEST")
FULL("TEST") NO_PARALLEL_INDEX("TEST") */ 1 AS C1,
CASE WHEN LENGTHB("TEST"."C1")>20
THEN 1 ELSE 0 END AS C2 FROM "BISAL"."TEST" "TEST") SAMPLESUB
复制
这条SQL应该是找到所有c1的字节长度超过20的记录,
select /*+ first_rows */ 1 from "BISAL"."TEST" where LENGTHB("C1") > 20
复制
以上两个操作,都使用全表扫描,而且还会显式LOCK这张表,
LOCK TABLE "TEST" IN EXCLUSIVE MODE NOWAIT
复制
另外,还需要更新tab$、col$、obj$这些数据字典,
update tab$ set ts#=:2,file#=:3,block#=:4,bobj#=decode(:5,0,null,:5),tab#=decode(:6,0,null,:6),intcols=:7,kernelcols=:8,clucols=decode(:9,0,null,:9),audit$=:10,flags=:11,pctfree$=:12,pctused$=:13,initrans=:14,maxtrans=:15,rowcnt=:16,blkcnt=:17,empcnt=:18,avgspc=:19,chncnt=:20,avgrln=:21,analyzetime=:22,samplesize=:23,cols=:24,property=:25,degree=decode(:26,1,null,:26),instances=decode(:27,1,null,:27),dataobj#=:28,avgspc_flb=:29,flbcnt=:30,trigflag=:31,spare1=:32,spare2=decode(:33,0,null,:33),spare4=:34,spare6=:35 where obj#=:1
update col$ set intcol#=:3,segcol#=:4,type#=:5,length=:6,precision#=decode(:5,182/*DTYIYM*/,:7,183/*DTYIDS*/,:7,decode(:7,0,null,:7)),scale=decode(:5,2,decode(:8,-127/*MAXSB1MINAL*/,null,:8),178,:8,179,:8,180,:8,181,:8,182,:8,183,:8,231,:8,null),null$=:9,fixedstorage=:10,segcollength=:11,col#=:12,property=:13,charsetid=:14,charsetform=:15,spare1=:16,spare2=:17,spare3=:18,deflength=decode(:19,0,null,:19),default$=:20 where obj#=:1 and name=:2
update obj$ set obj#=:4, type#=:5,ctime=:6,mtime=:7,stime=:8,status=:9,dataobj#=:10,flags=:11,oid$=:12,spare1=:13, spare2=:14 where owner#=:1 and name=:2 and namespace=:3 and remoteowner is null and linkname is null and subname is null
复制
我们看下byte->byte,从逻辑上讲,它只是进行长度的扩容,不涉及语义类型的转换,
alter table test modify c2 varchar2(30);
复制
10046显示执行的SQL明显比char->byte要少,而且不存在对原表的任何访问,不需要更新tab$、col$、obj$等数据字典,不需要LOCK,
select count(*) from ind$ i where i.bo#=:1
select count(*) from ( select 1 from ind$ i, jijoin$ j where i.bo# = tab1obj# and j.obj#=i.obj# and j.tab2obj#=:1 union all select 1 from ind$ i, jijoin$ j where i.bo# = tab2obj# and j.obj#=i.obj# and j.tab1obj#=:1)
select nvl((decode(i.type#, 2, :1, 0) + sum(c.length + decode(sign(c.length - 128), -1, 1, 2))),0), i.obj#, i.initrans, i.type#, decode(i.type#, 4, MOD(i.pctthres$, 256), 0) from icol$ ic,col$ c,ind$ i where i.bo# = :2 and i.obj# in(select i.obj# from ind$ i,icol$ ic where i.bo#=:2 and i.obj#=ic.obj# and ic.intcol#=:3)and i.obj# = ic.obj# and ic.bo# = :2 and ic.intcol# = c.intcol# and c.obj# = :2 group by i.obj#, i.type#, i.initrans, i.pctthres$
select nvl((decode(i.type#, 2, :1, 0) + sum(c.length + decode(sign(c.length - 128), -1, 1, 2))),0), i.obj#, i.initrans, i.type#, decode(i.type#, 4, MOD(i.pctthres$, 256), 0), i.bo# from ind$ i, jijoin$ j, icol$ ic, col$ c where i.obj# = j.obj# and i.bo# = j.tab1obj# and i.obj# = ic.obj# and c.obj# = ic.bo# and c.col# = ic.col# and j.tab2obj#=:2 group by i.obj#, i.type#, i.initrans, i.pctthres$, i.bo#
select 1 from icol$ c,ind$ i where i.bo#=:1 and i.type# = 9 and i.obj#=c.obj# and c.intcol# in (select c1.intcol# from col$ c1 where c1.obj#=:1 and c1.col#=:2)
select 1 from icoldep$ i where i.bo#= :1 and i.intcol#= :2 and i.obj# NOT IN (select j.obj# from jijoin$ j)
select 1 from partcol$ c,ind$ i where i.bo#=:1 and i.obj#=c.obj# and c.intcol#=:2
select 1 from subpartcol$ c,ind$ i where i.bo#=:1 and i.obj#=c.obj# and c.intcol#=:2
delete from idl_ub1$ where obj#=:1
delete from idl_ub2$ where obj#=:1
delete from idl_sb4$ where obj#=:1
delete from error$ where obj#=:1
select o.owner#, o.name, o.namespace, o.obj#, d.d_timestamp, nvl(d.property,0), o.type#, o.subname, d.d_attrs from dependency$ d, obj$ o where d.p_obj#=:1 and (d.p_timestamp=nvl(:2,d.p_timestamp) or d.property=2) and o.owner#=nvl(:3,o.owner#) and d.d_obj#=o.obj# order by o.obj#
update dependency$ set p_timestamp=:1, p_obj#=:2 where d_obj#=:3 and p_obj#=:4
update dependency$ set d_reason = :1 where d_obj# = :2 and p_obj# = :3
update dependency$ set d_attrs = :1 where d_obj# = :2 and p_obj# = :3
delete from superobj$ where subobj# = :1
delete from tab_stats$ where obj#=:1
复制
char->byte或者byte->char,数据库需要做更多的操作,还需要进行数据的校验,如果对大表进行这种转换,从执行时间上,就会更久,而且LOCK表的操作,就会影响表的事务并发,进而对系统产生一定的影响。而单纯的char->char或者byte->byte的扩容,仅需要更新一些数据字典,不存在LOCK的需求,不需要读取原表,执行时间上,自然就很快。
归根结底,我认为实际场景中不太可能出现主动char->byte或者byte->char的场景,大多情况下,可能都是“误伤”,例如某个库,初始参数NLS_LENGTH_SEMANTICS设置成了按照byte存储,迁移到的新库,NLS_LENGTH_SEMANTICS设置成了按照char存储,这就涉及到转换。或者测试环境NLS_LENGTH_SEMANTICS按照char存储,生产环境NLS_LENGTH_SEMANTICS按照byte存储,准备上线SQL就忽略了字段应该带着的char或者byte,选择使用默认的,这就很容易“误伤”,生产上执行了,还可能都不知道它的影响,系统并发受到影响的同时,字段存储容量很可能不满足需求。归根结底,这些都是设计开发规范不严谨导致的,前提还是得了解其中的原理,如果不了解原理、只是机械地照搬复制,很可能导致其他的隐患问题。
近期更新的文章:
《小白学习MySQL - only_full_group_by的校验规则》
文章分类和索引: