兄弟给了个案例,这是在设计开发环节很可能被忽略的一个问题,如下测试表,c1字段按照char存储,c2字段按照byte存储,
create table test(c1 varchar2(10 char), c2 varchar2(10 byte));
复制
现在要扩容字段,以下两种操作,有什么区别?
alter table test modify c1 varchar2(20);
alter table test modify c2 varchar2(30);
复制
《NLS_LENGTH_SEMANTICS参数引申的问题》文章介绍了字符串类型字段按照char和byte存储的形式,默认按照byte存储,但是上述测试表,一个是按照char,一个是按照byte,其实问题可以翻译成,两个扩容操作,一个是char->byte,一个是byte->byte,有什么区别?
从语义上讲,例如UTF-8字符集,1个中文占用三个字节,GBK字符集,1个中文占用两个字节,如果按照存储中文来计算,
1. UTF-8字符集,
原始:c1 varchar2(10 char)可以存储10个中文,占30个字节。
c2 varchar2(10 byte)可以存储10/3=3个中文,占9个字节。
扩容:c1 varchar2(20)是20个字节,可以存储20/3=6个中文。
c2 varchar2(30)是30个字节,可以存储30/3=10个中文。
2. GBK字符集,
原始:c1 varchar2(10 char)可以存储10个中文,占20个字节。
c2 varchar2(10 byte)可以存储10/2=5个中文,占10个字节。
扩容:c1 varchar2(20)是20个字节,可以存储20/2=10个中文。
c2 varchar2(30)是30个字节,可以存储30/2=15个中文。
因此,不同字符集,char和byte之间的转换,存储中文字符的个数,有可能存储更多,有可能存储更少,还可能是相同的,取决于char和byte的换算关系以及扩容的数值。如果只存储英文字符,从存储容量来说,几乎无影响。
从实际运行上,char->byte和byte->byte,还是有些区别的,主要体现在对性能的影响。
如果是char->byte,除了扩容长度,它还涉及语义类型上的转换,从逻辑上讲,需要判断当前字段存储的值,在进行转换后,是否出现超长的情况,
alter table test modify c1 varchar2(20);
复制
这是上述操作对应的10046,为了扩容字段,在数据字典层面,执行了35条SQL,
select count(*) from ind$ i where i.bo#=:1
select /*+ rule */ bucket_cnt, row_cnt, cache_cnt, null_cnt, timestamp#, sample_size, minimum, maximum, distcnt, lowval, hival, density, col#, spare1, spare2, avgcln from hist_head$ where obj#=:1 and intcol#=:2
select /*+ rule */ bucket, endpoint, col#, epvalue from histgrm$ where obj#=:1 and intcol#=:2 and row#=:3 order by bucket
select obj#,type#,ctime,mtime,stime, status, dataobj#, flags, oid$, spare1, spare2 from obj$ where owner#=:1 and name=:2 and namespace=:3 and remoteowner is null and linkname is null and subname is null
select t.ts#,t.file#,t.block#,nvl(t.bobj#,0),nvl(t.tab#,0),t.intcols,nvl(t.clucols,0),t.audit$,t.flags,t.pctfree$,t.pctused$,t.initrans,t.maxtrans,t.rowcnt,t.blkcnt,t.empcnt,t.avgspc,t.chncnt,t.avgrln,t.analyzetime,t.samplesize,t.cols,t.property,nvl(t.degree,1),nvl(t.instances,1),t.avgspc_flb,t.flbcnt,t.kernelcols,nvl(t.trigflag, 0),nvl(t.spare1,0),nvl(t.spare2,0),t.spare4,t.spare6,ts.cachedblk,ts.cachehit,ts.logicalread from tab$ t, tab_stats$ ts where t.obj#= :1 and t.obj# = ts.obj# (+)
select o.owner#,o.name,o.namespace,o.remoteowner,o.linkname,o.subname from obj$ o where o.obj#=:1
select col#, grantee#, privilege#,max(mod(nvl(option$,0),2)) from objauth$ where obj#=:1 and col# is not null group by privilege#, col#, grantee# order by col#, grantee#
select grantee#,privilege#,nvl(col#,0),max(mod(nvl(option$,0),2))from objauth$ where obj#=:1 group by grantee#,privilege#,nvl(col#,0) order by grantee#
select con#,obj#,rcon#,enabled,nvl(defer,0),spare2,spare3 from cdef$ where robj#=:1
select con#,type#,condlength,intcols,robj#,rcon#,match#,refact,nvl(enabled,0),rowid,cols,nvl(defer,0),mtime,nvl(spare1,0),spare2,spare3 from cdef$ where obj#=:1
select intcol#,nvl(pos#,0),col#,nvl(spare1,0) from ccol$ where con#=:1
select count(*) from ( select 1 from ind$ i, jijoin$ j where i.bo# = tab1obj# and j.obj#=i.obj# and j.tab2obj#=:1 union all select 1 from ind$ i, jijoin$ j where i.bo# = tab2obj# and j.obj#=i.obj# and j.tab1obj#=:1)
select nvl((decode(i.type#, 2, :1, 0) + sum(c.length + decode(sign(c.length - 128), -1, 1, 2))),0), i.obj#, i.initrans, i.type#, decode(i.type#, 4, MOD(i.pctthres$, 256), 0) from icol$ ic,col$ c,ind$ i where i.bo# = :2 and i.obj# in(select i.obj# from ind$ i,icol$ ic where i.bo#=:2 and i.obj#=ic.obj# and ic.intcol#=:3)and i.obj# = ic.obj# and ic.bo# = :2 and ic.intcol# = c.intcol# and c.obj# = :2 group by i.obj#, i.type#, i.initrans, i.pctthres$
select nvl((decode(i.type#, 2, :1, 0) + sum(c.length + decode(sign(c.length - 128), -1, 1, 2))),0), i.obj#, i.initrans, i.type#, decode(i.type#, 4, MOD(i.pctthres$, 256), 0), i.bo# from ind$ i, jijoin$ j, icol$ ic, col$ c where i.obj# = j.obj# and i.bo# = j.tab1obj# and i.obj# = ic.obj# and c.obj# = ic.bo# and c.col# = ic.col# and j.tab2obj#=:2 group by i.obj#, i.type#, i.initrans, i.pctthres$, i.bo#
select 1 from icol$ c,ind$ i where i.bo#=:1 and i.type# = 9 and i.obj#=c.obj# and c.intcol# in (select c1.intcol# from col$ c1 where c1.obj#=:1 and c1.col#=:2)
SELECT /* OPT_DYN_SAMP */ /*+ ALL_ROWS IGNORE_WHERE_CLAUSE NO_PARALLEL(SAMPLESUB) opt_param('parallel_execution_enabled', 'false') NO_PARALLEL_INDEX(SAMPLESUB) NO_SQL_TUNE */ NVL(SUM(C1),0), NVL(SUM(C2),0) FROM (SELECT /*+ IGNORE_WHERE_CLAUSE NO_PARALLEL("TEST") FULL("TEST") NO_PARALLEL_INDEX("TEST") */ 1 AS C1, CASE WHEN LENGTHB("TEST"."C1")>20 THEN 1 ELSE 0 END AS C2 FROM "BISAL"."TEST" "TEST") SAMPLESUB
select /*+ first_rows */ 1 from "BISAL"."TEST" where LENGTHB("C1") > 20
select 1 from icoldep$ i where i.bo#= :1 and i.intcol#= :2 and i.obj# NOT IN (select j.obj# from jijoin$ j)
select 1 from partcol$ c,ind$ i where i.bo#=:1 and i.obj#=c.obj# and c.intcol#=:2
select 1 from subpartcol$ c,ind$ i where i.bo#=:1 and i.obj#=c.obj# and c.intcol#=:2
delete from idl_ub1$ where obj#=:1
delete from idl_char$ where obj#=:1
delete from idl_ub2$ where obj#=:1
delete from idl_sb4$ where obj#=:1
delete from error$ where obj#=:1
select o.owner#, o.name, o.namespace, o.obj#, d.d_timestamp, nvl(d.property,0), o.type#, o.subname, d.d_attrs from dependency$ d, obj$ o where d.p_obj#=:1 and (d.p_timestamp=nvl(:2,d.p_timestamp) or d.property=2) and o.owner#=nvl(:3,o.owner#) and d.d_obj#=o.obj# order by o.obj#
update dependency$ set p_timestamp=:1, p_obj#=:2 where d_obj#=:3 and p_obj#=:4
update dependency$ set d_reason = :1 where d_obj# = :2 and p_obj# = :3
update dependency$ set d_attrs = :1 where d_obj# = :2 and p_obj# = :3
delete from superobj$ where subobj# = :1
update tab$ set ts#=:2,file#=:3,block#=:4,bobj#=decode(:5,0,null,:5),tab#=decode(:6,0,null,:6),intcols=:7,kernelcols=:8,clucols=decode(:9,0,null,:9),audit$=:10,flags=:11,pctfree$=:12,pctused$=:13,initrans=:14,maxtrans=:15,rowcnt=:16,blkcnt=:17,empcnt=:18,avgspc=:19,chncnt=:20,avgrln=:21,analyzetime=:22,samplesize=:23,cols=:24,property=:25,degree=decode(:26,1,null,:26),instances=decode(:27,1,null,:27),dataobj#=:28,avgspc_flb=:29,flbcnt=:30,trigflag=:31,spare1=:32,spare2=decode(:33,0,null,:33),spare4=:34,spare6=:35 where obj#=:1
update col$ set intcol#=:3,segcol#=:4,type#=:5,length=:6,precision#=decode(:5,182/*DTYIYM*/,:7,183/*DTYIDS*/,:7,decode(:7,0,null,:7)),scale=decode(:5,2,decode(:8,-127/*MAXSB1MINAL*/,null,:8),178,:8,179,:8,180,:8,181,:8,182,:8,183,:8,231,:8,null),null$=:9,fixedstorage=:10,segcollength=:11,col#=:12,property=:13,charsetid=:14,charsetform=:15,spare1=:16,spare2=:17,spare3=:18,deflength=decode(:19,0,null,:19),default$=:20 where obj#=:1 and name=:2
update obj$ set obj#=:4, type#=:5,ctime=:6,mtime=:7,stime=:8,status=:9,dataobj#=:10,flags=:11,oid$=:12,spare1=:13, spare2=:14 where owner#=:1 and name=:2 and namespace=:3 and remoteowner is null and linkname is null and subname is null
select count(FA#) from SYS_FBA_TRACKEDTABLES where OBJ# = :1 and bitand(FLAGS, :2)=0
LOCK TABLE "TEST" IN EXCLUSIVE MODE NOWAIT
复制
其中值得关注的,有如下几条,这条SQL会根据c1字段的字节长度是否超过20来设置1或者0,
SELECT /* OPT_DYN_SAMP */
/*+ ALL_ROWS IGNORE_WHERE_CLAUSE
NO_PARALLEL(SAMPLESUB)
opt_param('parallel_execution_enabled', 'false')
NO_PARALLEL_INDEX(SAMPLESUB) NO_SQL_TUNE */
NVL(SUM(C1),0), NVL(SUM(C2),0)
FROM (SELECT /*+ IGNORE_WHERE_CLAUSE NO_PARALLEL("TEST")
FULL("TEST") NO_PARALLEL_INDEX("TEST") */ 1 AS C1,
CASE WHEN LENGTHB("TEST"."C1")>20
THEN 1 ELSE 0 END AS C2 FROM "BISAL"."TEST" "TEST") SAMPLESUB
复制
这条SQL应该是找到所有c1的字节长度超过20的记录,
select /*+ first_rows */ 1 from "BISAL"."TEST" where LENGTHB("C1") > 20
复制
以上两个操作,都使用全表扫描,而且还会显式LOCK这张表,
LOCK TABLE "TEST" IN EXCLUSIVE MODE NOWAIT
复制
另外,还需要更新tab$、col$、obj$这些数据字典,
update tab$ set ts#=:2,file#=:3,block#=:4,bobj#=decode(:5,0,null,:5),tab#=decode(:6,0,null,:6),intcols=:7,kernelcols=:8,clucols=decode(:9,0,null,:9),audit$=:10,flags=:11,pctfree$=:12,pctused$=:13,initrans=:14,maxtrans=:15,rowcnt=:16,blkcnt=:17,empcnt=:18,avgspc=:19,chncnt=:20,avgrln=:21,analyzetime=:22,samplesize=:23,cols=:24,property=:25,degree=decode(:26,1,null,:26),instances=decode(:27,1,null,:27),dataobj#=:28,avgspc_flb=:29,flbcnt=:30,trigflag=:31,spare1=:32,spare2=decode(:33,0,null,:33),spare4=:34,spare6=:35 where obj#=:1
update col$ set intcol#=:3,segcol#=:4,type#=:5,length=:6,precision#=decode(:5,182/*DTYIYM*/,:7,183/*DTYIDS*/,:7,decode(:7,0,null,:7)),scale=decode(:5,2,decode(:8,-127/*MAXSB1MINAL*/,null,:8),178,:8,179,:8,180,:8,181,:8,182,:8,183,:8,231,:8,null),null$=:9,fixedstorage=:10,segcollength=:11,col#=:12,property=:13,charsetid=:14,charsetform=:15,spare1=:16,spare2=:17,spare3=:18,deflength=decode(:19,0,null,:19),default$=:20 where obj#=:1 and name=:2
update obj$ set obj#=:4, type#=:5,ctime=:6,mtime=:7,stime=:8,status=:9,dataobj#=:10,flags=:11,oid$=:12,spare1=:13, spare2=:14 where owner#=:1 and name=:2 and namespace=:3 and remoteowner is null and linkname is null and subname is null
复制
我们看下byte->byte,从逻辑上讲,它只是进行长度的扩容,不涉及语义类型的转换,
alter table test modify c2 varchar2(30);
复制
10046显示执行的SQL明显比char->byte要少,而且不存在对原表的任何访问,不需要更新tab$、col$、obj$等数据字典,不需要LOCK,
select count(*) from ind$ i where i.bo#=:1
select count(*) from ( select 1 from ind$ i, jijoin$ j where i.bo# = tab1obj# and j.obj#=i.obj# and j.tab2obj#=:1 union all select 1 from ind$ i, jijoin$ j where i.bo# = tab2obj# and j.obj#=i.obj# and j.tab1obj#=:1)
select nvl((decode(i.type#, 2, :1, 0) + sum(c.length + decode(sign(c.length - 128), -1, 1, 2))),0), i.obj#, i.initrans, i.type#, decode(i.type#, 4, MOD(i.pctthres$, 256), 0) from icol$ ic,col$ c,ind$ i where i.bo# = :2 and i.obj# in(select i.obj# from ind$ i,icol$ ic where i.bo#=:2 and i.obj#=ic.obj# and ic.intcol#=:3)and i.obj# = ic.obj# and ic.bo# = :2 and ic.intcol# = c.intcol# and c.obj# = :2 group by i.obj#, i.type#, i.initrans, i.pctthres$
select nvl((decode(i.type#, 2, :1, 0) + sum(c.length + decode(sign(c.length - 128), -1, 1, 2))),0), i.obj#, i.initrans, i.type#, decode(i.type#, 4, MOD(i.pctthres$, 256), 0), i.bo# from ind$ i, jijoin$ j, icol$ ic, col$ c where i.obj# = j.obj# and i.bo# = j.tab1obj# and i.obj# = ic.obj# and c.obj# = ic.bo# and c.col# = ic.col# and j.tab2obj#=:2 group by i.obj#, i.type#, i.initrans, i.pctthres$, i.bo#
select 1 from icol$ c,ind$ i where i.bo#=:1 and i.type# = 9 and i.obj#=c.obj# and c.intcol# in (select c1.intcol# from col$ c1 where c1.obj#=:1 and c1.col#=:2)
select 1 from icoldep$ i where i.bo#= :1 and i.intcol#= :2 and i.obj# NOT IN (select j.obj# from jijoin$ j)
select 1 from partcol$ c,ind$ i where i.bo#=:1 and i.obj#=c.obj# and c.intcol#=:2
select 1 from subpartcol$ c,ind$ i where i.bo#=:1 and i.obj#=c.obj# and c.intcol#=:2
delete from idl_ub1$ where obj#=:1
delete from idl_ub2$ where obj#=:1
delete from idl_sb4$ where obj#=:1
delete from error$ where obj#=:1
select o.owner#, o.name, o.namespace, o.obj#, d.d_timestamp, nvl(d.property,0), o.type#, o.subname, d.d_attrs from dependency$ d, obj$ o where d.p_obj#=:1 and (d.p_timestamp=nvl(:2,d.p_timestamp) or d.property=2) and o.owner#=nvl(:3,o.owner#) and d.d_obj#=o.obj# order by o.obj#
update dependency$ set p_timestamp=:1, p_obj#=:2 where d_obj#=:3 and p_obj#=:4
update dependency$ set d_reason = :1 where d_obj# = :2 and p_obj# = :3
update dependency$ set d_attrs = :1 where d_obj# = :2 and p_obj# = :3
delete from superobj$ where subobj# = :1
delete from tab_stats$ where obj#=:1
复制
char->byte或者byte->char,数据库需要做更多的操作,还需要进行数据的校验,如果对大表进行这种转换,从执行时间上,就会更久,而且LOCK表的操作,就会影响表的事务并发,进而对系统产生一定的影响。而单纯的char->char或者byte->byte的扩容,仅需要更新一些数据字典,不存在LOCK的需求,不需要读取原表,执行时间上,自然就很快。
归根结底,我认为实际场景中不太可能出现主动char->byte或者byte->char的场景,大多情况下,可能都是“误伤”,例如某个库,初始参数NLS_LENGTH_SEMANTICS设置成了按照byte存储,迁移到的新库,NLS_LENGTH_SEMANTICS设置成了按照char存储,这就涉及到转换。或者测试环境NLS_LENGTH_SEMANTICS按照char存储,生产环境NLS_LENGTH_SEMANTICS按照byte存储,准备上线SQL就忽略了字段应该带着的char或者byte,选择使用默认的,这就很容易“误伤”,生产上执行了,还可能都不知道它的影响,系统并发受到影响的同时,字段存储容量很可能不满足需求。归根结底,这些都是设计开发规范不严谨导致的,前提还是得了解其中的原理,如果不了解原理、只是机械地照搬复制,很可能导致其他的隐患问题。
近期更新的文章:
《小白学习MySQL - only_full_group_by的校验规则》
文章分类和索引: