今天学习openGauss存储模型-行存和列存
1.创建行存表
CREATE TABLE test_t1
(
col1 CHAR(2),
col2 VARCHAR2(40),
col3 NUMBER
);
omm=# omm=# CREATE TABLE test_t1 omm-# ( omm(# col1 CHAR(2), omm(# col2 VARCHAR2(40), omm(# col3 NUMBER omm(# ); CREATE TABLE
复制
–压缩属性为no
\d+ test_t1
omm=# \d+ test_t1 Table "public.test_t1" Column | Type | Modifiers | Storage | Stats target | Description --------+-----------------------+-----------+----------+--------------+------------- col1 | character(2) | | extended | | col2 | character varying(40) | | extended | | col3 | numeric | | main | | Has OIDs: no Options: orientation=row, compression=no
复制
insert into test_t1 select col1, col2, col3 from (select generate_series(1, 100000) as key, repeat(chr(int4(random() * 26) + 65), 2) as col1, repeat(chr(int4(random() * 26) + 65), 30) as col2, (random() * (10^4))::integer as col3);
omm=# insert into test_t1 select col1, col2, col3 from (select generate_series(1, 100000) as key, repeat(chr(int4(random() * 26) + 65), 2) as col1, repeat(chr(int4(random() * 26) + 65), 30) as col2, (random() * (10^4))::integer as col3); INSERT 0 100000
复制
2.创建列存表
CREATE TABLE test_t2
(
col1 CHAR(2),
col2 VARCHAR2(40),
col3 NUMBER
)
WITH (ORIENTATION = COLUMN);
omm=# CREATE TABLE test_t2 omm-# ( omm(# col1 CHAR(2), omm(# col2 VARCHAR2(40), omm(# col3 NUMBER omm(# omm-# ) WITH (ORIENTATION = COLUMN); CREATE TABLE
复制
–压缩属性为low
\d+ test_t2;
omm=# \d+ test_t2; Table "public.test_t2" Column | Type | Modifiers | Storage | Stats target | Description --------+-----------------------+-----------+----------+--------------+------------- col1 | character(2) | | extended | | col2 | character varying(40) | | extended | | col3 | numeric | | main | | Has OIDs: no Options: orientation=column, compression=low
复制
–插入和行存表相同的数据
insert into test_t2 select * from test_t1;
omm=# insert into test_t2 select * from test_t1; INSERT 0 100000
复制
3.占用空间对比
\d+
omm=# \d+ List of relations Schema | Name | Type | Owner | Size | Storage | Description --------+---------+-------+-------+---------+--------------------------------------+------------- public | test_t1 | table | omm | 6760 kB | {orientation=row,compression=no} | public | test_t2 | table | omm | 1112 kB | {orientation=column,compression=low} | (2 rows)
复制
4.对比读取一列的速度
analyze VERBOSE test_t1;
analyze VERBOSE test_t2;
omm=# analyze VERBOSE test_t1; INFO: analyzing "public.test_t1"(gaussdb pid=1) INFO: ANALYZE INFO : "test_t1": scanned 841 of 841 pages, containing 100000 live rows and 0 dead rows; 30000 rows in sample, 100000 estimated total rows(gaussdb pid=1) ANALYZE omm=# omm=# analyze VERBOSE test_t2; INFO: analyzing "public.test_t2"(gaussdb pid=1) INFO: ANALYZE INFO : estimate total rows of "pg_delta_16395": scanned 0 pages of total 0 pages with 1 retry times, containing 0 live rows and 0 dead rows, estimated 0 total rows(gaussdb pid=1) INFO: ANALYZE INFO : "test_t2": scanned 2 of 2 cus, sample 30000 rows, estimated total 100000 rows(gaussdb pid=1) ANALYZE
复制
–列存表时间少于行存表
explain analyze select distinct col1 from test_t1;
explain analyze select distinct col1 from test_t2;
omm=# explain analyze select distinct col1 from test_t1; QUERY PLAN ----------------------------------------------------------------------------------------------------------- ---------- HashAggregate (cost=2091.00..2091.27 rows=27 width=3) (actual time=51.899..51.904 rows=27 loops=1) Group By Key: col1 -> Seq Scan on test_t1 (cost=0.00..1841.00 rows=100000 width=3) (actual time=0.011..24.916 rows=100000 loops=1) Total runtime: 51.960 ms (4 rows) omm=# explain analyze select distinct col1 from test_t2; QUERY PLAN ----------------------------------------------------------------------------------------------------------- ----------------- Row Adapter (cost=1008.27..1008.27 rows=27 width=3) (actual time=4.161..4.165 rows=27 loops=1) -> Vector Sonic Hash Aggregate (cost=1008.00..1008.27 rows=27 width=3) (actual time=4.160..4.161 rows= 27 loops=1) Group By Key: col1 -> CStore Scan on test_t2 (cost=0.00..758.00 rows=100000 width=3) (actual time=0.031..0.265 rows =100000 loops=1) Total runtime: 4.271 ms (5 rows)
复制
5.对比插入一行的速度
–行存表时间少于列存表
explain analyze insert into test_t1 values(‘x’, ‘xxxx’, ‘123’);
explain analyze insert into test_t2 values(‘x’, ‘xxxx’, ‘123’);
omm=# explain analyze insert into test_t1 values('x', 'xxxx', '123'); QUERY PLAN ----------------------------------------------------------------------------------------------- [Bypass] Insert on test_t1 (cost=0.00..0.01 rows=1 width=0) (actual time=0.059..0.060 rows=1 loops=1) -> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.002..0.002 rows=1 loops=1) Total runtime: 0.146 ms (4 rows) omm=# explain analyze insert into test_t2 values('x', 'xxxx', '123'); QUERY PLAN ----------------------------------------------------------------------------------------------- Insert on test_t2 (cost=0.00..0.01 rows=1 width=0) (actual time=3.755..3.756 rows=1 loops=1) -> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.002..0.002 rows=1 loops=1) Total runtime: 3.839 ms (3 rows)
复制
6.清理数据
drop table test_t1;
drop table test_t2;
omm=# drop table test_t1; DROP TABLE omm=# omm=# omm=# drop table test_t2; omm=# DROP TABLE omm=#
复制
课程作业
1.创建行存表和列存表,并批量插入10万条数据(行存表和列存表数据相同)
omm=# create table table1(id int, info text, c_time timestamp); CREATE TABLE omm=# insert into table1 select generate_series(1,100000),md5(random()::text),clock_timestamp(); INSERT 0 100000 omm=# create table table1(id int, info text, c_time timestamp); CREATE TABLE omm=# insert into table1 select generate_series(1,100000),md5(random()::text),clock_timestamp(); INSERT 0 100000 omm=# create table table2 (id int, info text, c_time timestamp) WITH (ORIENTATION = COLUMN); CREATE TABLE omm=# insert into table2 select * from table1; INSERT 0 100000
复制
2.对比行存表和列存表空间大小
omm=# \d+ List of relations Schema | Name | Type | Owner | Size | Storage | Description --------+--------+-------+-------+---------+--------------------------------------+------------- public | table1 | table | omm | 7512 kB | {orientation=row,compression=no} | public | table2 | table | omm | 3904 kB | {orientation=column,compression=low} | (2 rows)
复制
3.对比查询一列和插入一行的速度
omm=# analyze VERBOSE table1; INFO: analyzing "public.table1"(gaussdb pid=1) INFO: ANALYZE INFO : "table1": scanned 935 of 935 pages, containing 100000 live rows and 0 dead rows; 30000 rows in sample, 100000 estimated total rows(gaussdb pid=1) ANALYZE omm=# analyze VERBOSE table2; INFO: analyzing "public.table2"(gaussdb pid=1) INFO: ANALYZE INFO : estimate total rows of "pg_delta_16417": scanned 0 pages of total 0 pages with 1 retry times, containing 0 live rows and 0 dead rows, estimated 0 total rows(gaussdb pid=1) INFO: ANALYZE INFO : "table2": scanned 2 of 2 cus, sample 30000 rows, estimated total 100000 rows(gaussdb pid=1) ANALYZE omm=# explain analyze select count(*) from table1; QUERY PLAN ----------------------------------------------------------------------------------------------------------- --------- Aggregate (cost=2185.00..2185.01 rows=1 width=8) (actual time=32.310..32.310 rows=1 loops=1) -> Seq Scan on table1 (cost=0.00..1935.00 rows=100000 width=0) (actual time=0.011..18.627 rows=100000 loops=1) Total runtime: 32.375 ms (3 rows) omm=# explain analyze select count(*) from table2; QUERY PLAN ----------------------------------------------------------------------------------------------------------- ---------------- Row Adapter (cost=1008.01..1008.01 rows=1 width=8) (actual time=0.618..0.619 rows=1 loops=1) -> Vector Aggregate (cost=1008.00..1008.01 rows=1 width=8) (actual time=0.615..0.615 rows=1 loops=1) -> CStore Scan on table2 (cost=0.00..758.00 rows=100000 width=0) (actual time=0.025..0.272 rows= 100000 loops=1) Total runtime: 0.713 ms (4 rows) omm=# select * from table1 limit 2; id | info | c_time ----+----------------------------------+---------------------------- 1 | 97773cf7fb8829fda9f24f3816dce73a | 2021-12-24 17:46:30.734842 2 | e01c1ee4a06c14c2cf2ab0826ac6b423 | 2021-12-24 17:46:30.734971 (2 rows) omm=# insert into table1 values(100001,'aaaa',clock_timestamp()); INSERT 0 1 omm=# explain analyze insert into table1 values(100003,'aaaa',clock_timestamp()); QUERY PLAN ---------------------------------------------------------------------------------------------- Insert on table1 (cost=0.00..0.01 rows=1 width=0) (actual time=0.037..0.038 rows=1 loops=1) -> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.009..0.009 rows=1 loops=1) Total runtime: 0.141 ms (3 rows) omm=# explain analyze insert into table2 values(100002,'aaaa',clock_timestamp()); QUERY PLAN ---------------------------------------------------------------------------------------------- Insert on table2 (cost=0.00..0.01 rows=1 width=0) (actual time=0.172..0.173 rows=1 loops=1) -> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.011..0.011 rows=1 loops=1) Total runtime: 0.270 ms (3 rows)
复制
4.清理数据
omm=# drop table table1; DROP TABLE omm=# drop table table2; DROP TABLE
复制