学习目标
学习openGauss存储模型-行存和列存
行存储是指将表按行存储到硬盘分区上,列存储是指将表按列存储到硬盘分区上。默认情况下,创建的表为行存储。
行、列存储模型各有优劣,通常用于TP场景的数据库,默认使用行存储,仅对执行复杂查询且数据量大的AP场景时,才使用列存储
课程学习实操
连接数据库
#第一次进入等待15秒
#数据库启动中…
su - omm
gsql -r
1.创建行存表
CREATE TABLE test_t1
(
col1 CHAR(2),
col2 VARCHAR2(40),
col3 NUMBER
);
omm=# CREATE TABLE test_t1
omm-# (
omm(# col1 CHAR(2),
omm(# col2 VARCHAR2(40),
omm(# col3 NUMBER
omm(# );
CREATE TABLE
omm=#
复制
–压缩属性为no
\d+ test_t1
insert into test_t1 select col1, col2, col3 from (select generate_series(1, 100000) as key, repeat(chr(int4(random() * 26) + 65), 2) as col1, repeat(chr(int4(random() * 26) + 65), 30) as col2, (random() * (10^4))::integer as col3);
omm=# \d+ test_t1
Table "public.test_t1"
Column | Type | Modifiers | Storage | Stats target | Description
--------+-----------------------+-----------+----------+--------------+-------------
col1 | character(2) | | extended | |
col2 | character varying(40) | | extended | |
col3 | numeric | | main | |
Has OIDs: no
Options: orientation=row, compression=no
omm=# insert into test_t1 select col1, col2, col3 from (select generate_series(1, 100000) as key, repeat(chr(int4(random() * 26) + 65), 2) as col1, repeat(chr(int4(random() * 26) + 65), 30) as col2, (random() * (10^4))::integer as col3);
INSERT 0 100000
omm=#
复制
2.创建列存表
CREATE TABLE test_t2
(
col1 CHAR(2),
col2 VARCHAR2(40),
col3 NUMBER
)
WITH (ORIENTATION = COLUMN);
–压缩属性为low
\d+ test_t2;
omm=# CREATE TABLE test_t2
omm-# (
omm(# col1 CHAR(2),
omm(# col2 VARCHAR2(40),
omm(# col3 NUMBER
omm(# )
omm-# WITH (ORIENTATION = COLUMN);
CREATE TABLE
omm=#
omm=# \d+ test_t2;
Table "public.test_t2"
Column | Type | Modifiers | Storage | Stats target | Description
--------+-----------------------+-----------+----------+--------------+-------------
col1 | character(2) | | extended | |
col2 | character varying(40) | | extended | |
col3 | numeric | | main | |
Has OIDs: no
Options: orientation=column, compression=low
复制
–插入和行存表相同的数据
insert into test_t2 select * from test_t1;
omm=# insert into test_t2 select * from test_t1;
INSERT 0 100000
复制
3.占用空间对比
\d+
omm=# \d+
List of relations
Schema | Name | Type | Owner | Size | Storage | Description
--------+---------+-------+-------+---------+--------------------------------------+-------------
public | test_t1 | table | omm | 6760 kB | {orientation=row,compression=no} |
public | test_t2 | table | omm | 1112 kB | {orientation=column,compression=low} |
(2 rows)
复制
4.对比读取一列的速度
analyze VERBOSE test_t1;
analyze VERBOSE test_t2;
–列存表时间少于行存表
explain analyze select distinct col1 from test_t1;
explain analyze select distinct col1 from test_t2;
omm=# analyze VERBOSE test_t1;
INFO: analyzing "public.test_t1"(dn_6001 pid=4020)
INFO: ANALYZE INFO : "test_t1": scanned 841 of 841 pages, containing 100000 live rows and 0 dead rows; 30000 rows in sample, 100000 estimated total rows(dn_6001 pid=4020)
ANALYZE
omm=# analyze VERBOSE test_t2;
INFO: analyzing "public.test_t2"(dn_6001 pid=4020)
INFO: ANALYZE INFO : estimate total rows of "pg_delta_24692": scanned 0 pages of total 0 pages with 1 retry times, containing 0 live rows and 0 dead rows, estimated 0 total rows(dn_6001 pid=4020)
INFO: ANALYZE INFO : "test_t2": scanned 2 of 2 cus, sample 30000 rows, estimated total 100000 rows(dn_6001 pid=4020)
ANALYZE
omm=#
omm=# explain analyze select distinct col1 from test_t1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
HashAggregate (cost=2091.00..2091.27 rows=27 width=3) (actual time=74.531..74.538 rows=27 loops=1)
Group By Key: col1
-> Seq Scan on test_t1 (cost=0.00..1841.00 rows=100000 width=3) (actual time=0.018..34.092 rows=100000 loops=1)
Total runtime: 74.614 ms
(4 rows)
omm=# explain analyze select distinct col1 from test_t2;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------
Row Adapter (cost=1008.27..1008.27 rows=27 width=3) (actual time=7.121..7.125 rows=27 loops=1)
-> Vector Sonic Hash Aggregate (cost=1008.00..1008.27 rows=27 width=3) (actual time=7.111..7.112 rows=27 loops=1)
Group By Key: col1
-> CStore Scan on test_t2 (cost=0.00..758.00 rows=100000 width=3) (actual time=0.065..0.645 rows=100000 loops=1)
Total runtime: 7.328 ms
(5 rows)
复制
5.对比插入一行的速度
–行存表时间少于列存表
explain analyze insert into test_t1 values('x', 'xxxx', '123');
explain analyze insert into test_t2 values('x', 'xxxx', '123');
omm=# explain analyze insert into test_t1 values('x', 'xxxx', '123');
QUERY PLAN
-----------------------------------------------------------------------------------------------
[Bypass]
Insert on test_t1 (cost=0.00..0.01 rows=1 width=0) (actual time=0.167..0.169 rows=1 loops=1)
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.062..0.062 rows=1 loops=1)
Total runtime: 0.422 ms
(4 rows)
omm=# explain analyze insert into test_t2 values('x', 'xxxx', '123');
QUERY PLAN
-----------------------------------------------------------------------------------------------
Insert on test_t2 (cost=0.00..0.01 rows=1 width=0) (actual time=2.276..2.287 rows=1 loops=1)
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.000..0.002 rows=1 loops=1)
Total runtime: 2.480 ms
(3 rows)
复制
6.清理数据
drop table test_t1; drop table test_t2; omm=# drop table test_t1; DROP TABLE omm=# drop table test_t2; DROP TABLE omm=# omm=#
复制
课程作业
1.创建行存表和列存表,并批量插入10万条数据(行存表和列存表数据相同)
omm=# CREATE TABLE t1
omm-# (
omm(# id CHAR(2),
omm(# name VARCHAR2(40),
omm(# t1_key NUMBER
omm(# );
CREATE TABLE
omm=# CREATE TABLE t2
omm-# (
omm(# id CHAR(2),
omm(# name VARCHAR2(40),
omm(# t2_key NUMBER
omm(# )
omm-# WITH (ORIENTATION = COLUMN);
CREATE TABLE
omm=# \d+ t1
Table "public.t1"
Column | Type | Modifiers | Storage | Stats target | Description
--------+-----------------------+-----------+----------+--------------+-------------
id | character(2) | | extended | |
name | character varying(40) | | extended | |
t1_key | numeric | | main | |
Has OIDs: no
Options: orientation=row, compression=no
omm=# \d+ t2
Table "public.t2"
Column | Type | Modifiers | Storage | Stats target | Description
--------+-----------------------+-----------+----------+--------------+-------------
id | character(2) | | extended | |
name | character varying(40) | | extended | |
t2_key | numeric | | main | |
Has OIDs: no
Options: orientation=column, compression=low
omm=# insert into t1 select id, name, t1_key from (select generate_series(1, 100000) as key, repeat(chr(int4(random() * 26) + 65), 2) as id, repeat(chr(int4(random() * 26) + 65), 30) as name, (random() * (10^4))::integer as t1_key);
INSERT 0 100000
omm=# insert into t2 select id, name, t2_key from (select generate_series(1, 100000) as key, repeat(chr(int4(random() * 26) + 65), 2) as id, repeat(chr(int4(random() * 26) + 65), 30) as name, (random() * (10^4))::integer as t2_key);
INSERT 0 100000
复制
2.对比行存表和列存表空间大小
omm=# \d+
List of relations
Schema | Name | Type | Owner | Size | Storage | Description
--------+------+-------+-------+---------+--------------------------------------+-------------
public | t1 | table | omm | 6760 kB | {orientation=row,compression=no} |
public | t2 | table | omm | 1112 kB | {orientation=column,compression=low} |
(2 rows)
复制
3.对比查询一列和插入一行的速度
omm=# analyze VERBOSE t1;
INFO: analyzing "public.t1"(dn_6001 pid=4020)
INFO: ANALYZE INFO : "t1": scanned 841 of 841 pages, containing 100000 live rows and 0 dead rows; 30000 rows in sample, 100000 estimated total rows(dn_6001 pid=4020)
ANALYZE
omm=# analyze VERBOSE t2;
INFO: analyzing "public.t2"(dn_6001 pid=4020)
INFO: ANALYZE INFO : estimate total rows of "pg_delta_24802": scanned 0 pages of total 0 pages with 1 retry times, containing 0 live rows and 0 dead rows, estimated 0 total rows(dn_6001 pid=4020)
INFO: ANALYZE INFO : "t2": scanned 2 of 2 cus, sample 30000 rows, estimated total 100000 rows(dn_6001 pid=4020)
ANALYZE
omm=# explain analyze select distinct id from t1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------
HashAggregate (cost=2091.00..2091.27 rows=27 width=3) (actual time=50.704..50.707 rows=27 loops=1)
Group By Key: id
-> Seq Scan on t1 (cost=0.00..1841.00 rows=100000 width=3) (actual time=0.018..28.253 rows=100000 loops=1)
Total runtime: 50.764 ms
(4 rows)
omm=# explain analyze select distinct id from t2;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------
Row Adapter (cost=1008.27..1008.27 rows=27 width=3) (actual time=3.033..3.034 rows=27 loops=1)
-> Vector Sonic Hash Aggregate (cost=1008.00..1008.27 rows=27 width=3) (actual time=3.027..3.028 rows=27 loops=1)
Group By Key: id
-> CStore Scan on t2 (cost=0.00..758.00 rows=100000 width=3) (actual time=0.029..0.258 rows=100000 loops=1)
Total runtime: 3.120 ms
(5 rows)
omm=# explain analyze insert into t1 values('y', 'xxyy', '456');
QUERY PLAN
------------------------------------------------------------------------------------------
[Bypass]
Insert on t1 (cost=0.00..0.01 rows=1 width=0) (actual time=0.094..0.096 rows=1 loops=1)
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.001..0.001 rows=1 loops=1)
Total runtime: 0.343 ms
(4 rows)
omm=# explain analyze insert into t2 values('z', 'xyyx', '456');
QUERY PLAN
------------------------------------------------------------------------------------------
Insert on t2 (cost=0.00..0.01 rows=1 width=0) (actual time=2.091..2.094 rows=1 loops=1)
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.001..0.001 rows=1 loops=1)
Total runtime: 2.209 ms
(3 rows)
复制
对于行存和列存,查询一列的速度,列存要比行存表快,插入速度,行存比列存快。
4.清理数据
omm=# drop table t1,t2;
DROP TABLE
omm=#
复制
学习小结
通过本节课的学习,我掌握了行存和列存的区别和不同的应用场景。
最后修改时间:2021-12-22 15:31:46
【版权声明】本文为墨天轮用户原创内容,转载时必须标注文章的来源(墨天轮),文章链接,文章作者等基本信息,否则作者和墨天轮有权追究责任。如果您发现墨天轮中有涉嫌抄袭或者侵权的内容,欢迎发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。
评论
相关阅读
2025年3月国产数据库大事记
墨天轮编辑部
872次阅读
2025-04-03 15:21:16
MogDB 发布更新,解决 openGauss 数据库在长事务情况下Ustore表膨胀问题
MogDB
286次阅读
2025-04-17 10:41:41
openGauss 7.0.0-RC1 版本正式发布!
Gauss松鼠会
201次阅读
2025-04-01 12:27:03
MogDB 发布更新,解决 openGauss 数据库在长事务情况下Ustore表膨胀问题
云和恩墨
184次阅读
2025-04-16 09:52:02
openGauss 7.0.0-RC1 版本体验:一主一备快速安装指南
孙莹
176次阅读
2025-04-01 10:30:07
鲲鹏RAG一体机解决方案正式发布 openGauss DataVec向量数据库助力DeepSeek行业应用
Gauss松鼠会
123次阅读
2025-03-31 10:00:29
荣誉时刻!openGauss认证证书快递已发,快来看看谁榜上有名!
墨天轮小教习
106次阅读
2025-04-23 17:39:13
openGauss6.0.0适配操作系统自带的软件,不依赖三方库
来杯拿铁
75次阅读
2025-04-18 10:49:53
opengauss使用gs_probackup进行增量备份恢复
进击的CJR
70次阅读
2025-04-09 16:11:58
Postgresql数据库单个Page最多存储多少行数据
maozicb
55次阅读
2025-04-23 16:02:19