
GBase 8c Study Notes 008: GBase 8c Advanced Syntax

心有阳光 2023-03-28


Environment Preparation

# Linux commands: on the installation node (i.e. the node where gha_ctl start/stop is run),
# execute gs_guc to modify the database parameters
gs_guc reload -Z coordinator -N all -I all -c "enable_incremental_checkpoint=off"
gs_guc reload -Z datanode -N all -I all -c "enable_incremental_checkpoint=off"
gs_guc reload -Z gtm -N all -I all -c "enable_incremental_checkpoint=off"
gs_guc reload -Z coordinator -N all -I all -c "enable_gbase_mot=on"
gs_guc reload -Z datanode -N all -I all -c "enable_gbase_mot=on"
gs_guc reload -Z gtm -N all -I all -c "enable_gbase_mot=on"
[gbase@gbase8c_5_150 script]$ gs_guc reload -Z coordinator -N all -I all -c "enable_incremental_checkpoint=off"
The gs_guc run with the following arguments: [gs_guc -Z coordinator -N all -I all -c enable_incremental_checkpoint=off reload ].
Begin to perform the total nodes: 3.
Popen count is 2, Popen success count is 2, Popen failure count is 0.
Begin to perform gs_guc for coordinators.
Command count is 2, Command success count is 2, Command failure count is 0.
Total instances: 2.
ALL: Success to perform gs_guc!

[gbase@gbase8c_5_150 script]$ gs_guc reload -Z datanode -N all -I all -c "enable_incremental_checkpoint=off"
The gs_guc run with the following arguments: [gs_guc -Z datanode -N all -I all -c enable_incremental_checkpoint=off reload ].
Begin to perform the total nodes: 3.
Popen count is 3, Popen success count is 3, Popen failure count is 0.
Begin to perform gs_guc for datanodes.
Command count is 3, Command success count is 3, Command failure count is 0.
Total instances: 3.
ALL: Success to perform gs_guc!

[gbase@gbase8c_5_150 script]$ gs_guc reload -Z gtm -N all -I all -c "enable_incremental_checkpoint=off"
The gs_guc run with the following arguments: [gs_guc -Z gtm -N all -I all -c enable_incremental_checkpoint=off reload ].
Begin to perform the total nodes: 3.
Popen count is 2, Popen success count is 2, Popen failure count is 0.
Begin to perform gs_guc for gtms.
Command count is 2, Command success count is 2, Command failure count is 0.
Total instances: 2.
ALL: Success to perform gs_guc!

[gbase@gbase8c_5_150 script]$ gs_guc reload -Z coordinator -N all -I all -c "enable_gbase_mot=on"
The gs_guc run with the following arguments: [gs_guc -Z coordinator -N all -I all -c enable_gbase_mot=on reload ].
Begin to perform the total nodes: 3.
Popen count is 2, Popen success count is 2, Popen failure count is 0.
Begin to perform gs_guc for coordinators.
Command count is 2, Command success count is 2, Command failure count is 0.
Total instances: 2.
ALL: Success to perform gs_guc!

[gbase@gbase8c_5_150 script]$ gs_guc reload -Z datanode -N all -I all -c "enable_gbase_mot=on"
The gs_guc run with the following arguments: [gs_guc -Z datanode -N all -I all -c enable_gbase_mot=on reload ].
Begin to perform the total nodes: 3.
Popen count is 3, Popen success count is 3, Popen failure count is 0.
Begin to perform gs_guc for datanodes.
Command count is 3, Command success count is 3, Command failure count is 0.
Total instances: 3.
ALL: Success to perform gs_guc!

[gbase@gbase8c_5_150 script]$ gs_guc reload -Z gtm -N all -I all -c "enable_gbase_mot=on"
The gs_guc run with the following arguments: [gs_guc -Z gtm -N all -I all -c enable_gbase_mot=on reload ].
Begin to perform the total nodes: 3.
Popen count is 2, Popen success count is 2, Popen failure count is 0.
Begin to perform gs_guc for gtms.
Command count is 2, Command success count is 2, Command failure count is 0.
Total instances: 2.
ALL: Success to perform gs_guc!


# Linux commands: restart the database to load the new parameter values
[gbase@gbase8c_5_150 script]$ gha_ctl stop all -l http://192.168.254.150:2379
{
    "ret":0,
    "msg":"Success"
}
[gbase@gbase8c_5_150 script]$ gha_ctl start all -l http://192.168.254.150:2379
{
    "ret":0,
    "msg":"Success"
}
[gbase@gbase8c_5_150 script]$ gha_ctl monitor all -H -l http://192.168.254.150:2379
+----+-------------+-----------------+-------+---------+--------+
| No | name        | host            | port  | state   | leader |
+----+-------------+-----------------+-------+---------+--------+
| 0  | gha_server1 | 192.168.254.150 | 20001 | running | True   |
+----+-------------+-----------------+-------+---------+--------+
+----+------+-----------------+------+---------------------------+---------+---------+
| No | name | host            | port | work_dir                  | state   | role    |
+----+------+-----------------+------+---------------------------+---------+---------+
| 0  | gtm1 | 192.168.254.150 | 6666 | /home/gbase/data/gtm/gtm1 | running | primary |
| 1  | gtm2 | 192.168.254.151 | 6666 | /home/gbase/data/gtm/gtm2 | running | standby |
+----+------+-----------------+------+---------------------------+---------+---------+
+----+------+-----------------+------+----------------------------+---------+---------+
| No | name | host            | port | work_dir                   | state   | role    |
+----+------+-----------------+------+----------------------------+---------+---------+
| 0  | cn1  | 192.168.254.151 | 5432 | /home/gbase/data/coord/cn1 | running | primary |
| 1  | cn2  | 192.168.254.152 | 5432 | /home/gbase/data/coord/cn2 | running | primary |
+----+------+-----------------+------+----------------------------+---------+---------+
+----+-------+-------+-----------------+-------+----------------------------+---------+---------+
| No | group | name  | host            | port  | work_dir                   | state   | role    |
+----+-------+-------+-----------------+-------+----------------------------+---------+---------+
| 0  | dn1   | dn1_1 | 192.168.254.151 | 15432 | /home/gbase/data/dn1/dn1_1 | running | primary |
| 1  | dn2   | dn2_1 | 192.168.254.152 | 20010 | /home/gbase/data/dn2/dn2_1 | running | primary |
| 2  | dn2   | dn2_2 | 192.168.254.150 | 20010 | /home/gbase/data/dn2/dn2_2 | running | standby |
+----+-------+-------+-----------------+-------+----------------------------+---------+---------+
+----+-----------------------------+--------+---------+----------+
| No | url                         | name   | state   | isLeader |
+----+-----------------------------+--------+---------+----------+
| 0  | http://192.168.254.150:2379 | node_2 | healthy | True     |
| 1  | http://192.168.254.151:2379 | node_0 | healthy | False    |
| 2  | http://192.168.254.152:2379 | node_1 | healthy | False    |
+----+-----------------------------+--------+---------+----------+
[gbase@gbase8c_5_150 script]$


[gbase@gbase8c_5_151 ~]$ gsql -d postgres -r
gsql ((multiple_nodes GBase8cV5 3.0.0B76 build 47948f99) compiled at 2023-02-27 16:04:20 commit 0 last mr 1232 )
Non-SSL connection (SSL connection is recommended when requiring high-security)
Type "help" for help.

postgres=# create database test;
CREATE DATABASE
postgres=# \c test
Non-SSL connection (SSL connection is recommended when requiring high-security)
You are now connected to database "test" as user "gbase".
test=# show enable_incremental_checkpoint;
 enable_incremental_checkpoint
-------------------------------
 off
(1 row)

test=# show enable_gbase_mot;
 enable_gbase_mot
------------------
 on
(1 row)


Storage Engines

  • Currently supported storage engines:

    • USTORE: the table uses the Inplace-Update storage engine.
    • ASTORE: the table uses the Append-Only storage engine.
  • Three storage methods are supported:

    • ROW: the table's data is stored in row format.
      • Row storage suits OLTP workloads, i.e. scenarios dominated by point queries and frequent inserts, updates, and deletes.
    • COLUMN: the table's data is stored in column format.
      • Column storage suits data-warehouse workloads, where tables undergo heavy aggregate computation touching relatively few columns.
    • MOT: the table's data is stored in memory.
      • MOT suits high-throughput transaction processing, accelerating performance hotspots, eliminating middle-tier caches, and large-scale streaming data ingestion.
drop table if exists test_astore_row ;
drop table if exists test_astore_column ;
drop foreign table if exists test_astore_mot ;
-- Specifying orientation=row at table creation creates a row-store table.
test=# create table test_astore_row(col text) with (orientation=row) ;
CREATE TABLE
-- Specifying orientation=column creates a column-store table.
test=# create table test_astore_column(col text) with (orientation=column);
CREATE TABLE
-- create foreign table ... server mot_server creates an in-memory (MOT) table.
test=# create foreign table test_astore_mot(col int) server mot_server ;
CREATE FOREIGN TABLE
-- Specifying storage_type=ustore creates a USTORE table.
drop table if exists test_ustore ;
test=# create table test_ustore(col text) with (storage_type=ustore);
CREATE TABLE
-- When the SQL statement specifies nothing, the default is the ASTORE engine with ROW storage.
test=# \d+
                                               List of relations
 Schema |        Name        |     Type      | Owner |  Size   |                       Storage                        | Description
--------+--------------------+---------------+-------+---------+------------------------------------------------------+-------------
 public | dual               | view          | gbase | 0 bytes |                                                      |
 public | test_astore_column | table         | gbase | 48 kB   | {orientation=column,compression=low}                 |
 public | test_astore_mot    | foreign table | gbase | 16 kB   |                                                      |
 public | test_astore_row    | table         | gbase | 16 kB   | {orientation=row,compression=no}                     |
 public | test_ustore        | table         | gbase | 16 kB   | {orientation=row,storage_type=ustore,compression=no} |
(5 rows)

test=# \d+ test_astore_column
              Table "public.test_astore_column"
 Column | Type | Modifiers | Storage  | Stats target | Description
--------+------+-----------+----------+--------------+-------------
 col    | text |           | extended |              |
Has OIDs: no
Distribute By: HASH(col)
Location Nodes: ALL DATANODES
Options: orientation=column, compression=low

test=# \d+ test_ustore
                 Table "public.test_ustore"
 Column | Type | Modifiers | Storage  | Stats target | Description
--------+------+-----------+----------+--------------+-------------
 col    | text |           | extended |              |
Has OIDs: no
Distribute By: HASH(col)
Location Nodes: ALL DATANODES
Options: orientation=row, storage_type=ustore, compression=no, toast.storage_type=ustore


Partitioned Tables

GBase 8c distributes data across the data nodes according to a defined rule; currently hash sharding is supported.
On top of that, the partitioned-table feature supports two-level partitioning, nine partition combinations, and interval partitioning. In other words: sharding plus partitioning.

-- Table sharding
-- GBase 8c's sharding strategy is set at table creation with the keywords distribute/replication.
-- distribute by hash creates a hash-sharded table: rows are spread evenly across the data nodes by a hash algorithm.
test=# CREATE TABLE t1_dis(c1 int, c2 int) DISTRIBUTE BY hash(c1);
CREATE TABLE
-- distribute by replication creates a replicated table: every data node stores a full copy of the data.
test=# CREATE TABLE t1_rep(c1 int, c2 int) DISTRIBUTE BY replication;
CREATE TABLE
-- Note: when not specified, hash sharding is used by default, and the distribution key is the first column (in declaration order) that the hash algorithm supports.
test=# \d+ t1_dis
                      Table "public.t1_dis"
 Column |  Type   | Modifiers | Storage | Stats target | Description
--------+---------+-----------+---------+--------------+-------------
 c1     | integer |           | plain   |              |
 c2     | integer |           | plain   |              |
Has OIDs: no
Distribute By: HASH(c1)
Location Nodes: ALL DATANODES
Options: orientation=row, compression=no

test=# \d+ t1_rep
                      Table "public.t1_rep"
 Column |  Type   | Modifiers | Storage | Stats target | Description
--------+---------+-----------+---------+--------------+-------------
 c1     | integer |           | plain   |              |
 c2     | integer |           | plain   |              |
Has OIDs: no
Distribute By: REPLICATION
Location Nodes: ALL DATANODES
Options: orientation=row, compression=no


Interval partitioning: partitions are created automatically at a given interval, for example 1 day or 1 month. (Primary/standby deployment.)

CREATE TABLE sales
(
    prod_id       NUMBER(6),
    cust_id       NUMBER,
    time_id       DATE,
    channel_id    CHAR(1),
    promo_id      NUMBER(6),
    quantity_sold NUMBER(3),
    amount_sold   NUMBER(10,2)
)
PARTITION BY RANGE (time_id)
INTERVAL('1 day')
(
    PARTITION p1 VALUES LESS THAN ('2019-02-01 00:00:00'),
    PARTITION p2 VALUES LESS THAN ('2019-02-02 00:00:00')
);

Automatic partition management: partitions are created automatically at a given interval, for example 1 day or 1 month. (Distributed deployment.)

Syntax: add the WITH (PERIOD='...') clause at table creation, or enable it on an existing table with ALTER TABLE xxx SET (PERIOD='1 day'); PERIOD may range from 1 hour to 1 year. Disable it with ALTER TABLE xxx RESET (PERIOD). A sketch of the ALTER TABLE form follows the create example below.
Restrictions: only single-level RANGE-partitioned tables are supported (other partition types, and tables with second-level partitions, are not). Effect: starting from the time the command runs (rounded down to the hour), enough partitions are created, each one PERIOD wide, to cover 30 consecutive PERIOD intervals; if an existing partition already falls inside a would-be new partition's range, that new partition is skipped. The whole procedure runs again after every PERIOD.

create table range_auto_1hour_tb01(id int, ip text, time timestamp)
with (PERIOD='1 hour')
partition by range(time)
( partition p1 values less than ('2022-11-23 14:00:00'));
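As a minimal sketch of the ALTER TABLE form described above (reusing the table from the example; the statements assume an existing single-level RANGE-partitioned table):

-- Enable, or change the interval of, automatic partition management:
ALTER TABLE range_auto_1hour_tb01 SET (PERIOD='1 day');
-- Turn automatic partition management off again:
ALTER TABLE range_auto_1hour_tb01 RESET (PERIOD);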
-- Partitioned tables
-- Single-level hash partitioning
drop table if exists mea_hash cascade;
test=# create table mea_hash ( city_id int,logdate timestamp,id int ) partition by hash(id) ( partition p1 , partition p2 );
CREATE TABLE
-- Two-level partitioning: hash-list, hash-hash, hash-range
drop table if exists mea_hash_list cascade;
test=# create table mea_hash_list ( city_id int,logdate timestamp,id int) partition by hash(id) subpartition by list(city_id) ( partition p1 (subpartition p12 values (10),subpartition p13 values (20) ));
CREATE TABLE
drop table if exists mea_hash_hash cascade;
test=# create table mea_hash_hash ( city_id int,logdate timestamp,id int) partition by hash(id) subpartition by hash(city_id) ( partition id_1 (subpartition p12 ,subpartition p13) );
CREATE TABLE
drop table if exists mea_hash_range cascade;
postgres=# create table mea_hash_range ( city_id int,logdate timestamp,id int) partition by hash(id) subpartition by range(logdate) ( partition meas_y2021 (subpartition p12 values less than ('2021-02-04 12:00:00'),subpartition p13 values less than ('2021-02-04 20:00:00') ));
CREATE TABLE
-- Single-level range partitioning
drop table if exists mea_range cascade;
postgres=# create table mea_range ( city_id int,logdate timestamp) partition by range(logdate) ( partition meas_y2021 values less than ('2021-01-01') );
CREATE TABLE
-- Two-level partitioning: range-range, range-hash, range-list
drop table if exists mea_range_range cascade;
postgres=# create table mea_range_range ( city_id int,logdate timestamp,id int) partition by range(logdate) subpartition by range(id) ( partition meas_y2021 values less than ('2021-02-04 21:00:00') (subpartition p12 values less than (1),subpartition p13 values less than (10) ));
CREATE TABLE
drop table if exists mea_range_hash cascade;
postgres=# create table mea_range_hash ( city_id int,logdate timestamp,id int) partition by range(logdate) subpartition by hash(city_id) ( partition id_1 values less than ('2021-02-01 01:00:00') (subpartition p12,subpartition p13) );
CREATE TABLE
drop table if exists mea_range_list cascade;
postgres=# create table mea_range_list ( city_id int,logdate timestamp,id int) partition by range(logdate) subpartition by list(city_id) ( partition p1 values less than ('2021-02-01 01:00:00') (subpartition p12 values (1),subpartition p13 values (20) ));
CREATE TABLE
-- Single-level list partitioning
drop table if exists mea_list cascade;
postgres=# create table mea_list ( city_id int,logdate timestamp,id int ) partition by list(id) ( partition p1 values (1), partition p2 values (2) );
CREATE TABLE
-- Expected to be supported; executes successfully.
-- Two-level partitioning: list-list, list-range, list-hash
drop table if exists mea_list_list cascade;
postgres=# create table mea_list_list ( city_id int,logdate timestamp,id int) partition by list(id) subpartition by list(city_id) ( partition p1 values (1) (subpartition p12 values (10),subpartition p13 values (20) ));
CREATE TABLE
drop table if exists mea_list_range cascade;
postgres=# create table mea_list_range ( city_id int,logdate timestamp,id int) partition by list(id) subpartition by range(logdate) ( partition meas_y2021 values ('202102') (subpartition p12 values less than ('2021-02-04 12:00:00'),subpartition p13 values less than ('2021-02-04 20:00:00') ));
CREATE TABLE
drop table if exists mea_list_hash cascade;
postgres=# create table mea_list_hash ( city_id int,logdate timestamp,id int) partition by list(id) subpartition by hash(city_id) ( partition id_1 values (2021) (subpartition p12,subpartition p13) );
CREATE TABLE
------------ Viewing a table's partitions in the catalog
postgres=# with RECURSIVE temp_partition as(
postgres(# SELECT t1.oid, t1.relname, partstrategy, boundaries,t1.parttype FROM pg_partition t1, pg_class t2 WHERE t1.parentid = t2.oid AND t2.relname = 'mea_hash_hash'
postgres(# union
postgres(# SELECT t1.oid, t1.relname, t1.partstrategy, t1.boundaries,t1.parttype FROM pg_partition t1 join temp_partition t2 on t1.parentid = t2.oid where t2.parttype='p')
postgres-# select t1.relname, t1.partstrategy, t1.boundaries,t1.parttype from temp_partition t1;
    relname    | partstrategy | boundaries | parttype
---------------+--------------+------------+----------
 mea_hash_hash | h            |            | r
 id_1          | h            | {0}        | p
 p12           | h            | {0}        | s
 p13           | h            | {1}        | s
(4 rows)

-- Automatic partitioning. Note: when creating the table, it must contain a partition whose bound lies more than 2 hours past the current time; adjust the timestamps to the actual time.
postgres=# create table range_auto_1hour_tb01(id int,ip text,time timestamp) with(PERIOD='1 hour') partition by range(time)( partition p1 values less than ('2023-02-27 23:00'));
CREATE TABLE
postgres=# insert into range_auto_1hour_tb01 values(1,'12','2023-2-18 09:00:00');
insert into range_auto_1hour_tb01 values(2,'10','2023-2-18 10:00:00');
insert into range_auto_1hour_tb01 values(2,'10','2023-2-19 18:00:00');
insert into range_auto_1hour_tb01 values(2,'10','2023-2-19 19:00:00');
postgres=# select * from dba_tab_partitions where table_name = 'RANGE_AUTO_1HOUR_TB01';
 table_owner |      table_name       |  partition_name  |     high_value      | high_value_length | partition_position | tablespace_name | logging | compression | segment_managed | num_rows | blocks | last_analyzed
-------------+-----------------------+------------------+---------------------+-------------------+--------------------+-----------------+---------+-------------+-----------------+----------+--------+---------------
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | P1               | 2023-02-27 23:00    |                16 |                  1 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022800 | 2023-02-28 00:00:00 |                19 |                  2 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022801 | 2023-02-28 01:00:00 |                19 |                  3 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022802 | 2023-02-28 02:00:00 |                19 |                  4 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022803 | 2023-02-28 03:00:00 |                19 |                  5 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022804 | 2023-02-28 04:00:00 |                19 |                  6 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022805 | 2023-02-28 05:00:00 |                19 |                  7 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022806 | 2023-02-28 06:00:00 |                19 |                  8 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022807 | 2023-02-28 07:00:00 |                19 |                  9 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022808 | 2023-02-28 08:00:00 |                19 |                 10 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022809 | 2023-02-28 09:00:00 |                19 |                 11 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022810 | 2023-02-28 10:00:00 |                19 |                 12 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022811 | 2023-02-28 11:00:00 |                19 |                 13 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022812 | 2023-02-28 12:00:00 |                19 |                 14 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022813 | 2023-02-28 13:00:00 |                19 |                 15 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022814 | 2023-02-28 14:00:00 |                19 |                 16 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 PUBLIC      | RANGE_AUTO_1HOUR_TB01 | AUTO_P2023022815 | 2023-02-28 15:00:00 |                19 |                 17 | DEFAULT         | YES     | NO          | NO              |        0 |      0 |
 ……


Index Types

GBase 8c provides several index types: hash, B-tree, GiST, and GIN. Each index type uses a different algorithm, suited to a different kind of query.

  • B-tree: works with all data types; supports sorting, and searches using greater-than, less-than, equal, greater-or-equal, and less-or-equal.
  • hash: stores the hash of the indexed column's VALUE; supports only equality lookups.
  • GiST (Generalized Search Tree): a generalized search tree; handles geographic data, images, spatial data, tree structures, and similar cases.
  • GIN: an inverted index; its main application is accelerating full-text search.

By default, create index builds a B-tree index, which fits most situations.
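As a minimal sketch of that default (the table and column names are invented for illustration), omitting the USING clause produces a B-tree index:

-- With no USING clause, a btree index is built:
create table t_default_idx(col int);
create index on t_default_idx(col);
-- \d t_default_idx should then list the index access method as "btree".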

hash index: handles only simple equality comparisons.
Syntax: create index … using hash(column) specifies a hash index.

B-tree index: commonly used for operations such as greater-than, less-than, and equality comparisons.
Syntax: create index … using btree(column) specifies a btree index.

GiST index:
Geographic data and images: for instance, checking whether a certain point exists at some location, i.e. testing geometric "containment".
For spatial data, a GiST index can use an R-tree to support relative-position operators (left of, right of, contains, and so on).
For tree structures, the R-tree can support overlap and containment operators.
Syntax: create index … using gist(column) specifies a GiST index.

GIN index: suited to multi-valued types, such as arrays, full-text search documents, and tokens, whenever you need to search for VALUEs inside them. (Depending on the type, it supports overlap, containment, greater-than, left-of, right-of, and other searches.) A short array sketch follows.
Syntax: create index … using gin(column) specifies a GIN index.
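The array use case mentioned above can be sketched as follows (a hypothetical table; this assumes GBase 8c inherits GIN's built-in array operator class from PostgreSQL):

create table t_tags(id int, tags int[]);
insert into t_tags values (1, array[1,2,3]), (2, array[3,4]);
create index t_tags_gin_idx on t_tags using gin(tags);
-- A containment search that the GIN index can serve:
select * from t_tags where tags @> array[3];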

-- Index types
-- hash index
postgres=# create table rw_split(col int,name text);
CREATE TABLE
postgres=# insert into rw_split select generate_series(1,50000), md5(random()::text)::text;
INSERT 0 50000
postgres=# create index rw_split_col_hash on rw_split using hash(col);
CREATE INDEX
postgres=# explain select * from rw_split where col =2;
                                        QUERY PLAN
----------------------------------------------------------------------------------------
 Data Node Scan  (cost=0.00..0.00 rows=1000 width=36)
   Node/s: dn1
   ->  Bitmap Heap Scan on rw_split  (cost=5.22..197.48 rows=125 width=36)
         Recheck Cond: (col = 2)
         ->  Bitmap Index Scan on rw_split_col_hash  (cost=0.00..5.19 rows=125 width=0)
               Index Cond: (col = 2)
(6 rows)

----------------- btree index
drop index rw_split_col_hash;
postgres=# create index rw_split_col_btree on rw_split using btree(col);
CREATE INDEX
postgres=# explain select * from rw_split where col =2;
                                         QUERY PLAN
-----------------------------------------------------------------------------------------
 Data Node Scan  (cost=0.00..0.00 rows=1000 width=36)
   Node/s: dn1
   ->  Bitmap Heap Scan on rw_split  (cost=5.22..197.48 rows=125 width=36)
         Recheck Cond: (col = 2)
         ->  Bitmap Index Scan on rw_split_col_btree  (cost=0.00..5.19 rows=125 width=0)
               Index Cond: (col = 2)
(6 rows)

--------------- gist index
drop table if exists t_gist;
postgres=# create table t_gist(id int,p point);
CREATE TABLE
postgres=# insert into t_gist select generate_series(1,10000),point(round((random()*1000)::numeric,2),round((random()*1000)::numeric,2));
INSERT 0 10000
postgres=# select * from t_gist limit 2;
 id |        p
----+-----------------
  1 | (947.99,748.17)
  2 | (410.15,357.92)
(2 rows)

postgres=# create index on t_gist using gist(p);
CREATE INDEX
-- Points within radius 1 of the point (100,100).
postgres=# explain (analyze,buffers) select * from t_gist where circle '((100,100) 1)' @> p order by p <-> '(100,100)' limit 10;
                                                             QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------
 Limit  (cost=8.29..8.29 rows=1 width=20) (actual time=2.769..2.769 rows=0 loops=1)
   ->  Sort  (cost=8.29..8.29 rows=1 width=20) (actual time=2.767..2.767 rows=0 loops=1)
         Sort Key: ((p <-> '(100,100)'::point))
         Sort Method: quicksort  Memory: 33kB
         (Buffers: shared hit=3)
         ->  Streaming(type: GATHER)  (cost=0.00..8.28 rows=1 width=20) (actual time=2.723..2.723 rows=0 loops=1)
               Spawn on: All datanodes
               ->  Limit  (cost=0.00..8.27 rows=2 width=20) (Actual time: never executed)
                     ->  Index Scan using t_gist_p_idx on t_gist  (cost=0.00..8.27 rows=1 width=20) (Actual time: never executed)
                           Index Cond: ('<(100,100),1>'::circle @> p)
                           Order By: (p <-> '(100,100)'::point)
 Total runtime: 4.654 ms
(12 rows)

--------------- gin index
drop table if exists t_gin;
create table t_gin(doc text, doc_tsv tsvector);
insert into t_gin(doc) values
('Can a sheet slitter slit sheets?'),
('How many sheets could a sheet slitter slit?'),
('I slit a sheet, a sheet I slit.'),
('Upon a slitted sheet I sit.'),
('Whoever slit the sheets is a good sheet slitter.'),
('I am a sheet slitter.'),
('I slit sheets.'),
('I am the sleekest sheet slitter that ever slit sheets.'),
('She slits the sheet she sits on.');
update t_gin set doc_tsv = to_tsvector(doc);
create index on t_gin using gin(doc_tsv);

postgres=# create table t_gin(doc text, doc_tsv tsvector);
CREATE TABLE
postgres=# insert into t_gin(doc) values
postgres-# ('Can a sheet slitter slit sheets?'),
postgres-# ('How many sheets could a sheet slitter slit?'),
postgres-# ('I slit a sheet, a sheet I slit.'),
postgres-# ('Upon a slitted sheet I sit.'),
postgres-# ('Whoever slit the sheets is a good sheet slitter.'),
postgres-# ('I am a sheet slitter.'),
postgres-# ('I slit sheets.'),
postgres-# ('I am the sleekest sheet slitter that ever slit sheets.'),
postgres-# ('She slits the sheet she sits on.');
INSERT 0 9
postgres=# select * from t_gin;
                          doc                           | doc_tsv
--------------------------------------------------------+---------
 Can a sheet slitter slit sheets?                       |
 I slit sheets.                                         |
 How many sheets could a sheet slitter slit?            |
 I slit a sheet, a sheet I slit.                        |
 Upon a slitted sheet I sit.                            |
 Whoever slit the sheets is a good sheet slitter.       |
 I am a sheet slitter.                                  |
 I am the sleekest sheet slitter that ever slit sheets. |
 She slits the sheet she sits on.                       |
(9 rows)

postgres=# update t_gin set doc_tsv = to_tsvector(doc);
UPDATE 9
postgres=# select * from t_gin;
                          doc                           |                         doc_tsv
--------------------------------------------------------+----------------------------------------------------------
 How many sheets could a sheet slitter slit?            | 'could':4 'mani':2 'sheet':3,6 'slit':8 'slitter':7
 I slit a sheet, a sheet I slit.                        | 'sheet':4,6 'slit':2,8
 Upon a slitted sheet I sit.                            | 'sheet':4 'sit':6 'slit':3 'upon':1
 Whoever slit the sheets is a good sheet slitter.       | 'good':7 'sheet':4,8 'slit':2 'slitter':9 'whoever':1
 I am a sheet slitter.                                  | 'sheet':4 'slitter':5
 I am the sleekest sheet slitter that ever slit sheets. | 'ever':8 'sheet':5,10 'sleekest':4 'slit':9 'slitter':6
 She slits the sheet she sits on.                       | 'sheet':4 'sit':6 'slit':2
 Can a sheet slitter slit sheets?                       | 'sheet':3,6 'slit':5 'slitter':4
 I slit sheets.                                         | 'sheet':3 'slit':2
(9 rows)

postgres=# create index on t_gin using gin(doc_tsv);
CREATE INDEX
--- To make the index show up in the plan, disable sequential scans manually.
postgres=# set enable_seqscan=off;
SET
postgres=# explain(costs off) select doc from t_gin where doc_tsv @@ to_tsquery('many & slitter');
                                QUERY PLAN
--------------------------------------------------------------------------
 Data Node Scan
   Node/s: All datanodes
   ->  Bitmap Heap Scan on t_gin
         Recheck Cond: (doc_tsv @@ '''mani'' & ''slitter'''::tsquery)
         ->  Bitmap Index Scan on t_gin_doc_tsv_idx
               Index Cond: (doc_tsv @@ '''mani'' & ''slitter'''::tsquery)
(6 rows)

-------------- expression index
postgres=# create table test_expression(col int, name varchar(64));
CREATE TABLE
postgres=# insert into test_expression select 1,'ASDD';
insert into test_expression select 2,'ASDD';
insert into test_expression select 3,'AS';
insert into test_expression select 4,'ASsda';
insert into test_expression select 5,'ASdssa';
insert into test_expression select 6,'Asds';
insert into test_expression select 7,'Assa';
insert into test_expression select 8,'as';
INSERT 0 1
postgres=# insert into test_expression select 2,'ASDD';
INSERT 0 1
postgres=# insert into test_expression select 3,'AS';
INSERT 0 1
postgres=# insert into test_expression select 4,'ASsda';
INSERT 0 1
postgres=# insert into test_expression select 5,'ASdssa';
INSERT 0 1
postgres=# insert into test_expression select 6,'Asds';
INSERT 0 1
postgres=# insert into test_expression select 7,'Assa';
INSERT 0 1
postgres=# insert into test_expression select 8,'as';
INSERT 0 1
postgres=# explain select * from test_expression where lower(name) ='as';
                                          QUERY PLAN
---------------------------------------------------------------------------------------------
 Data Node Scan  (cost=0.00..0.00 rows=1000 width=150)
   Node/s: All datanodes
   ->  Seq Scan on test_expression  (cost=10000000000.00..1000000001681.00 rows=2 width=150)
         Filter: (lower((name)::text) = 'as'::text)
(4 rows)

postgres=# create index on test_expression (name);
CREATE INDEX
postgres=# create index test_expression_lower on test_expression (lower(name));
CREATE INDEX
postgres=# explain select * from test_expression where lower(name) ='as';
                                              QUERY PLAN
-----------------------------------------------------------------------------------------------------
 Data Node Scan  (cost=0.00..0.00 rows=1000 width=150)
   Node/s: All datanodes
   ->  [Bypass]
 Index Scan using test_expression_lower on test_expression  (cost=0.00..8.27 rows=1 width=150)
         Index Cond: (lower((name)::text) = 'as'::text)
(5 rows)


Data Compression

Row-store tables: specifying the compresstype keyword at table creation enables data compression and reduces disk usage.

create table test_row_compress(col text) with (compresstype = 2, compress_level = 3);
  • COMPRESSTYPE
    Row-store table parameter that selects the compression algorithm: 1 means pglz, 2 means zstd; the default is no compression. (Supported only for ordinary ASTORE tables.)

Value range: 0 to 2; default 0.

  • COMPRESS_LEVEL

Row-store table parameter that sets the compression level; it takes effect only when COMPRESSTYPE is 2. The higher the level, the better the table compresses and the slower it is to access. (Supported only for ordinary ASTORE tables.)

Value range: -31 to 31; default 0. A short sketch using both parameters follows this list.
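A minimal sketch of the two parameters together (table names are invented for illustration; semantics as described above):

-- zstd (compresstype = 2) at compression level 3:
create table t_row_zstd(col text) with (compresstype = 2, compress_level = 3);
-- pglz (compresstype = 1); compress_level takes effect only when compresstype = 2:
create table t_row_pglz(col text) with (compresstype = 1);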

Column-store tables: specifying the compression keyword at table creation enables data compression and reduces disk usage.

-- Example
create table test_compress(col text) with (orientation=column, compression=high);
  • COMPRESSION

Specifies the compression level of the table data, which determines the compression ratio and compression time. In general, the higher the level, the higher the ratio and the longer the time, and vice versa; the actual ratio depends on the distribution of the loaded data. Row-store tables do not support the COMPRESSION parameter.

Value range:
for column-store tables, the valid values are YES/NO/LOW/MIDDLE/HIGH; the default is LOW.

---- Row-store compression
drop table if exists test_row_compress;
postgres=# create table test_row_compress(col text);
CREATE TABLE
postgres=# insert into test_row_compress select md5(random()::text) from generate_series(1,1000000);
INSERT 0 1000000
postgres=# select * from pg_size_pretty(pg_catalog.pg_table_size('test_row_compress'));
 pg_size_pretty
----------------
 66 MB
(1 row)

postgres=# drop table if exists test_row_compress;
DROP TABLE
postgres=# create table test_row_compress(col text) with (compresstype = 2,compress_level= 3);
CREATE TABLE
postgres=# insert into test_row_compress select md5(random()::text) from generate_series(1,1000000);
INSERT 0 1000000
postgres=# select * from pg_size_pretty(pg_catalog.pg_table_size('test_row_compress'));
 pg_size_pretty
----------------
 29 MB
(1 row)

---- Column-store compression
postgres=# drop table test_column_compress;
ERROR:  table "test_column_compress" does not exist
postgres=# create table test_column_compress(col text) with (orientation=column,compression=no);
CREATE TABLE
postgres=# insert into test_column_compress select md5(random()::text) from generate_series(1,1000000);
INSERT 0 1000000
postgres=# select * from pg_size_pretty(pg_catalog.pg_table_size('test_column_compress'));
 pg_size_pretty
----------------
 34 MB
(1 row)

postgres=# drop table test_column_compress;
DROP TABLE
postgres=# create table test_column_compress(col text) with (orientation=column,compression=high);
CREATE TABLE
postgres=# insert into test_column_compress select md5(random()::text) from generate_series(1,1000000);
INSERT 0 1000000
postgres=# select * from pg_size_pretty(pg_catalog.pg_table_size('test_column_compress'));
 pg_size_pretty
----------------
 19 MB
(1 row)


Subtransactions

Subtransactions let you roll back part of the work already completed within a transaction. A subtransaction can be started with the SAVEPOINT or EXCEPTION keywords, or with an autonomous transaction.

Autonomous Transactions

An autonomous transaction is an independent transaction started while a main transaction is executing. Committing or rolling back an autonomous transaction does not affect data the main transaction has already committed, and the autonomous transaction is likewise unaffected by the main transaction.

Autonomous transactions are defined in stored procedures, functions, and anonymous blocks, and are declared with the PRAGMA AUTONOMOUS_TRANSACTION keyword.

---------------------- Subtransactions
----- Autonomous transactions
postgres=# create table t2(a int,b int);
insert into t2 values(1,2);
select * from t2;
CREATE TABLE
INSERT 0 1
 a | b
---+---
 1 | 2
(1 row)

postgres=# create or replace procedure autonomus_4(a int ,b int) as
postgres$# declare
postgres$# num3 int =a; num4 int =b;
postgres$# pragma autonomous_transaction;
postgres$# begin insert into t2 values (num3,num4); end;
postgres$# /
CREATE PROCEDURE
postgres=# create or replace procedure autonomus_5(a int ,b int) as
postgres$# declare
postgres$# begin insert into t2 values (444,55);autonomus_4(a,b);rollback; end;
postgres$# /
CREATE PROCEDURE
postgres=# select autonomus_5(11,22);
 autonomus_5
-------------

(1 row)

-- The main transaction's insert of (444,55) was rolled back, while the insert of (11,22)
-- made inside the autonomous transaction persisted:
postgres=# select * from t2;
 a  | b
----+----
  1 |  2
 11 | 22
(2 rows)

----------- savepoint: setting savepoints
-- Create a table
postgres=# drop table if exists savepoint_test;
NOTICE:  table "savepoint_test" does not exist, skipping
DROP TABLE
postgres=# create table savepoint_test(a int primary key,b int) ;
NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "savepoint_test_pkey" for table "savepoint_test"
CREATE TABLE
-- Start a transaction
postgres=# start transaction;
START TRANSACTION
-- Insert a row
postgres=# insert into savepoint_test values(1,2);
INSERT 0 1
-- Create a savepoint
postgres=# savepoint test_savepoint;
SAVEPOINT
-- Insert another row
postgres=# insert into savepoint_test values(2,2);
INSERT 0 1
-- Check the table contents
postgres=# select * from savepoint_test;
 a | b
---+---
 1 | 2
 2 | 2
(2 rows)
-- Roll back to the savepoint
postgres=# rollback to savepoint test_savepoint;
ROLLBACK
-- Check again: only (1,2) is visible; (2,2) was rolled back
postgres=# select * from savepoint_test;
 a | b
---+---
 1 | 2
(1 row)
-- Insert a row
postgres=# insert into savepoint_test values(3,3);
INSERT 0 1
-- Commit the transaction
postgres=# commit;
COMMIT
-- Check again: (1,2) and (3,3) are visible; (2,2) stayed rolled back
postgres=# select * from savepoint_test;
 a | b
---+---
 1 | 2
 3 | 3
(2 rows)
-- Drop the test table savepoint_test
postgres=# drop table savepoint_test;
DROP TABLE

-------- exception
create type type_test as (a int,b int);
create or replace procedure p1
as
c int;
begin
select a into c from type_test;
exception
when others then
raise 'NULL' ;
end;
/
call p1();

postgres=# create type type_test as (a int,b int);
CREATE TYPE
postgres=# create or replace procedure p1
postgres-# as
postgres$# c int;
postgres$# begin
postgres$# select a into c from type_test;
postgres$# exception
postgres$# when others then
postgres$# raise 'NULL' ;
postgres$# end;
postgres$# /
CREATE PROCEDURE
postgres=# call p1();
ERROR:  NULL


