postgresql 中测试VM对INDEX ONLY SCAN的影响

原创范计杰云和恩墨交付战队 2021-11-17

577

概述

使用过ORACLE的都知道，当SQL查询的字段、条件字段都在INDEX中可以找到时，SQL执行过程可以不用回表，大大提高查询效率。
postgresql中也有这样的机制，但有所不同。
由于postgresql index中不保存事物信息，因此postgresql 中的仅仅通过INDEX不能判断数据可见性。
postgresql 中的 INDEX ONLY SCAN，扫描的index tuple需要通过VM去判断可见性，当发现扫描的tuple所在的heap block 有dead tuple时，需要回表扫描来判断事务可见性。 INDEX ONLY SCAN并不是仅仅字面上的只通过INDEX查找数据。

VM是用BIT位标识page中是否有dead tuple的，占用空间小，判断可见性时效率很高。

实验记录

test=# prepare p(varchar(20)) as select name,setting,reset_val from pg_settings where name like '%'||$1||'%';
PREPARE
test=# execute p('scan');
             name | setting | reset_val 
------------------------------+---------+-----------
 enable_bitmapscan | on | on
 enable_indexonlyscan | on | on
 enable_indexscan | on | on
 enable_seqscan | on | on
 enable_tidscan | on | on
 min_parallel_index_scan_size | 64 | 64
 min_parallel_table_scan_size | 1024 | 1024
 synchronize_seqscans | on | on
(8 rows)

test=# create extension pageinspect;
CREATE EXTENSION

create table tab4 (id int,s varchar(100));
create index idx_tab4 on tab4(id);
insert into tab4 select generate_series,'a' from generate_series(1,400);



select substring(ctid::text from '\(([0-9]+),[0-9]+\)')as page,min(id),max(id) from tab4 group by substring(ctid::text from '\(([0-9]+),[0-9]+\)');

 page | min | max 
------+-----+-----
 0 | 1 | 226
 1 | 227 | 400
 

test=# select *from get_raw_page('tab4','vm',0) ;
ERROR: could not open file "base/16447/54036_vm": No such file or directory
 
 vacuum tab4;

select substr(get_raw_page,1,100) from get_raw_page('tab4','vm',0) ;

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 \x18000000b8e3de860000000018000020002004200000000005000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
(1 row)

test=# explain analyze select count(id) from tab4 where id between 100 and 300;
                                                          QUERY PLAN                                                           
-------------------------------------------------------------------------------------------------------------------------------
 Aggregate (cost=12.65..12.66 rows=1 width=8) (actual time=0.066..0.066 rows=1 loops=1)
   -> Index Only Scan using idx_tab4 on tab4 (cost=0.15..12.15 rows=200 width=4) (actual time=0.027..0.044 rows=201 loops=1)
         Index Cond: ((id >= 100) AND (id <= 300))
         Heap Fetches: 0 <<<
 Planning time: 0.126 ms
 Execution time: 0.101 ms
(6 rows)

delete from tab4 where id=110; 

select substr(get_raw_page,1,100) from get_raw_page('tab4','vm',0) ;
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 \x1800000028ffb3870000000018000020002004200000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
(1 row)

test=# explain analyze select count(id) from tab4 where id between 100 and 300;
                                                          QUERY PLAN                                                           
-------------------------------------------------------------------------------------------------------------------------------
 Aggregate (cost=12.65..12.66 rows=1 width=8) (actual time=0.102..0.102 rows=1 loops=1)
   -> Index Only Scan using idx_tab4 on tab4 (cost=0.15..12.15 rows=200 width=4) (actual time=0.051..0.079 rows=200 loops=1)
         Index Cond: ((id >= 100) AND (id <= 300))
         Heap Fetches: 127 <<< 226-99=127
 Planning time: 0.092 ms
 Execution time: 0.136 ms
(6 rows)

delete from tab4 where id=250; 

select substr(get_raw_page,1,100) from get_raw_page('tab4','vm',0) ;

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 \x1800000028ffb3870000000018000020002004200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
(1 row)


test=# explain analyze select count(id) from tab4 where id between 100 and 300;
                                                          QUERY PLAN                                                           
-------------------------------------------------------------------------------------------------------------------------------
 Aggregate (cost=12.65..12.66 rows=1 width=8) (actual time=0.096..0.096 rows=1 loops=1)
   -> Index Only Scan using idx_tab4 on tab4 (cost=0.15..12.15 rows=200 width=4) (actual time=0.028..0.073 rows=199 loops=1)
         Index Cond: ((id >= 100) AND (id <= 300))
         Heap Fetches: 200 <<<<<
 Planning time: 0.089 ms
 Execution time: 0.141 ms
(6 rows)

vacuum tab4;

select substr(get_raw_page,1,100) from get_raw_page('tab4','vm',0) ;

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 \x1800000060185a880000000018000020002004200000000005000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
(1 row)


select '0 no delete' as t,x'05'::bit(8)
union 
select '1 delete id=110' as t,x'04'::bit(8)
union
select '2 delete id=250' as t,x'00'::bit(8);

        t | bit    
-----------------+----------
 0 no delete | 00000101
 1 delete id=110 | 00000100
 2 delete id=250 | 00000000
(3 rows)


/* Number of bits for one heap page */
#define BITS_PER_HEAPBLOCK 2

/* Flags for bit map */
#define VISIBILITYMAP_ALL_VISIBLE	0x01
#define VISIBILITYMAP_ALL_FROZEN	0x02
#define VISIBILITYMAP_VALID_BITS	0x03	/* OR of all valid visibilitymap
											 * flags bits */
复制