Re: POC, WIP: OR-clause support for indexes

From: Alena Rybakina <a(dot)rybakina(at)postgrespro(dot)ru>
To: pgsql-hackers(at)postgresql(dot)org
Cc: Robert Haas <robertmhaas(at)gmail(dot)com>, Peter Geoghegan <pg(at)bowt(dot)ie>, Alexander Korotkov <aekorotkov(at)gmail(dot)com>, jian he <jian(dot)universality(at)gmail(dot)com>, Marcos Pegoraro <marcos(at)f10(dot)com(dot)br>, teodor(at)sigaev(dot)ru, Andrei Lepikhov <a(dot)lepikhov(at)postgrespro(dot)ru>, Tomas Vondra <tomas(dot)vondra(at)enterprisedb(dot)com>, Peter Eisentraut <peter(at)eisentraut(dot)org>, Ranier Vilela <ranier(dot)vf(at)gmail(dot)com>
Subject: Re: POC, WIP: OR-clause support for indexes
Date: 2024-07-10 23:30:22
Message-ID: a1eaf12e-d774-495b-9173-fee669841ddf@postgrespro.ru
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

On 27.06.2024 23:06, Alena Rybakina wrote:
> Tobe honest,I've alreadystartedwritingcodetodothis,butI'm facedwitha
> misunderstandingof howto correctlycreatea
> conditionfor"OR"expressionsthatare notsubjectto transformation.
>>
>> For example,the expressions b=1in the query below:
>>
>> alena(at)postgres=# explain select * from x where ( (a =5 or a=4) and a
>> = ANY(ARRAY[5,4])) or (b=1); QUERY PLAN
>> ----------------------------------------------------------------------------------
>> Seq Scan on x (cost=0.00..123.00 rows=1 width=8) Filter: ((((a = 5)
>> OR (a = 4)) AND (a = ANY ('{5,4}'::integer[]))) OR (b = 1)) (2 rows)
>>
>> I see that two expressions have remained unchanged and it only works
>> for "AND" binary operations.
>>
>> But I think it might be worth applying this together, where does the
>> optimizer generate indexes (build_paths_for_OR function)?
>>
Iimplementedsuchcode,butatthe
analysisstageinplanner,anditwasn'tfullyreadyyet,butIwas ableto
drawsomeimportantconclusions.Firstof all,Ifacedtheproblemof the
inequalityof the numberof columnsinthe expressionwiththe
requiredone,atleastsomeextracolumnappeared,judgingby the
crust.Ihaven'tfullyrealizedityet andhaven'tfixedit.

#0  __pthread_kill_implementation (no_tid=0, signo=6,
threadid=134300960061248)
    at ./nptl/pthread_kill.c:44
#1  __pthread_kill_internal (signo=6, threadid=134300960061248) at
./nptl/pthread_kill.c:78
#2  __GI___pthread_kill (threadid=134300960061248, signo=signo(at)entry=6)
at ./nptl/pthread_kill.c:89
#3  0x00007a2560042476 in __GI_raise (sig=sig(at)entry=6) at
../sysdeps/posix/raise.c:26
#4  0x00007a25600287f3 in __GI_abort () at ./stdlib/abort.c:79
#5  0x00005573f9df62a8 in ExceptionalCondition (
    conditionName=0x5573f9fec4c8
"AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
!bms_is_member(attnum, clauses_attnums)", fileName=0x5573f9fec11c
"dependencies.c", lineNumber=1525) at assert.c:66
#6  0x00005573f9b8b85f in dependencies_clauselist_selectivity
(root=0x5573fad534e8,
    clauses=0x5573fad0b2d8, varRelid=0, jointype=JOIN_INNER,
sjinfo=0x0, rel=0x5573fad54b38,
    estimatedclauses=0x7ffe2e43f178) at dependencies.c:1525
#7  0x00005573f9b8fed9 in statext_clauselist_selectivity
(root=0x5573fad534e8, clauses=0x5573fad0b2d8,
    varRelid=0, jointype=JOIN_INNER, sjinfo=0x0, rel=0x5573fad54b38,
estimatedclauses=0x7ffe2e43f178,
    is_or=false) at extended_stats.c:2035
--Type <RET> for more, q to quit, c to continue without paging--
#8  0x00005573f9a57f88 in clauselist_selectivity_ext
(root=0x5573fad534e8, clauses=0x5573fad0b2d8,
    varRelid=0, jointype=JOIN_INNER, sjinfo=0x0,
use_extended_stats=true) at clausesel.c:153
#9  0x00005573f9a57e30 in clauselist_selectivity (root=0x5573fad534e8,
clauses=0x5573fad0b2d8,
    varRelid=0, jointype=JOIN_INNER, sjinfo=0x0) at clausesel.c:106
#10 0x00005573f9a62e03 in set_baserel_size_estimates
(root=0x5573fad534e8, rel=0x5573fad54b38)
    at costsize.c:5247
#11 0x00005573f9a51aa5 in set_plain_rel_size (root=0x5573fad534e8,
rel=0x5573fad54b38,
    rte=0x5573fad0ec58) at allpaths.c:581
#12 0x00005573f9a516ce in set_rel_size (root=0x5573fad534e8,
rel=0x5573fad54b38, rti=1,
    rte=0x5573fad0ec58) at allpaths.c:411
#13 0x00005573f9a514c7 in set_base_rel_sizes (root=0x5573fad534e8) at
allpaths.c:322
#14 0x00005573f9a5119d in make_one_rel (root=0x5573fad534e8,
joinlist=0x5573fad0adf8) at allpaths.c:183
#15 0x00005573f9a94d45 in query_planner (root=0x5573fad534e8,
    qp_callback=0x5573f9a9b59e <standard_qp_callback>,
qp_extra=0x7ffe2e43f540) at planmain.c:280
#16 0x00005573f9a977a8 in grouping_planner (root=0x5573fad534e8,
tuple_fraction=0, setops=0x0)
    at planner.c:1520
#17 0x00005573f9a96e47 in subquery_planner (glob=0x5573fad533d8,
parse=0x5573fad0ea48, parent_root=0x0,
    hasRecursion=false, tuple_fraction=0, setops=0x0) at planner.c:1089
#18 0x00005573f9a954aa in standard_planner (parse=0x5573fad0ea48,
    query_string=0x5573fad8b3b0 "explain analyze SELECT * FROM
functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND
upper(b) = '1'", cursorOptions=2048, boundParams=0x0) at planner.c:415
#19 0x00005573f9a951d4 in planner (parse=0x5573fad0ea48,
--Type <RET> for more, q to quit, c to continue without paging--
    query_string=0x5573fad8b3b0 "explain analyze SELECT * FROM
functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND
upper(b) = '1'", cursorOptions=2048, boundParams=0x0) at planner.c:282
#20 0x00005573f9bf4e2e in pg_plan_query (querytree=0x5573fad0ea48,
    query_string=0x5573fad8b3b0 "explain analyze SELECT * FROM
functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND
upper(b) = '1'", cursorOptions=2048, boundParams=0x0) at postgres.c:904
#21 0x00005573f98613e7 in standard_ExplainOneQuery
(query=0x5573fad0ea48, cursorOptions=2048, into=0x0,
    es=0x5573fad57da0,
    queryString=0x5573fad8b3b0 "explain analyze SELECT * FROM
functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND
upper(b) = '1'", params=0x0, queryEnv=0x0) at explain.c:489
#22 0x00005573f9861205 in ExplainOneQuery (query=0x5573fad0ea48,
cursorOptions=2048, into=0x0,
    es=0x5573fad57da0,
    queryString=0x5573fad8b3b0 "explain analyze SELECT * FROM
functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND
upper(b) = '1'", params=0x0, queryEnv=0x0) at explain.c:445
#23 0x00005573f9860e35 in ExplainQuery (pstate=0x5573fad57c90,
stmt=0x5573fad8b5a0, params=0x0,
    dest=0x5573fad57c00) at explain.c:341
#24 0x00005573f9bff3a8 in standard_ProcessUtility (pstmt=0x5573fad8b490,
    queryString=0x5573fad8b3b0 "explain analyze SELECT * FROM
functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND
upper(b) = '1'", readOnlyTree=false, context=PROCESS_UTILITY_QUERY,
params=0x0,
    queryEnv=0x0, dest=0x5573fad57c00, qc=0x7ffe2e43fcd0) at utility.c:863
#25 0x00005573f9bfe91a in ProcessUtility (pstmt=0x5573fad8b490,
    queryString=0x5573fad8b3b0 "explain analyze SELECT * FROM
functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND
upper(b) = '1'", readOnlyTree=false, context=PROCESS_UTILITY_QUERY,
params=0x0,
--Type <RET> for more, q to quit, c to continue without paging--
    queryEnv=0x0, dest=0x5573fad57c00, qc=0x7ffe2e43fcd0) at utility.c:523
#26 0x00005573f9bfd195 in PortalRunUtility (portal=0x5573fac6bcf0,
pstmt=0x5573fad8b490,
    isTopLevel=false, setHoldSnapshot=true, dest=0x5573fad57c00,
qc=0x7ffe2e43fcd0) at pquery.c:1158
#27 0x00005573f9bfced2 in FillPortalStore (portal=0x5573fac6bcf0,
isTopLevel=false) at pquery.c:1031
#28 0x00005573f9bfd778 in PortalRunFetch (portal=0x5573fac6bcf0,
fdirection=FETCH_FORWARD, count=10,
    dest=0x5573fa1d6880 <spi_printtupDR>) at pquery.c:1442
#29 0x00005573f9992675 in _SPI_cursor_operation (portal=0x5573fac6bcf0,
direction=FETCH_FORWARD,
    count=10, dest=0x5573fa1d6880 <spi_printtupDR>) at spi.c:3019
#30 0x00005573f9990849 in SPI_cursor_fetch (portal=0x5573fac6bcf0,
forward=true, count=10) at spi.c:1805
#31 0x00007a25603e0aa5 in exec_for_query (estate=0x7ffe2e440200,
stmt=0x5573fad067c8,
    portal=0x5573fac6bcf0, prefetch_ok=true) at pl_exec.c:5889
#32 0x00007a25603de728 in exec_stmt_dynfors (estate=0x7ffe2e440200,
stmt=0x5573fad067c8)
    at pl_exec.c:4647
#33 0x00007a25603d8b1c in exec_stmts (estate=0x7ffe2e440200,
stmts=0x5573fad06ec8) at pl_exec.c:2100
#34 0x00007a25603d8697 in exec_stmt_block (estate=0x7ffe2e440200,
block=0x5573fad06f18) at pl_exec.c:1943
#35 0x00007a25603d7d9e in exec_toplevel_block (estate=0x7ffe2e440200,
block=0x5573fad06f18)
    at pl_exec.c:1634
#36 0x00007a25603d5a2e in plpgsql_exec_function (func=0x5573fac2c1e0,
fcinfo=0x5573fad2af60,
    simple_eval_estate=0x0, simple_eval_resowner=0x0,
procedure_resowner=0x0, atomic=true)
    at pl_exec.c:623
#37 0x00007a25603f277f in plpgsql_call_handler (fcinfo=0x5573fad2af60)
at pl_handler.c:277
#38 0x00005573f993589a in ExecMakeTableFunctionResult
(setexpr=0x5573facfd8c8, econtext=0x5573facfd798,
--Type <RET> for more, q to quit, c to continue without paging--
    argContext=0x5573fad2ae60, expectedDesc=0x5573facfe130,
randomAccess=false) at execSRF.c:234
#39 0x00005573f995299c in FunctionNext (node=0x5573facfd588) at
nodeFunctionscan.c:94
#40 0x00005573f993735f in ExecScanFetch (node=0x5573facfd588,
accessMtd=0x5573f99528e6 <FunctionNext>,
    recheckMtd=0x5573f9952ced <FunctionRecheck>) at execScan.c:131
#41 0x00005573f99373d8 in ExecScan (node=0x5573facfd588,
accessMtd=0x5573f99528e6 <FunctionNext>,
    recheckMtd=0x5573f9952ced <FunctionRecheck>) at execScan.c:180
#42 0x00005573f9952d46 in ExecFunctionScan (pstate=0x5573facfd588) at
nodeFunctionscan.c:269
#43 0x00005573f9932c7f in ExecProcNodeFirst (node=0x5573facfd588) at
execProcnode.c:464
#44 0x00005573f9925df5 in ExecProcNode (node=0x5573facfd588)
    at ../../../src/include/executor/executor.h:274
#45 0x00005573f9928bf9 in ExecutePlan (estate=0x5573facfd360,
planstate=0x5573facfd588,
    use_parallel_mode=false, operation=CMD_SELECT, sendTuples=true,
numberTuples=0,
    direction=ForwardScanDirection, dest=0x5573fad8f6e0,
execute_once=true) at execMain.c:1646
#46 0x00005573f992653d in standard_ExecutorRun (queryDesc=0x5573fad87f70,
    direction=ForwardScanDirection, count=0, execute_once=true) at
execMain.c:363
#47 0x00005573f9926316 in ExecutorRun (queryDesc=0x5573fad87f70,
direction=ForwardScanDirection,
    count=0, execute_once=true) at execMain.c:304
#48 0x00005573f9bfcb7d in PortalRunSelect (portal=0x5573fac6bbe0,
forward=true, count=0,
    dest=0x5573fad8f6e0) at pquery.c:924
#49 0x00005573f9bfc7a5 in PortalRun (portal=0x5573fac6bbe0,
count=9223372036854775807, isTopLevel=true,
    run_once=true, dest=0x5573fad8f6e0, altdest=0x5573fad8f6e0,
qc=0x7ffe2e440a60) at pquery.c:768
#50 0x00005573f9bf5512 in exec_simple_query (
--Type <RET> for more, q to quit, c to continue without paging--
    query_string=0x5573fabea030 "SELECT * FROM
check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a *
2) = 2 OR (a * 2) = 102) AND upper(b) = ''1''');") at postgres.c:1274
#51 0x00005573f9bfa5b7 in PostgresMain (dbname=0x5573fab52240 "regression",
    username=0x5573fac27c98 "alena") at postgres.c:4680
#52 0x00005573f9bf137e in BackendMain (startup_data=0x7ffe2e440ce4 "",
startup_data_len=4)
    at backend_startup.c:105
#53 0x00005573f9b06852 in postmaster_child_launch (child_type=B_BACKEND,
startup_data=0x7ffe2e440ce4 "",
    startup_data_len=4, client_sock=0x7ffe2e440d30) at launch_backend.c:265
#54 0x00005573f9b0cd66 in BackendStartup (client_sock=0x7ffe2e440d30) at
postmaster.c:3593
#55 0x00005573f9b09db1 in ServerLoop () at postmaster.c:1674
#56 0x00005573f9b09678 in PostmasterMain (argc=8, argv=0x5573fab500d0)
at postmaster.c:1372
#57 0x00005573f99b5f79 in main (argc=8, argv=0x5573fab500d0) at main.c:197

Secondly,Isawdiffchangesinqueriesthatshowedcaseswherethe optimizerdid
noteliminateoneofthe redundantexpressionsandprocessedbothof
them.Thisindicatestheproblemthatthe optimizerhas notlearnedhow to
handleitinallcases.IthinkI'll needtoaddsomecodetohandleit.

 EXPLAIN (COSTS OFF)
 SELECT count(*) FROM tenk1
   WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous <
2) OR thousand = 41;
-                                                         QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Aggregate
    ->  Bitmap Heap Scan on tenk1
-         Recheck Cond: (((hundred = 42) AND ((thousand = ANY
('{42,99}'::integer[])) OR (tenthous < 2))) OR (thousand = 41))
+         Recheck Cond: (((((thousand = 42) AND (thousand = ANY
('{42,99}'::integer[]))) OR ((thousand = 99) AND (thousand = ANY
('{42,99}'::integer[])))) OR (tenthous < 2)) OR (thousand = 41))
+         Filter: (((hundred = 42) AND ((((thousand = 42) OR (thousand =
99)) AND (thousand = ANY ('{42,99}'::integer[]))) OR (tenthous < 2))) OR
(thousand = 41))
          ->  BitmapOr
-               ->  BitmapAnd
-                     ->  Bitmap Index Scan on tenk1_hundred
-                           Index Cond: (hundred = 42)
+               ->  BitmapOr
                      ->  BitmapOr
                            ->  Bitmap Index Scan on tenk1_thous_tenthous
-                                 Index Cond: (thousand = ANY
('{42,99}'::integer[]))
+                                 Index Cond: ((thousand = 42) AND
(thousand = ANY ('{42,99}'::integer[])))
                            ->  Bitmap Index Scan on tenk1_thous_tenthous
-                                 Index Cond: (tenthous < 2)
+                                 Index Cond: ((thousand = 99) AND
(thousand = ANY ('{42,99}'::integer[])))
+                     ->  Bitmap Index Scan on tenk1_thous_tenthous
+                           Index Cond: (tenthous < 2)
                ->  Bitmap Index Scan on tenk1_thous_tenthous
                      Index Cond: (thousand = 41)
-(14 rows)
+(15 rows)

 SELECT count(*) FROM tenk1
   WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous <
2) OR thousand = 41;
@@ -1986,20 +1987,21 @@
 EXPLAIN (COSTS OFF)
 SELECT count(*) FROM tenk1
   WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand =
99 AND tenthous = 2);
-                                                          QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Aggregate
    ->  Bitmap Heap Scan on tenk1
-         Recheck Cond: ((hundred = 42) AND ((thousand = ANY
('{41,42}'::integer[])) OR ((thousand = 99) AND (tenthous = 2))))
-         ->  BitmapAnd
-               ->  Bitmap Index Scan on tenk1_hundred
-                     Index Cond: (hundred = 42)
+         Recheck Cond: ((((thousand = 42) AND (thousand = ANY
('{41,42}'::integer[]))) OR ((thousand = 41) AND (thousand = ANY
('{41,42}'::integer[])))) OR ((thousand = 99) AND (tenthous = 2)))
+         Filter: (hundred = 42)
+         ->  BitmapOr
                ->  BitmapOr
                      ->  Bitmap Index Scan on tenk1_thous_tenthous
-                           Index Cond: (thousand = ANY
('{41,42}'::integer[]))
+                           Index Cond: ((thousand = 42) AND (thousand =
ANY ('{41,42}'::integer[])))
                      ->  Bitmap Index Scan on tenk1_thous_tenthous
-                           Index Cond: ((thousand = 99) AND (tenthous = 2))
-(11 rows)
+                           Index Cond: ((thousand = 41) AND (thousand =
ANY ('{41,42}'::integer[])))
+               ->  Bitmap Index Scan on tenk1_thous_tenthous
+                     Index Cond: ((thousand = 99) AND (tenthous = 2))
+(12 rows)

 SELECT count(*) FROM tenk1
   WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand =
99 AND tenthous = 2);
diff -U3 /home/alena/postgrespro5/src/test/regress/expected/inherit.out
/home/alena/postgrespro5/src/test/regress/results/inherit.out
--- /home/alena/postgrespro5/src/test/regress/expected/inherit.out
2024-06-20 12:28:52.324011724 +0300
+++ /home/alena/postgrespro5/src/test/regress/results/inherit.out
2024-07-11 02:00:55.404006843 +0300
@@ -2126,7 +2126,7 @@
                                    QUERY PLAN
 ---------------------------------------------------------------------------------
  Seq Scan on part_ab_cd list_parted
-   Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY
('{NULL,cd}'::text[])))
+   Filter: (((a)::text = ANY ('{NULL,cd}'::text[])) OR ((a)::text =
'ab'::text))
 (2 rows)

 explain (costs off) select * from list_parted where a = 'ab';
diff -U3 /home/alena/postgrespro5/src/test/regress/expected/join.out
/home/alena/postgrespro5/src/test/regress/results/join.out
--- /home/alena/postgrespro5/src/test/regress/expected/join.out
2024-06-28 11:05:44.304135987 +0300
+++ /home/alena/postgrespro5/src/test/regress/results/join.out
2024-07-11 02:00:58.152006921 +0300
@@ -4210,10 +4210,17 @@
 select * from tenk1 a join tenk1 b on
   (a.unique1 = 1 and b.unique1 = 2) or
   ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4);
-                                                       QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------
  Nested Loop
-   Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 =
ANY ('{3,7}'::integer[])) AND (b.hundred = 4)))
+   Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2
= 3) OR (a.unique2 = 7)) AND (a.unique2 = ANY ('{3,7}'::integer[])) AND
(b.hundred = 4)))
+   ->  Bitmap Heap Scan on tenk1 a
+         Recheck Cond: ((unique1 = 1) OR (unique2 = ANY
('{3,7}'::integer[])))
+         ->  BitmapOr
+               ->  Bitmap Index Scan on tenk1_unique1
+                     Index Cond: (unique1 = 1)
+               ->  Bitmap Index Scan on tenk1_unique2
+                     Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
    ->  Bitmap Heap Scan on tenk1 b
          Recheck Cond: ((unique1 = 2) OR (hundred = 4))
          ->  BitmapOr
@@ -4221,25 +4228,24 @@
                      Index Cond: (unique1 = 2)
                ->  Bitmap Index Scan on tenk1_hundred
                      Index Cond: (hundred = 4)
-   ->  Materialize
-         ->  Bitmap Heap Scan on tenk1 a
-               Recheck Cond: ((unique1 = 1) OR (unique2 = ANY
('{3,7}'::integer[])))
-               ->  BitmapOr
-                     ->  Bitmap Index Scan on tenk1_unique1
-                           Index Cond: (unique1 = 1)
-                     ->  Bitmap Index Scan on tenk1_unique2
-                           Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
-(17 rows)
+(16 rows)

 SET enable_or_transformation = on;
 explain (costs off)
 select * from tenk1 a join tenk1 b on
   (a.unique1 = 1 and b.unique1 = 2) or
   ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4);
-                                                       QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------
  Nested Loop
-   Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 =
ANY ('{3,7}'::integer[])) AND (b.hundred = 4)))
+   Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2
= 3) OR (a.unique2 = 7)) AND (a.unique2 = ANY ('{3,7}'::integer[])) AND
(b.hundred = 4)))
+   ->  Bitmap Heap Scan on tenk1 a
+         Recheck Cond: ((unique1 = 1) OR (unique2 = ANY
('{3,7}'::integer[])))
+         ->  BitmapOr
+               ->  Bitmap Index Scan on tenk1_unique1
+                     Index Cond: (unique1 = 1)
+               ->  Bitmap Index Scan on tenk1_unique2
+                     Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
    ->  Bitmap Heap Scan on tenk1 b
          Recheck Cond: ((unique1 = 2) OR (hundred = 4))
          ->  BitmapOr
@@ -4247,37 +4253,29 @@
                      Index Cond: (unique1 = 2)
                ->  Bitmap Index Scan on tenk1_hundred
                      Index Cond: (hundred = 4)
-   ->  Materialize
-         ->  Bitmap Heap Scan on tenk1 a
-               Recheck Cond: ((unique1 = 1) OR (unique2 = ANY
('{3,7}'::integer[])))
-               ->  BitmapOr
-                     ->  Bitmap Index Scan on tenk1_unique1
-                           Index Cond: (unique1 = 1)
-                     ->  Bitmap Index Scan on tenk1_unique2
-                           Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
-(17 rows)
+(16 rows)

 explain (costs off)
 select * from tenk1 a join tenk1 b on
   (a.unique1 < 20 or a.unique1 = 3 or a.unique1 = 1 and b.unique1 = 2) or
   ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4);
- QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Nested Loop
-   Join Filter: ((a.unique1 < 20) OR (a.unique1 = 3) OR ((a.unique1 =
1) AND (b.unique1 = 2)) OR ((a.unique2 = ANY ('{3,7}'::integer[])) AND
(b.hundred = 4)))
+   Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2
= 3) OR (a.unique2 = 7)) AND (a.unique2 = ANY ('{3,7}'::integer[])) AND
(b.hundred = 4)) OR (a.unique1 < 20) OR (a.unique1 = 3))
    ->  Seq Scan on tenk1 b
    ->  Materialize
          ->  Bitmap Heap Scan on tenk1 a
-               Recheck Cond: ((unique1 < 20) OR (unique1 = 3) OR
(unique1 = 1) OR (unique2 = ANY ('{3,7}'::integer[])))
+               Recheck Cond: ((unique1 = 1) OR (unique2 = ANY
('{3,7}'::integer[])) OR (unique1 < 20) OR (unique1 = 3))
                ->  BitmapOr
                      ->  Bitmap Index Scan on tenk1_unique1
-                           Index Cond: (unique1 < 20)
-                     ->  Bitmap Index Scan on tenk1_unique1
-                           Index Cond: (unique1 = 3)
-                     ->  Bitmap Index Scan on tenk1_unique1
                            Index Cond: (unique1 = 1)
                      ->  Bitmap Index Scan on tenk1_unique2
                            Index Cond: (unique2 = ANY
('{3,7}'::integer[]))
+                     ->  Bitmap Index Scan on tenk1_unique1
+                           Index Cond: (unique1 < 20)
+                     ->  Bitmap Index Scan on tenk1_unique1
+                           Index Cond: (unique1 = 3)
 (15 rows)

Thirdly,Ihaveevidencethatthismayaffecttheunderestimationof power.I'll
lookinto thisin detaillater.

diff -U3
/home/alena/postgrespro5/src/test/regress/expected/stats_ext.out
/home/alena/postgrespro5/src/test/regress/results/stats_ext.out
--- /home/alena/postgrespro5/src/test/regress/expected/stats_ext.out
2024-06-28 11:05:44.304135987 +0300
+++ /home/alena/postgrespro5/src/test/regress/results/stats_ext.out
2024-07-11 02:01:06.596007159 +0300
@@ -1156,19 +1156,19 @@
 SELECT * FROM check_estimated_rows('SELECT * FROM
functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1''');
  estimated | actual
 -----------+--------
-         2 |    100
+         1 |    100
 (1 row)

 SELECT * FROM check_estimated_rows('SELECT * FROM
functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b =
''2'')');
  estimated | actual
 -----------+--------
-         4 |    100
+         1 |    100
 (1 row)

 SELECT * FROM check_estimated_rows('SELECT * FROM
functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND
(b = ''1'' OR b = ''2'')');
  estimated | actual
 -----------+--------
-         8 |    200
+         1 |    200
 (1 row)

 -- OR clauses referencing different attributes
@@ -1322,19 +1322,19 @@
 SELECT * FROM check_estimated_rows('SELECT * FROM
functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1''');
  estimated | actual
 -----------+--------
-       100 |    100
+         2 |    100
 (1 row)

 SELECT * FROM check_estimated_rows('SELECT * FROM
functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b =
''2'')');
  estimated | actual
 -----------+--------
-       100 |    100
+         2 |    100
 (1 row)

 SELECT * FROM check_estimated_rows('SELECT * FROM
functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND
(b = ''1'' OR b = ''2'')');
  estimated | actual
 -----------+--------
-       200 |    200
+         8 |    200
 (1 row)

--
Regards,
Alena Rybakina
Postgres Professional:http://www.postgrespro.com
The Russian Postgres Company

Attachment Content-Type Size
or.diff.no-cfbot text/plain 39.4 KB

In response to

Browse pgsql-hackers by date

  From Date Subject
Next Message Michael Paquier 2024-07-10 23:34:51 Re: Allow non-superuser to cancel superuser tasks.
Previous Message Nathan Bossart 2024-07-10 22:05:11 Re: improve performance of pg_dump with many sequences