From 9d261c24072c781dd8bd82b0d51bfb950a4fcb1e Mon Sep 17 00:00:00 2001 From: David Rowley Date: Wed, 24 Apr 2024 14:14:44 +1200 Subject: [PATCH v3] Ensure we allocate NAMEDATALEN bytes for names in Index Only Scans --- src/backend/executor/nodeIndexonlyscan.c | 85 ++++++++++++++++++++++-- src/include/catalog/pg_opclass.dat | 7 +- src/include/nodes/execnodes.h | 4 ++ 3 files changed, 87 insertions(+), 9 deletions(-) diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index fcf6d1d932..bec34a62f1 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -35,18 +35,20 @@ #include "access/tableam.h" #include "access/tupdesc.h" #include "access/visibilitymap.h" +#include "catalog/pg_opfamily_d.h" #include "executor/executor.h" #include "executor/nodeIndexonlyscan.h" #include "executor/nodeIndexscan.h" #include "miscadmin.h" #include "storage/bufmgr.h" #include "storage/predicate.h" +#include "utils/builtins.h" #include "utils/rel.h" static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node); -static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, - TupleDesc itupdesc); +static void StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot, + IndexTuple itup, TupleDesc itupdesc); /* ---------------------------------------------------------------- @@ -205,7 +207,7 @@ IndexOnlyNext(IndexOnlyScanState *node) ExecForceStoreHeapTuple(scandesc->xs_hitup, slot, false); } else if (scandesc->xs_itup) - StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc); + StoreIndexTuple(node, slot, scandesc->xs_itup, scandesc->xs_itupdesc); else elog(ERROR, "no data returned for index-only scan"); @@ -263,7 +265,8 @@ IndexOnlyNext(IndexOnlyScanState *node) * right now we don't need it elsewhere. */ static void -StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc) +StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot, + IndexTuple itup, TupleDesc itupdesc) { /* * Note: we must use the tupdesc supplied by the AM in index_deform_tuple, @@ -276,6 +279,37 @@ StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc) ExecClearTuple(slot); index_deform_tuple(itup, itupdesc, slot->tts_values, slot->tts_isnull); + + /* + * Because the btree opclass for "name" is defined to store cstrings to + * avoid having to store the full NAMEDATALEN bytes in indexes, here we + * must copy the name Datum into a NAMEDATALEN-byte memory allocation. We + * mark this branch as unlikely as "name", or a derivative thereof, + * shouldn't be commonly used in tables other than the catalog tables and + * we'll only index only scan those from user queries to catalog tables, + * never from internal code performing a catalog lookup. + */ + if (unlikely(node->ioss_NameColumnAttNums != NULL)) + { + for (int idx = 0; idx < node->ioss_NameColumnAttCount; idx++) + { + int attnum = node->ioss_NameColumnAttNums[idx]; + Name name; + + /* skip null Datums */ + if (slot->tts_isnull[attnum]) + continue; + + /* allocate the NAMEDATALEN and copy the datum into that memory */ + name = (Name) MemoryContextAlloc(node->ss.ps.ps_ExprContext->ecxt_per_tuple_memory, + NAMEDATALEN); + + /* use namestrcpy to zero-pad all trailing bytes */ + namestrcpy(name, DatumGetCString(slot->tts_values[attnum])); + slot->tts_values[attnum] = NameGetDatum(name); + } + } + ExecStoreVirtualTuple(slot); } @@ -473,8 +507,10 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) { IndexOnlyScanState *indexstate; Relation currentRelation; + Relation indexRelation; LOCKMODE lockmode; TupleDesc tupDesc; + int namecount; /* * create state structure @@ -547,7 +583,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) /* Open the index relation. */ lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode; - indexstate->ioss_RelationDesc = index_open(node->indexid, lockmode); + indexRelation = index_open(node->indexid, lockmode); + indexstate->ioss_RelationDesc = indexRelation; /* * Initialize index-specific scan state @@ -560,7 +597,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) * build the index scan keys from the index qualification */ ExecIndexBuildScanKeys((PlanState *) indexstate, - indexstate->ioss_RelationDesc, + indexRelation, node->indexqual, false, &indexstate->ioss_ScanKeys, @@ -574,7 +611,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) * any ORDER BY exprs have to be turned into scankeys in the same way */ ExecIndexBuildScanKeys((PlanState *) indexstate, - indexstate->ioss_RelationDesc, + indexRelation, node->indexorderby, true, &indexstate->ioss_OrderByKeys, @@ -603,6 +640,40 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) indexstate->ioss_RuntimeContext = NULL; } + indexstate->ioss_NameColumnAttNums = NULL; + namecount = 0; + + /* + * For indexes using the name_ops opclass, because that opclass is defined + * to store cstrings rather than NAMEDATALEN padded strings, we must + * detect this case so that we can properly pad name Datums with the full + * NAMEDATALEN bytes. Here we first determine if any such columns exist, + * if so, we create an array marking the index attribute number of each of + * them. StoreIndexTuple() handles copying the name Datums into a + * NAMEDATALEN-byte allocation. + */ + for (int attnum = 0; attnum < tupDesc->natts; attnum++) + { + if (indexRelation->rd_opfamily[attnum] == TEXT_BTREE_FAM_OID && + indexRelation->rd_opcintype[attnum] == NAMEOID) + namecount++; + } + + if (namecount > 0) + { + int idx = 0; + + indexstate->ioss_NameColumnAttNums = (int *) palloc(sizeof(int) * namecount); + + for (int attnum = 0; attnum < tupDesc->natts; attnum++) + { + if (indexRelation->rd_opfamily[attnum] == TEXT_BTREE_FAM_OID && + indexRelation->rd_opcintype[attnum] == NAMEOID) + indexstate->ioss_NameColumnAttNums[idx++] = attnum; + } + } + indexstate->ioss_NameColumnAttCount = namecount; + /* * all done. */ diff --git a/src/include/catalog/pg_opclass.dat b/src/include/catalog/pg_opclass.dat index 6c30770fe7..f503c652eb 100644 --- a/src/include/catalog/pg_opclass.dat +++ b/src/include/catalog/pg_opclass.dat @@ -91,8 +91,11 @@ # Here's an ugly little hack to save space in the system catalog indexes. # btree doesn't ordinarily allow a storage type different from input type; # but cstring and name are the same thing except for trailing padding, -# and we can safely omit that within an index entry. So we declare the -# btree opclass for name as using cstring storage type. +# so we choose to omit that within an index entry. Here we declare the +# btree opclass for name as using cstring storage type. This does require +# that we pad the cstring out with the full NAMEDATALEN bytes when performing +# index-only scans. See corresponding hacks in ExecInitIndexOnlyScan() and +# StoreIndexTuple(). { opcmethod => 'btree', opcname => 'name_ops', opcfamily => 'btree/text_ops', opcintype => 'name', opckeytype => 'cstring' }, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index d927ac44a8..5b98bbed91 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1690,6 +1690,8 @@ typedef struct IndexScanState * TableSlot slot for holding tuples fetched from the table * VMBuffer buffer in use for visibility map testing, if any * PscanLen size of parallel index-only scan descriptor + * NameColumnAttNums attnums of name typed columns to pad to NAMEDATALEN + * NameColumnAttCount number of elements in the NameColumnAttNums array * ---------------- */ typedef struct IndexOnlyScanState @@ -1709,6 +1711,8 @@ typedef struct IndexOnlyScanState TupleTableSlot *ioss_TableSlot; Buffer ioss_VMBuffer; Size ioss_PscanLen; + int *ioss_NameColumnAttNums; + int ioss_NameColumnAttCount; } IndexOnlyScanState; /* ---------------- -- 2.40.1