From bd9ee5e164f7a7f9f4489d617383f83cbb669fb1 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Wed, 24 Apr 2024 14:14:44 +1200 Subject: [PATCH v4] Ensure we allocate NAMEDATALEN bytes for names in Index Only Scans --- src/backend/executor/nodeIndexonlyscan.c | 94 +++++++++++++++++-- src/include/catalog/pg_opclass.dat | 7 +- src/include/nodes/execnodes.h | 4 + src/test/regress/expected/index_including.out | 25 +++++ src/test/regress/sql/index_including.sql | 19 ++++ 5 files changed, 140 insertions(+), 9 deletions(-) diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index fcf6d1d932..d82da50a69 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -35,18 +35,20 @@ #include "access/tableam.h" #include "access/tupdesc.h" #include "access/visibilitymap.h" +#include "catalog/pg_opfamily_d.h" #include "executor/executor.h" #include "executor/nodeIndexonlyscan.h" #include "executor/nodeIndexscan.h" #include "miscadmin.h" #include "storage/bufmgr.h" #include "storage/predicate.h" +#include "utils/builtins.h" #include "utils/rel.h" static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node); -static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, - TupleDesc itupdesc); +static void StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot, + IndexTuple itup, TupleDesc itupdesc); /* ---------------------------------------------------------------- @@ -205,7 +207,7 @@ IndexOnlyNext(IndexOnlyScanState *node) ExecForceStoreHeapTuple(scandesc->xs_hitup, slot, false); } else if (scandesc->xs_itup) - StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc); + StoreIndexTuple(node, slot, scandesc->xs_itup, scandesc->xs_itupdesc); else elog(ERROR, "no data returned for index-only scan"); @@ -263,7 +265,8 @@ IndexOnlyNext(IndexOnlyScanState *node) * right now we don't need it elsewhere. */ static void -StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc) +StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot, + IndexTuple itup, TupleDesc itupdesc) { /* * Note: we must use the tupdesc supplied by the AM in index_deform_tuple, @@ -276,6 +279,37 @@ StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc) ExecClearTuple(slot); index_deform_tuple(itup, itupdesc, slot->tts_values, slot->tts_isnull); + + /* + * Copy all name columns stored as cstrings back into a NAMEDATALEN byte + * sized allocation. We mark this branch as unlikely as generally "name" + * is used only for the system catalogs and this would have to be a user + * query running on those or some other user table with an index on a name + * column. + */ + if (unlikely(node->ioss_NameCStringAttNums != NULL)) + { + int attcount = node->ioss_NameCStringCount; + + for (int idx = 0; idx < attcount; idx++) + { + int attnum = node->ioss_NameCStringAttNums[idx]; + Name name; + + /* skip null Datums */ + if (slot->tts_isnull[attnum]) + continue; + + /* allocate the NAMEDATALEN and copy the datum into that memory */ + name = (Name) MemoryContextAlloc(node->ss.ps.ps_ExprContext->ecxt_per_tuple_memory, + NAMEDATALEN); + + /* use namestrcpy to zero-pad all trailing bytes */ + namestrcpy(name, DatumGetCString(slot->tts_values[attnum])); + slot->tts_values[attnum] = NameGetDatum(name); + } + } + ExecStoreVirtualTuple(slot); } @@ -473,8 +507,11 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) { IndexOnlyScanState *indexstate; Relation currentRelation; + Relation indexRelation; LOCKMODE lockmode; TupleDesc tupDesc; + int indnkeyatts; + int namecount; /* * create state structure @@ -547,7 +584,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) /* Open the index relation. */ lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode; - indexstate->ioss_RelationDesc = index_open(node->indexid, lockmode); + indexRelation = index_open(node->indexid, lockmode); + indexstate->ioss_RelationDesc = indexRelation; /* * Initialize index-specific scan state @@ -560,7 +598,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) * build the index scan keys from the index qualification */ ExecIndexBuildScanKeys((PlanState *) indexstate, - indexstate->ioss_RelationDesc, + indexRelation, node->indexqual, false, &indexstate->ioss_ScanKeys, @@ -574,7 +612,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) * any ORDER BY exprs have to be turned into scankeys in the same way */ ExecIndexBuildScanKeys((PlanState *) indexstate, - indexstate->ioss_RelationDesc, + indexRelation, node->indexorderby, true, &indexstate->ioss_OrderByKeys, @@ -603,6 +641,48 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) indexstate->ioss_RuntimeContext = NULL; } + indexstate->ioss_NameCStringAttNums = NULL; + indnkeyatts = indexRelation->rd_index->indnkeyatts; + namecount = 0; + + /* + * The "name" type for btree uses text_ops which results in storing + * cstrings in the indexed keys rather than names. Here we detect that in + * a generic way in case other index AMs want to do the same optimization. + * Check for opclasses with an opcintype of NAMEOID and a index tuple + * descriptor with CSTRINGOID. If any of these are found, create an array + * marking the index attribute number of each of them. StoreIndexTuple() + * handles copying the name Datums into a NAMEDATALEN-byte allocation. + */ + + /* First count the number of such index keys */ + for (int attnum = 0; attnum < indnkeyatts; attnum++) + { + if (indexRelation->rd_att->attrs[attnum].atttypid == CSTRINGOID && + indexRelation->rd_opcintype[attnum] == NAMEOID) + namecount++; + } + + if (namecount > 0) + { + int idx = 0; + + /* + * Now create an array to mark the attribute numbers of the keys that + * need to be converted from cstring to name. + */ + indexstate->ioss_NameCStringAttNums = (int *) palloc(sizeof(int) * namecount); + + for (int attnum = 0; attnum < indnkeyatts; attnum++) + { + if (indexRelation->rd_att->attrs[attnum].atttypid == CSTRINGOID && + indexRelation->rd_opcintype[attnum] == NAMEOID) + indexstate->ioss_NameCStringAttNums[idx++] = attnum; + } + } + + indexstate->ioss_NameCStringCount = namecount; + /* * all done. */ diff --git a/src/include/catalog/pg_opclass.dat b/src/include/catalog/pg_opclass.dat index 6c30770fe7..f503c652eb 100644 --- a/src/include/catalog/pg_opclass.dat +++ b/src/include/catalog/pg_opclass.dat @@ -91,8 +91,11 @@ # Here's an ugly little hack to save space in the system catalog indexes. # btree doesn't ordinarily allow a storage type different from input type; # but cstring and name are the same thing except for trailing padding, -# and we can safely omit that within an index entry. So we declare the -# btree opclass for name as using cstring storage type. +# so we choose to omit that within an index entry. Here we declare the +# btree opclass for name as using cstring storage type. This does require +# that we pad the cstring out with the full NAMEDATALEN bytes when performing +# index-only scans. See corresponding hacks in ExecInitIndexOnlyScan() and +# StoreIndexTuple(). { opcmethod => 'btree', opcname => 'name_ops', opcfamily => 'btree/text_ops', opcintype => 'name', opckeytype => 'cstring' }, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index d927ac44a8..c7b1c008cb 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1690,6 +1690,8 @@ typedef struct IndexScanState * TableSlot slot for holding tuples fetched from the table * VMBuffer buffer in use for visibility map testing, if any * PscanLen size of parallel index-only scan descriptor + * NameCStringAttNums attnums of name typed columns to pad to NAMEDATALEN + * NameCStringCount number of elements in the NameCStringAttNums array * ---------------- */ typedef struct IndexOnlyScanState @@ -1709,6 +1711,8 @@ typedef struct IndexOnlyScanState TupleTableSlot *ioss_TableSlot; Buffer ioss_VMBuffer; Size ioss_PscanLen; + int *ioss_NameCStringAttNums; + int ioss_NameCStringCount; } IndexOnlyScanState; /* ---------------- diff --git a/src/test/regress/expected/index_including.out b/src/test/regress/expected/index_including.out index 86510687c7..ea8b2454bf 100644 --- a/src/test/regress/expected/index_including.out +++ b/src/test/regress/expected/index_including.out @@ -398,3 +398,28 @@ Indexes: "tbl_c1_c2_c3_c4_key" UNIQUE CONSTRAINT, btree (c1, c2) INCLUDE (c3, c4) DROP TABLE tbl; +/* + * 10. Test coverage for names stored as cstrings in indexes + */ +CREATE TABLE nametbl (c1 int, c2 name, c3 float); +CREATE INDEX nametbl_c1_c2_idx ON nametbl (c2, c1) INCLUDE (c3); +INSERT INTO nametbl VALUES(1, 'two', 3.0); +VACUUM nametbl; +SET enable_seqscan = 0; +-- Ensure we get an index only scan plan +EXPLAIN (COSTS OFF) SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1; + QUERY PLAN +---------------------------------------------------- + Index Only Scan using nametbl_c1_c2_idx on nametbl + Index Cond: ((c2 = 'two'::name) AND (c1 = 1)) +(2 rows) + +-- Validate the results look sane +SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1; + c2 | c1 | c3 +-----+----+---- + two | 1 | 3 +(1 row) + +RESET enable_seqscan; +DROP TABLE nametbl; diff --git a/src/test/regress/sql/index_including.sql b/src/test/regress/sql/index_including.sql index 44b340053b..ad9cbdd028 100644 --- a/src/test/regress/sql/index_including.sql +++ b/src/test/regress/sql/index_including.sql @@ -217,3 +217,22 @@ ALTER TABLE tbl ALTER c1 TYPE bigint; ALTER TABLE tbl ALTER c3 TYPE bigint; \d tbl DROP TABLE tbl; + +/* + * 10. Test coverage for names stored as cstrings in indexes + */ +CREATE TABLE nametbl (c1 int, c2 name, c3 float); +CREATE INDEX nametbl_c1_c2_idx ON nametbl (c2, c1) INCLUDE (c3); +INSERT INTO nametbl VALUES(1, 'two', 3.0); +VACUUM nametbl; +SET enable_seqscan = 0; + +-- Ensure we get an index only scan plan +EXPLAIN (COSTS OFF) SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1; + +-- Validate the results look sane +SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1; + +RESET enable_seqscan; + +DROP TABLE nametbl; \ No newline at end of file -- 2.40.1