From c154f024a58f96e8e65db5c621ae34f6d630b84b Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 6 Aug 2024 12:08:35 +0200 Subject: [PATCH] Remove hardcoded hash opclass function signature exceptions hashvalidate(), which validates the signatures of support functions for the hash AM, contained several hardcoded exceptions. For example, hash/date_ops support function 1 was hashint4(), which would ordinarily fail validation because the function argument is int4, not date. But this works internally because int4 and date are of the same size. There are several more exceptions like this that happen to work and were allowed historically but would now fail the function signature validation. This patch removes those exceptions by providing new support functions that have the proper declared signatures. They internally share most of the code with the "wrong" functions they replace, so the behavior is still the same. With the exceptions gone, hashvalidate() is now simplified and relies fully on check_amproc_signature(). TODO: Maybe hashvarlena() and hashvarlenaextended() should be removed from pg_proc.dat, since their use as opclass support functions is dubious. They can continue to exist in the C code as internal support functions. TODO: catversion --- src/backend/access/hash/hashfunc.c | 12 +++ src/backend/access/hash/hashvalidate.c | 129 +++++-------------------- src/backend/utils/adt/bool.c | 13 +++ src/backend/utils/adt/date.c | 12 +++ src/backend/utils/adt/timestamp.c | 12 +++ src/backend/utils/adt/xid.c | 37 +++++++ src/include/catalog/pg_amproc.dat | 30 +++--- src/include/catalog/pg_proc.dat | 42 ++++++++ 8 files changed, 169 insertions(+), 118 deletions(-) diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index 86f1adecb75..577ca470145 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -406,3 +406,15 @@ hashvarlenaextended(PG_FUNCTION_ARGS) return result; } + +Datum +hashbytea(PG_FUNCTION_ARGS) +{ + return hashvarlena(fcinfo); +} + +Datum +hashbyteaextended(PG_FUNCTION_ARGS) +{ + return hashvarlenaextended(fcinfo); +} diff --git a/src/backend/access/hash/hashvalidate.c b/src/backend/access/hash/hashvalidate.c index 40164e2ea2b..330355d4c5b 100644 --- a/src/backend/access/hash/hashvalidate.c +++ b/src/backend/access/hash/hashvalidate.c @@ -22,19 +22,13 @@ #include "catalog/pg_amproc.h" #include "catalog/pg_opclass.h" #include "catalog/pg_opfamily.h" -#include "catalog/pg_proc.h" #include "catalog/pg_type.h" -#include "parser/parse_coerce.h" #include "utils/builtins.h" -#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/regproc.h" #include "utils/syscache.h" -static bool check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype); - - /* * Validator for a hash opclass. * @@ -90,6 +84,7 @@ hashvalidate(Oid opclassoid) { HeapTuple proctup = &proclist->members[i]->tuple; Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup); + bool ok; /* * All hash functions should be registered with matching left/right @@ -109,30 +104,17 @@ hashvalidate(Oid opclassoid) switch (procform->amprocnum) { case HASHSTANDARD_PROC: + ok = check_amproc_signature(procform->amproc, INT4OID, true, + 1, 1, procform->amproclefttype); + break; case HASHEXTENDED_PROC: - if (!check_hash_func_signature(procform->amproc, procform->amprocnum, - procform->amproclefttype)) - { - ereport(INFO, - (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("operator family \"%s\" of access method %s contains function %s with wrong signature for support number %d", - opfamilyname, "hash", - format_procedure(procform->amproc), - procform->amprocnum))); - result = false; - } - else - { - /* Remember which types we can hash */ - hashabletypes = - list_append_unique_oid(hashabletypes, - procform->amproclefttype); - } + ok = check_amproc_signature(procform->amproc, INT8OID, true, + 2, 2, procform->amproclefttype, INT8OID); break; case HASHOPTIONS_PROC: if (!check_amoptsproc_signature(procform->amproc)) result = false; - break; + continue; default: ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -141,8 +123,27 @@ hashvalidate(Oid opclassoid) format_procedure(procform->amproc), procform->amprocnum))); result = false; + continue; break; } + + if (!ok) + { + ereport(INFO, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator family \"%s\" of access method %s contains function %s with wrong signature for support number %d", + opfamilyname, "hash", + format_procedure(procform->amproc), + procform->amprocnum))); + result = false; + } + else + { + /* Remember which types we can hash */ + hashabletypes = + list_append_unique_oid(hashabletypes, + procform->amproclefttype); + } } /* Check individual operators */ @@ -267,84 +268,6 @@ hashvalidate(Oid opclassoid) } -/* - * We need a custom version of check_amproc_signature because of assorted - * hacks in the core hash opclass definitions. - */ -static bool -check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype) -{ - bool result = true; - Oid restype; - int16 nargs; - HeapTuple tp; - Form_pg_proc procform; - - switch (amprocnum) - { - case HASHSTANDARD_PROC: - restype = INT4OID; - nargs = 1; - break; - - case HASHEXTENDED_PROC: - restype = INT8OID; - nargs = 2; - break; - - default: - elog(ERROR, "invalid amprocnum"); - } - - tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); - if (!HeapTupleIsValid(tp)) - elog(ERROR, "cache lookup failed for function %u", funcid); - procform = (Form_pg_proc) GETSTRUCT(tp); - - if (procform->prorettype != restype || procform->proretset || - procform->pronargs != nargs) - result = false; - - if (!IsBinaryCoercible(argtype, procform->proargtypes.values[0])) - { - /* - * Some of the built-in hash opclasses cheat by using hash functions - * that are different from but physically compatible with the opclass - * datatype. In some of these cases, even a "binary coercible" check - * fails because there's no relevant cast. For the moment, fix it by - * having a list of allowed cases. Test the specific function - * identity, not just its input type, because hashvarlena() takes - * INTERNAL and allowing any such function seems too scary. - */ - if ((funcid == F_HASHINT4 || funcid == F_HASHINT4EXTENDED) && - (argtype == DATEOID || - argtype == XIDOID || argtype == CIDOID)) - /* okay, allowed use of hashint4() */ ; - else if ((funcid == F_HASHINT8 || funcid == F_HASHINT8EXTENDED) && - (argtype == XID8OID)) - /* okay, allowed use of hashint8() */ ; - else if ((funcid == F_TIMESTAMP_HASH || - funcid == F_TIMESTAMP_HASH_EXTENDED) && - argtype == TIMESTAMPTZOID) - /* okay, allowed use of timestamp_hash() */ ; - else if ((funcid == F_HASHCHAR || funcid == F_HASHCHAREXTENDED) && - argtype == BOOLOID) - /* okay, allowed use of hashchar() */ ; - else if ((funcid == F_HASHVARLENA || funcid == F_HASHVARLENAEXTENDED) && - argtype == BYTEAOID) - /* okay, allowed use of hashvarlena() */ ; - else - result = false; - } - - /* If function takes a second argument, it must be for a 64-bit salt. */ - if (nargs == 2 && procform->proargtypes.values[1] != INT8OID) - result = false; - - ReleaseSysCache(tp); - return result; -} - /* * Prechecking function for adding operators/functions to a hash opfamily. */ diff --git a/src/backend/utils/adt/bool.c b/src/backend/utils/adt/bool.c index 85e6786563e..a68a112d900 100644 --- a/src/backend/utils/adt/bool.c +++ b/src/backend/utils/adt/bool.c @@ -17,6 +17,7 @@ #include +#include "common/hashfn.h" #include "libpq/pqformat.h" #include "utils/builtins.h" @@ -273,6 +274,18 @@ boolge(PG_FUNCTION_ARGS) PG_RETURN_BOOL(arg1 >= arg2); } +Datum +hashbool(PG_FUNCTION_ARGS) +{ + return hash_uint32((int32) PG_GETARG_BOOL(0)); +} + +Datum +hashboolextended(PG_FUNCTION_ARGS) +{ + return hash_uint32_extended((int32) PG_GETARG_BOOL(0), PG_GETARG_INT64(1)); +} + /* * boolean-and and boolean-or aggregates. */ diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c index 9c854e0e5c3..8130f3e8ac0 100644 --- a/src/backend/utils/adt/date.c +++ b/src/backend/utils/adt/date.c @@ -455,6 +455,18 @@ date_sortsupport(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +Datum +hashdate(PG_FUNCTION_ARGS) +{ + return hash_uint32(PG_GETARG_DATEADT(0)); +} + +Datum +hashdateextended(PG_FUNCTION_ARGS) +{ + return hash_uint32_extended(PG_GETARG_DATEADT(0), PG_GETARG_INT64(1)); +} + Datum date_finite(PG_FUNCTION_ARGS) { diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 69fe7860ede..3ed817e2efd 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -2318,6 +2318,18 @@ timestamp_hash_extended(PG_FUNCTION_ARGS) return hashint8extended(fcinfo); } +Datum +timestamptz_hash(PG_FUNCTION_ARGS) +{ + return hashint8(fcinfo); +} + +Datum +timestamptz_hash_extended(PG_FUNCTION_ARGS) +{ + return hashint8extended(fcinfo); +} + /* * Cross-type comparison functions for timestamp vs timestamptz */ diff --git a/src/backend/utils/adt/xid.c b/src/backend/utils/adt/xid.c index ae273b19610..2f34a5dc1f0 100644 --- a/src/backend/utils/adt/xid.c +++ b/src/backend/utils/adt/xid.c @@ -19,6 +19,7 @@ #include "access/multixact.h" #include "access/transam.h" #include "access/xact.h" +#include "common/hashfn.h" #include "common/int.h" #include "libpq/pqformat.h" #include "utils/builtins.h" @@ -97,6 +98,18 @@ xidneq(PG_FUNCTION_ARGS) PG_RETURN_BOOL(!TransactionIdEquals(xid1, xid2)); } +Datum +hashxid(PG_FUNCTION_ARGS) +{ + return hash_uint32(PG_GETARG_TRANSACTIONID(0)); +} + +Datum +hashxidextended(PG_FUNCTION_ARGS) +{ + return hash_uint32_extended(PG_GETARG_TRANSACTIONID(0), PG_GETARG_INT64(1)); +} + /* * xid_age - compute age of an XID (relative to latest stable xid) */ @@ -287,6 +300,18 @@ xid8cmp(PG_FUNCTION_ARGS) PG_RETURN_INT32(-1); } +Datum +hashxid8(PG_FUNCTION_ARGS) +{ + return hashint8(fcinfo); +} + +Datum +hashxid8extended(PG_FUNCTION_ARGS) +{ + return hashint8extended(fcinfo); +} + Datum xid8_larger(PG_FUNCTION_ARGS) { @@ -374,3 +399,15 @@ cideq(PG_FUNCTION_ARGS) PG_RETURN_BOOL(arg1 == arg2); } + +Datum +hashcid(PG_FUNCTION_ARGS) +{ + return hash_uint32(PG_GETARG_COMMANDID(0)); +} + +Datum +hashcidextended(PG_FUNCTION_ARGS) +{ + return hash_uint32_extended(PG_GETARG_COMMANDID(0), PG_GETARG_INT64(1)); +} diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat index f639c3a6a51..242bdc0acc1 100644 --- a/src/include/catalog/pg_amproc.dat +++ b/src/include/catalog/pg_amproc.dat @@ -304,9 +304,9 @@ { amprocfamily => 'hash/char_ops', amproclefttype => 'char', amprocrighttype => 'char', amprocnum => '2', amproc => 'hashcharextended' }, { amprocfamily => 'hash/date_ops', amproclefttype => 'date', - amprocrighttype => 'date', amprocnum => '1', amproc => 'hashint4' }, + amprocrighttype => 'date', amprocnum => '1', amproc => 'hashdate' }, { amprocfamily => 'hash/date_ops', amproclefttype => 'date', - amprocrighttype => 'date', amprocnum => '2', amproc => 'hashint4extended' }, + amprocrighttype => 'date', amprocnum => '2', amproc => 'hashdateextended' }, { amprocfamily => 'hash/array_ops', amproclefttype => 'anyarray', amprocrighttype => 'anyarray', amprocnum => '1', amproc => 'hash_array' }, { amprocfamily => 'hash/array_ops', amproclefttype => 'anyarray', @@ -376,10 +376,10 @@ amproc => 'hash_numeric_extended' }, { amprocfamily => 'hash/timestamptz_ops', amproclefttype => 'timestamptz', amprocrighttype => 'timestamptz', amprocnum => '1', - amproc => 'timestamp_hash' }, + amproc => 'timestamptz_hash' }, { amprocfamily => 'hash/timestamptz_ops', amproclefttype => 'timestamptz', amprocrighttype => 'timestamptz', amprocnum => '2', - amproc => 'timestamp_hash_extended' }, + amproc => 'timestamptz_hash_extended' }, { amprocfamily => 'hash/timetz_ops', amproclefttype => 'timetz', amprocrighttype => 'timetz', amprocnum => '1', amproc => 'timetz_hash' }, { amprocfamily => 'hash/timetz_ops', amproclefttype => 'timetz', @@ -392,26 +392,26 @@ amprocrighttype => 'timestamp', amprocnum => '2', amproc => 'timestamp_hash_extended' }, { amprocfamily => 'hash/bool_ops', amproclefttype => 'bool', - amprocrighttype => 'bool', amprocnum => '1', amproc => 'hashchar' }, + amprocrighttype => 'bool', amprocnum => '1', amproc => 'hashbool' }, { amprocfamily => 'hash/bool_ops', amproclefttype => 'bool', - amprocrighttype => 'bool', amprocnum => '2', amproc => 'hashcharextended' }, + amprocrighttype => 'bool', amprocnum => '2', amproc => 'hashboolextended' }, { amprocfamily => 'hash/bytea_ops', amproclefttype => 'bytea', - amprocrighttype => 'bytea', amprocnum => '1', amproc => 'hashvarlena' }, + amprocrighttype => 'bytea', amprocnum => '1', amproc => 'hashbytea' }, { amprocfamily => 'hash/bytea_ops', amproclefttype => 'bytea', amprocrighttype => 'bytea', amprocnum => '2', - amproc => 'hashvarlenaextended' }, + amproc => 'hashbyteaextended' }, { amprocfamily => 'hash/xid_ops', amproclefttype => 'xid', - amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashint4' }, + amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashxid' }, { amprocfamily => 'hash/xid_ops', amproclefttype => 'xid', - amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashint4extended' }, + amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashxidextended' }, { amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8', - amprocrighttype => 'xid8', amprocnum => '1', amproc => 'hashint8' }, + amprocrighttype => 'xid8', amprocnum => '1', amproc => 'hashxid8' }, { amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8', - amprocrighttype => 'xid8', amprocnum => '2', amproc => 'hashint8extended' }, + amprocrighttype => 'xid8', amprocnum => '2', amproc => 'hashxid8extended' }, { amprocfamily => 'hash/cid_ops', amproclefttype => 'cid', - amprocrighttype => 'cid', amprocnum => '1', amproc => 'hashint4' }, + amprocrighttype => 'cid', amprocnum => '1', amproc => 'hashcid' }, { amprocfamily => 'hash/cid_ops', amproclefttype => 'cid', - amprocrighttype => 'cid', amprocnum => '2', amproc => 'hashint4extended' }, + amprocrighttype => 'cid', amprocnum => '2', amproc => 'hashcidextended' }, { amprocfamily => 'hash/tid_ops', amproclefttype => 'tid', amprocrighttype => 'tid', amprocnum => '1', amproc => 'hashtid' }, { amprocfamily => 'hash/tid_ops', amproclefttype => 'tid', @@ -819,7 +819,7 @@ amprocrighttype => 'bytea', amprocnum => '5', amproc => 'brin_bloom_options' }, { amprocfamily => 'brin/bytea_bloom_ops', amproclefttype => 'bytea', - amprocrighttype => 'bytea', amprocnum => '11', amproc => 'hashvarlena' }, + amprocrighttype => 'bytea', amprocnum => '11', amproc => 'hashbytea' }, # minmax "char" { amprocfamily => 'brin/char_minmax_ops', amproclefttype => 'char', diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index d36f6001bb1..370a2e91994 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1225,6 +1225,12 @@ { oid => '772', descr => 'hash', proname => 'hashvarlenaextended', prorettype => 'int8', proargtypes => 'internal int8', prosrc => 'hashvarlenaextended' }, +{ oid => '562', descr => 'hash', + proname => 'hashbytea', prorettype => 'int4', proargtypes => 'bytea', + prosrc => 'hashbytea' }, +{ oid => '563', descr => 'hash', + proname => 'hashbyteaextended', prorettype => 'int8', + proargtypes => 'bytea int8', prosrc => 'hashbyteaextended' }, { oid => '457', descr => 'hash', proname => 'hashoidvector', prorettype => 'int4', proargtypes => 'oidvector', prosrc => 'hashoidvector' }, @@ -1261,6 +1267,36 @@ { oid => '781', descr => 'hash', proname => 'hashmacaddr8extended', prorettype => 'int8', proargtypes => 'macaddr8 int8', prosrc => 'hashmacaddr8extended' }, +{ oid => '786', descr => 'hash', + proname => 'hashdate', prorettype => 'int4', proargtypes => 'date', + prosrc => 'hashdate' }, +{ oid => '787', descr => 'hash', + proname => 'hashdateextended', prorettype => 'int8', + proargtypes => 'date int8', prosrc => 'hashdateextended' }, +{ oid => '788', descr => 'hash', + proname => 'hashbool', prorettype => 'int4', proargtypes => 'bool', + prosrc => 'hashbool' }, +{ oid => '789', descr => 'hash', + proname => 'hashboolextended', prorettype => 'int8', + proargtypes => 'bool int8', prosrc => 'hashboolextended' }, +{ oid => '812', descr => 'hash', + proname => 'hashxid', prorettype => 'int4', proargtypes => 'xid', + prosrc => 'hashxid' }, +{ oid => '813', descr => 'hash', + proname => 'hashxidextended', prorettype => 'int8', + proargtypes => 'xid int8', prosrc => 'hashxidextended' }, +{ oid => '560', descr => 'hash', + proname => 'hashxid8', prorettype => 'int4', proargtypes => 'xid8', + prosrc => 'hashxid8' }, +{ oid => '561', descr => 'hash', + proname => 'hashxid8extended', prorettype => 'int8', + proargtypes => 'xid8 int8', prosrc => 'hashxid8extended' }, +{ oid => '814', descr => 'hash', + proname => 'hashcid', prorettype => 'int4', proargtypes => 'cid', + prosrc => 'hashcid' }, +{ oid => '815', descr => 'hash', + proname => 'hashcidextended', prorettype => 'int8', + proargtypes => 'cid int8', prosrc => 'hashcidextended' }, { oid => '438', descr => 'count the number of NULL arguments', proname => 'num_nulls', provariadic => 'any', proisstrict => 'f', @@ -6180,6 +6216,12 @@ { oid => '3411', descr => 'hash', proname => 'timestamp_hash_extended', prorettype => 'int8', proargtypes => 'timestamp int8', prosrc => 'timestamp_hash_extended' }, +{ oid => '2173', descr => 'hash', + proname => 'timestamptz_hash', prorettype => 'int4', proargtypes => 'timestamptz', + prosrc => 'timestamptz_hash' }, +{ oid => '2775', descr => 'hash', + proname => 'timestamptz_hash_extended', prorettype => 'int8', + proargtypes => 'timestamptz int8', prosrc => 'timestamptz_hash_extended' }, { oid => '2041', descr => 'intervals overlap?', proname => 'overlaps', proisstrict => 'f', prorettype => 'bool', proargtypes => 'timestamp timestamp timestamp timestamp', -- 2.46.0