From 423a2ea6688b2e71b149c6cf2cbef9eb5b280f0e Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 9 Sep 2024 14:22:00 +0200 Subject: [PATCH v2] Remove hardcoded hash opclass function signature exceptions hashvalidate(), which validates the signatures of support functions for the hash AM, contained several hardcoded exceptions. For example, hash/date_ops support function 1 was hashint4(), which would ordinarily fail validation because the function argument is int4, not date. But this works internally because int4 and date are of the same size. There are several more exceptions like this that happen to work and were allowed historically but would now fail the function signature validation. This patch removes those exceptions by providing new support functions that have the proper declared signatures. They internally share most of the code with the "wrong" functions they replace, so the behavior is still the same. With the exceptions gone, hashvalidate() is now simplified and relies fully on check_amproc_signature(). hashvarlena() and hashvarlenaextended() are removed from pg_proc.dat. Their use as opclass support functions is dubious, and since they have an argument of type internal, they can't be called directly. The underlying C functions continue to exist in the C code as internal support functions. (Some extensions define SQL-level hash functions that use them as the C implementation.) Discussion: https://www.postgresql.org/message-id/flat/29c3b746-69e7-482a-b37c-dbbf7e5b009b@eisentraut.org TODO: catversion --- src/backend/access/hash/hashfunc.c | 21 ++++ src/backend/access/hash/hashvalidate.c | 130 +++++-------------------- src/backend/utils/adt/bool.c | 13 +++ src/backend/utils/adt/date.c | 12 +++ src/backend/utils/adt/timestamp.c | 12 +++ src/backend/utils/adt/xid.c | 37 +++++++ src/include/catalog/pg_amproc.dat | 30 +++--- src/include/catalog/pg_proc.dat | 48 +++++++-- 8 files changed, 177 insertions(+), 126 deletions(-) diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index 86f1adecb75..dd44e4330af 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -376,7 +376,16 @@ hashtextextended(PG_FUNCTION_ARGS) /* * hashvarlena() can be used for any varlena datatype in which there are * no non-significant bits, ie, distinct bitpatterns never compare as equal. + * + * (However, you need to define an SQL-level wrapper function around it with + * the concrete input data type; otherwise hashvalidate() won't accept it. + * Moreover, at least for built-in types, a C-level wrapper function is also + * recommended; otherwise, the opr_sanity test will get upset.) */ + +extern Datum hashvarlena(PG_FUNCTION_ARGS); +extern Datum hashvarlenaextended(PG_FUNCTION_ARGS); + Datum hashvarlena(PG_FUNCTION_ARGS) { @@ -406,3 +415,15 @@ hashvarlenaextended(PG_FUNCTION_ARGS) return result; } + +Datum +hashbytea(PG_FUNCTION_ARGS) +{ + return hashvarlena(fcinfo); +} + +Datum +hashbyteaextended(PG_FUNCTION_ARGS) +{ + return hashvarlenaextended(fcinfo); +} diff --git a/src/backend/access/hash/hashvalidate.c b/src/backend/access/hash/hashvalidate.c index 40164e2ea2b..66a3b7b49ad 100644 --- a/src/backend/access/hash/hashvalidate.c +++ b/src/backend/access/hash/hashvalidate.c @@ -22,19 +22,13 @@ #include "catalog/pg_amproc.h" #include "catalog/pg_opclass.h" #include "catalog/pg_opfamily.h" -#include "catalog/pg_proc.h" #include "catalog/pg_type.h" -#include "parser/parse_coerce.h" #include "utils/builtins.h" -#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/regproc.h" #include "utils/syscache.h" -static bool check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype); - - /* * Validator for a hash opclass. * @@ -90,6 +84,7 @@ hashvalidate(Oid opclassoid) { HeapTuple proctup = &proclist->members[i]->tuple; Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup); + bool ok; /* * All hash functions should be registered with matching left/right @@ -109,29 +104,15 @@ hashvalidate(Oid opclassoid) switch (procform->amprocnum) { case HASHSTANDARD_PROC: + ok = check_amproc_signature(procform->amproc, INT4OID, true, + 1, 1, procform->amproclefttype); + break; case HASHEXTENDED_PROC: - if (!check_hash_func_signature(procform->amproc, procform->amprocnum, - procform->amproclefttype)) - { - ereport(INFO, - (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("operator family \"%s\" of access method %s contains function %s with wrong signature for support number %d", - opfamilyname, "hash", - format_procedure(procform->amproc), - procform->amprocnum))); - result = false; - } - else - { - /* Remember which types we can hash */ - hashabletypes = - list_append_unique_oid(hashabletypes, - procform->amproclefttype); - } + ok = check_amproc_signature(procform->amproc, INT8OID, true, + 2, 2, procform->amproclefttype, INT8OID); break; case HASHOPTIONS_PROC: - if (!check_amoptsproc_signature(procform->amproc)) - result = false; + ok = check_amoptsproc_signature(procform->amproc); break; default: ereport(INFO, @@ -141,7 +122,24 @@ hashvalidate(Oid opclassoid) format_procedure(procform->amproc), procform->amprocnum))); result = false; - break; + continue; /* don't want additional message */ + } + + if (!ok) + { + ereport(INFO, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator family \"%s\" of access method %s contains function %s with wrong signature for support number %d", + opfamilyname, "hash", + format_procedure(procform->amproc), + procform->amprocnum))); + result = false; + } + + /* Remember which types we can hash */ + if (ok && (procform->amprocnum == HASHSTANDARD_PROC || procform->amprocnum == HASHEXTENDED_PROC)) + { + hashabletypes = list_append_unique_oid(hashabletypes, procform->amproclefttype); } } @@ -267,84 +265,6 @@ hashvalidate(Oid opclassoid) } -/* - * We need a custom version of check_amproc_signature because of assorted - * hacks in the core hash opclass definitions. - */ -static bool -check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype) -{ - bool result = true; - Oid restype; - int16 nargs; - HeapTuple tp; - Form_pg_proc procform; - - switch (amprocnum) - { - case HASHSTANDARD_PROC: - restype = INT4OID; - nargs = 1; - break; - - case HASHEXTENDED_PROC: - restype = INT8OID; - nargs = 2; - break; - - default: - elog(ERROR, "invalid amprocnum"); - } - - tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); - if (!HeapTupleIsValid(tp)) - elog(ERROR, "cache lookup failed for function %u", funcid); - procform = (Form_pg_proc) GETSTRUCT(tp); - - if (procform->prorettype != restype || procform->proretset || - procform->pronargs != nargs) - result = false; - - if (!IsBinaryCoercible(argtype, procform->proargtypes.values[0])) - { - /* - * Some of the built-in hash opclasses cheat by using hash functions - * that are different from but physically compatible with the opclass - * datatype. In some of these cases, even a "binary coercible" check - * fails because there's no relevant cast. For the moment, fix it by - * having a list of allowed cases. Test the specific function - * identity, not just its input type, because hashvarlena() takes - * INTERNAL and allowing any such function seems too scary. - */ - if ((funcid == F_HASHINT4 || funcid == F_HASHINT4EXTENDED) && - (argtype == DATEOID || - argtype == XIDOID || argtype == CIDOID)) - /* okay, allowed use of hashint4() */ ; - else if ((funcid == F_HASHINT8 || funcid == F_HASHINT8EXTENDED) && - (argtype == XID8OID)) - /* okay, allowed use of hashint8() */ ; - else if ((funcid == F_TIMESTAMP_HASH || - funcid == F_TIMESTAMP_HASH_EXTENDED) && - argtype == TIMESTAMPTZOID) - /* okay, allowed use of timestamp_hash() */ ; - else if ((funcid == F_HASHCHAR || funcid == F_HASHCHAREXTENDED) && - argtype == BOOLOID) - /* okay, allowed use of hashchar() */ ; - else if ((funcid == F_HASHVARLENA || funcid == F_HASHVARLENAEXTENDED) && - argtype == BYTEAOID) - /* okay, allowed use of hashvarlena() */ ; - else - result = false; - } - - /* If function takes a second argument, it must be for a 64-bit salt. */ - if (nargs == 2 && procform->proargtypes.values[1] != INT8OID) - result = false; - - ReleaseSysCache(tp); - return result; -} - /* * Prechecking function for adding operators/functions to a hash opfamily. */ diff --git a/src/backend/utils/adt/bool.c b/src/backend/utils/adt/bool.c index 85e6786563e..a68a112d900 100644 --- a/src/backend/utils/adt/bool.c +++ b/src/backend/utils/adt/bool.c @@ -17,6 +17,7 @@ #include +#include "common/hashfn.h" #include "libpq/pqformat.h" #include "utils/builtins.h" @@ -273,6 +274,18 @@ boolge(PG_FUNCTION_ARGS) PG_RETURN_BOOL(arg1 >= arg2); } +Datum +hashbool(PG_FUNCTION_ARGS) +{ + return hash_uint32((int32) PG_GETARG_BOOL(0)); +} + +Datum +hashboolextended(PG_FUNCTION_ARGS) +{ + return hash_uint32_extended((int32) PG_GETARG_BOOL(0), PG_GETARG_INT64(1)); +} + /* * boolean-and and boolean-or aggregates. */ diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c index 9c854e0e5c3..8130f3e8ac0 100644 --- a/src/backend/utils/adt/date.c +++ b/src/backend/utils/adt/date.c @@ -455,6 +455,18 @@ date_sortsupport(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +Datum +hashdate(PG_FUNCTION_ARGS) +{ + return hash_uint32(PG_GETARG_DATEADT(0)); +} + +Datum +hashdateextended(PG_FUNCTION_ARGS) +{ + return hash_uint32_extended(PG_GETARG_DATEADT(0), PG_GETARG_INT64(1)); +} + Datum date_finite(PG_FUNCTION_ARGS) { diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index db9eea90982..e00cd3c9690 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -2298,6 +2298,18 @@ timestamp_hash_extended(PG_FUNCTION_ARGS) return hashint8extended(fcinfo); } +Datum +timestamptz_hash(PG_FUNCTION_ARGS) +{ + return hashint8(fcinfo); +} + +Datum +timestamptz_hash_extended(PG_FUNCTION_ARGS) +{ + return hashint8extended(fcinfo); +} + /* * Cross-type comparison functions for timestamp vs timestamptz */ diff --git a/src/backend/utils/adt/xid.c b/src/backend/utils/adt/xid.c index ae273b19610..2f34a5dc1f0 100644 --- a/src/backend/utils/adt/xid.c +++ b/src/backend/utils/adt/xid.c @@ -19,6 +19,7 @@ #include "access/multixact.h" #include "access/transam.h" #include "access/xact.h" +#include "common/hashfn.h" #include "common/int.h" #include "libpq/pqformat.h" #include "utils/builtins.h" @@ -97,6 +98,18 @@ xidneq(PG_FUNCTION_ARGS) PG_RETURN_BOOL(!TransactionIdEquals(xid1, xid2)); } +Datum +hashxid(PG_FUNCTION_ARGS) +{ + return hash_uint32(PG_GETARG_TRANSACTIONID(0)); +} + +Datum +hashxidextended(PG_FUNCTION_ARGS) +{ + return hash_uint32_extended(PG_GETARG_TRANSACTIONID(0), PG_GETARG_INT64(1)); +} + /* * xid_age - compute age of an XID (relative to latest stable xid) */ @@ -287,6 +300,18 @@ xid8cmp(PG_FUNCTION_ARGS) PG_RETURN_INT32(-1); } +Datum +hashxid8(PG_FUNCTION_ARGS) +{ + return hashint8(fcinfo); +} + +Datum +hashxid8extended(PG_FUNCTION_ARGS) +{ + return hashint8extended(fcinfo); +} + Datum xid8_larger(PG_FUNCTION_ARGS) { @@ -374,3 +399,15 @@ cideq(PG_FUNCTION_ARGS) PG_RETURN_BOOL(arg1 == arg2); } + +Datum +hashcid(PG_FUNCTION_ARGS) +{ + return hash_uint32(PG_GETARG_COMMANDID(0)); +} + +Datum +hashcidextended(PG_FUNCTION_ARGS) +{ + return hash_uint32_extended(PG_GETARG_COMMANDID(0), PG_GETARG_INT64(1)); +} diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat index f639c3a6a51..242bdc0acc1 100644 --- a/src/include/catalog/pg_amproc.dat +++ b/src/include/catalog/pg_amproc.dat @@ -304,9 +304,9 @@ { amprocfamily => 'hash/char_ops', amproclefttype => 'char', amprocrighttype => 'char', amprocnum => '2', amproc => 'hashcharextended' }, { amprocfamily => 'hash/date_ops', amproclefttype => 'date', - amprocrighttype => 'date', amprocnum => '1', amproc => 'hashint4' }, + amprocrighttype => 'date', amprocnum => '1', amproc => 'hashdate' }, { amprocfamily => 'hash/date_ops', amproclefttype => 'date', - amprocrighttype => 'date', amprocnum => '2', amproc => 'hashint4extended' }, + amprocrighttype => 'date', amprocnum => '2', amproc => 'hashdateextended' }, { amprocfamily => 'hash/array_ops', amproclefttype => 'anyarray', amprocrighttype => 'anyarray', amprocnum => '1', amproc => 'hash_array' }, { amprocfamily => 'hash/array_ops', amproclefttype => 'anyarray', @@ -376,10 +376,10 @@ amproc => 'hash_numeric_extended' }, { amprocfamily => 'hash/timestamptz_ops', amproclefttype => 'timestamptz', amprocrighttype => 'timestamptz', amprocnum => '1', - amproc => 'timestamp_hash' }, + amproc => 'timestamptz_hash' }, { amprocfamily => 'hash/timestamptz_ops', amproclefttype => 'timestamptz', amprocrighttype => 'timestamptz', amprocnum => '2', - amproc => 'timestamp_hash_extended' }, + amproc => 'timestamptz_hash_extended' }, { amprocfamily => 'hash/timetz_ops', amproclefttype => 'timetz', amprocrighttype => 'timetz', amprocnum => '1', amproc => 'timetz_hash' }, { amprocfamily => 'hash/timetz_ops', amproclefttype => 'timetz', @@ -392,26 +392,26 @@ amprocrighttype => 'timestamp', amprocnum => '2', amproc => 'timestamp_hash_extended' }, { amprocfamily => 'hash/bool_ops', amproclefttype => 'bool', - amprocrighttype => 'bool', amprocnum => '1', amproc => 'hashchar' }, + amprocrighttype => 'bool', amprocnum => '1', amproc => 'hashbool' }, { amprocfamily => 'hash/bool_ops', amproclefttype => 'bool', - amprocrighttype => 'bool', amprocnum => '2', amproc => 'hashcharextended' }, + amprocrighttype => 'bool', amprocnum => '2', amproc => 'hashboolextended' }, { amprocfamily => 'hash/bytea_ops', amproclefttype => 'bytea', - amprocrighttype => 'bytea', amprocnum => '1', amproc => 'hashvarlena' }, + amprocrighttype => 'bytea', amprocnum => '1', amproc => 'hashbytea' }, { amprocfamily => 'hash/bytea_ops', amproclefttype => 'bytea', amprocrighttype => 'bytea', amprocnum => '2', - amproc => 'hashvarlenaextended' }, + amproc => 'hashbyteaextended' }, { amprocfamily => 'hash/xid_ops', amproclefttype => 'xid', - amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashint4' }, + amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashxid' }, { amprocfamily => 'hash/xid_ops', amproclefttype => 'xid', - amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashint4extended' }, + amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashxidextended' }, { amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8', - amprocrighttype => 'xid8', amprocnum => '1', amproc => 'hashint8' }, + amprocrighttype => 'xid8', amprocnum => '1', amproc => 'hashxid8' }, { amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8', - amprocrighttype => 'xid8', amprocnum => '2', amproc => 'hashint8extended' }, + amprocrighttype => 'xid8', amprocnum => '2', amproc => 'hashxid8extended' }, { amprocfamily => 'hash/cid_ops', amproclefttype => 'cid', - amprocrighttype => 'cid', amprocnum => '1', amproc => 'hashint4' }, + amprocrighttype => 'cid', amprocnum => '1', amproc => 'hashcid' }, { amprocfamily => 'hash/cid_ops', amproclefttype => 'cid', - amprocrighttype => 'cid', amprocnum => '2', amproc => 'hashint4extended' }, + amprocrighttype => 'cid', amprocnum => '2', amproc => 'hashcidextended' }, { amprocfamily => 'hash/tid_ops', amproclefttype => 'tid', amprocrighttype => 'tid', amprocnum => '1', amproc => 'hashtid' }, { amprocfamily => 'hash/tid_ops', amproclefttype => 'tid', @@ -819,7 +819,7 @@ amprocrighttype => 'bytea', amprocnum => '5', amproc => 'brin_bloom_options' }, { amprocfamily => 'brin/bytea_bloom_ops', amproclefttype => 'bytea', - amprocrighttype => 'bytea', amprocnum => '11', amproc => 'hashvarlena' }, + amprocrighttype => 'bytea', amprocnum => '11', amproc => 'hashbytea' }, # minmax "char" { amprocfamily => 'brin/char_minmax_ops', amproclefttype => 'char', diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index ff5436acacf..ccf2ddb7c7b 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1219,12 +1219,12 @@ { oid => '448', descr => 'hash', proname => 'hashtextextended', prorettype => 'int8', proargtypes => 'text int8', prosrc => 'hashtextextended' }, -{ oid => '456', descr => 'hash', - proname => 'hashvarlena', prorettype => 'int4', proargtypes => 'internal', - prosrc => 'hashvarlena' }, -{ oid => '772', descr => 'hash', - proname => 'hashvarlenaextended', prorettype => 'int8', - proargtypes => 'internal int8', prosrc => 'hashvarlenaextended' }, +{ oid => '9708', descr => 'hash', + proname => 'hashbytea', prorettype => 'int4', proargtypes => 'bytea', + prosrc => 'hashbytea' }, +{ oid => '9709', descr => 'hash', + proname => 'hashbyteaextended', prorettype => 'int8', + proargtypes => 'bytea int8', prosrc => 'hashbyteaextended' }, { oid => '457', descr => 'hash', proname => 'hashoidvector', prorettype => 'int4', proargtypes => 'oidvector', prosrc => 'hashoidvector' }, @@ -1261,6 +1261,36 @@ { oid => '781', descr => 'hash', proname => 'hashmacaddr8extended', prorettype => 'int8', proargtypes => 'macaddr8 int8', prosrc => 'hashmacaddr8extended' }, +{ oid => '9710', descr => 'hash', + proname => 'hashdate', prorettype => 'int4', proargtypes => 'date', + prosrc => 'hashdate' }, +{ oid => '9711', descr => 'hash', + proname => 'hashdateextended', prorettype => 'int8', + proargtypes => 'date int8', prosrc => 'hashdateextended' }, +{ oid => '9712', descr => 'hash', + proname => 'hashbool', prorettype => 'int4', proargtypes => 'bool', + prosrc => 'hashbool' }, +{ oid => '9713', descr => 'hash', + proname => 'hashboolextended', prorettype => 'int8', + proargtypes => 'bool int8', prosrc => 'hashboolextended' }, +{ oid => '9714', descr => 'hash', + proname => 'hashxid', prorettype => 'int4', proargtypes => 'xid', + prosrc => 'hashxid' }, +{ oid => '9715', descr => 'hash', + proname => 'hashxidextended', prorettype => 'int8', + proargtypes => 'xid int8', prosrc => 'hashxidextended' }, +{ oid => '9716', descr => 'hash', + proname => 'hashxid8', prorettype => 'int4', proargtypes => 'xid8', + prosrc => 'hashxid8' }, +{ oid => '9717', descr => 'hash', + proname => 'hashxid8extended', prorettype => 'int8', + proargtypes => 'xid8 int8', prosrc => 'hashxid8extended' }, +{ oid => '9718', descr => 'hash', + proname => 'hashcid', prorettype => 'int4', proargtypes => 'cid', + prosrc => 'hashcid' }, +{ oid => '9719', descr => 'hash', + proname => 'hashcidextended', prorettype => 'int8', + proargtypes => 'cid int8', prosrc => 'hashcidextended' }, { oid => '438', descr => 'count the number of NULL arguments', proname => 'num_nulls', provariadic => 'any', proisstrict => 'f', @@ -6180,6 +6210,12 @@ { oid => '3411', descr => 'hash', proname => 'timestamp_hash_extended', prorettype => 'int8', proargtypes => 'timestamp int8', prosrc => 'timestamp_hash_extended' }, +{ oid => '9720', descr => 'hash', + proname => 'timestamptz_hash', prorettype => 'int4', proargtypes => 'timestamptz', + prosrc => 'timestamptz_hash' }, +{ oid => '9721', descr => 'hash', + proname => 'timestamptz_hash_extended', prorettype => 'int8', + proargtypes => 'timestamptz int8', prosrc => 'timestamptz_hash_extended' }, { oid => '2041', descr => 'intervals overlap?', proname => 'overlaps', proisstrict => 'f', prorettype => 'bool', proargtypes => 'timestamp timestamp timestamp timestamp', -- 2.46.0