From 901f381aa2618c42b3092c2d3d7de061169eaf46 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Wed, 20 Nov 2024 16:30:11 -0600
Subject: [PATCH v3] attempt multibyte-aware truncation of database names

---
 src/backend/utils/init/postinit.c | 82 +++++++++++++++++++++++++++++--
 1 file changed, 79 insertions(+), 3 deletions(-)
 100.0% src/backend/utils/init/

diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 5b657a3f13..ef6fe705c2 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -1003,14 +1003,90 @@ InitPostgres(const char *in_dbname, Oid dboid,
 	{
 		HeapTuple	tuple;
 		Form_pg_database dbform;
+		char		trunc_dbname[NAMEDATALEN];
+		int			curr_len = NAMEDATALEN - 1;
 
-		tuple = GetDatabaseTuple(in_dbname);
+		/* truncate to NAMEDATALEN-1 bytes first */
+		strncpy(trunc_dbname, in_dbname, curr_len);
+		trunc_dbname[curr_len] = '\0';
+
+		tuple = GetDatabaseTuple(trunc_dbname);
+
+		if (HeapTupleIsValid(tuple))
+			strcpy(dbname, trunc_dbname);
+
+		/*
+		 * Try shorter lengths to handle potential multibyte character splits.
+		 * We only need to check back to NAMEDATALEN - MAX_MULTIBYTE_CHAR_LEN
+		 * since no character is longer than MAX_MULTIBYTE_CHAR_LEN bytes.
+		 */
+		while (curr_len > NAMEDATALEN - MAX_MULTIBYTE_CHAR_LEN)
+		{
+			/*
+			 * If we've hit an ASCII byte (high bit clear), we know we can't
+			 * be in the middle of a multibyte character, because all bytes of
+			 * a multibyte character must have their high bits set.
+			 */
+			if (!IS_HIGHBIT_SET(trunc_dbname[curr_len - 1]))
+				break;
+
+			/*
+			 * The last byte we kept has its high bit set.  If the first byte
+			 * we dropped from the original string does too, the truncation
+			 * point might fall inside a multibyte character.
+			 *
+			 * Note: We must check against in_dbname when looking at curr_len
+			 * because that byte has been truncated from trunc_dbname.
+			 */
+			if (curr_len < strlen(in_dbname) &&
+				IS_HIGHBIT_SET(in_dbname[curr_len]))
+			{
+				HeapTuple	tmp;
+
+				/*
+				 * Might be splitting a multibyte char, try one byte shorter
+				 * to find a possible character boundary.
+				 */
+				curr_len--;
+				trunc_dbname[curr_len] = '\0';
+
+				tmp = GetDatabaseTuple(trunc_dbname);
+
+				/*
+				 * If we already had a match then the name is ambiguous and we
+				 * must fail.
+				 */
+				if (HeapTupleIsValid(tmp))
+				{
+					if (HeapTupleIsValid(tuple))
+						ereport(FATAL,
+								(errmsg("ambiguous database name")));
+					tuple = tmp;
+					strcpy(dbname, trunc_dbname);
+				}
+			}
+			else
+				/* not in the middle of a multibyte char */
+				break;
+		}
+
+		/* If we didn't find any valid database name, report an error */
 		if (!HeapTupleIsValid(tuple))
 			ereport(FATAL,
 					(errcode(ERRCODE_UNDEFINED_DATABASE),
 					 errmsg("database \"%s\" does not exist", in_dbname)));
+
 		dbform = (Form_pg_database) GETSTRUCT(tuple);
 		dboid = dbform->oid;
+
+		if (MyProcPort && MyProcPort->database_name &&
+			strcmp(MyProcPort->database_name, dbname) != 0)
+		{
+			pfree(MyProcPort->database_name);
+			MyProcPort->database_name = pstrdup(dbname);
+
+			/* XXX: should we fix process title? */
+		}
 	}
 	else if (!OidIsValid(dboid))
 	{
@@ -1067,12 +1143,12 @@ InitPostgres(const char *in_dbname, Oid dboid,
 			datform = (Form_pg_database) GETSTRUCT(tuple);
 
 		if (!HeapTupleIsValid(tuple) ||
-			(in_dbname && namestrcmp(&datform->datname, in_dbname)))
+			(in_dbname && namestrcmp(&datform->datname, dbname)))
 		{
 			if (in_dbname)
 				ereport(FATAL,
 						(errcode(ERRCODE_UNDEFINED_DATABASE),
-						 errmsg("database \"%s\" does not exist", in_dbname),
+						 errmsg("database \"%s\" does not exist", dbname),
 						 errdetail("It seems to have just been dropped or renamed.")));
 			else
 				ereport(FATAL,
-- 
2.34.1

