From 0e853e3074afd029c665d14cab750cacdba149b9 Mon Sep 17 00:00:00 2001
From: Antonin Houska <ah@cybertec.at>
Date: Fri, 5 Jul 2019 16:24:01 +0200
Subject: [PATCH 06/17] Allow user to use password instead of encryption key.

pg_keytool reads the password from stdin, uses "key derivation function (KDF)
parameters" stored in the data directory to derive the key and writes the key
to standard output.

The cluster can be created this way

	initdb -D data -K "echo securepwd | pg_keytool -D data  -w"

and started either this way

	pg_ctl -D data -K "echo securepwd | pg_keytool -D data  -w" start

or this way

	postgres -D data

and (in another session)

	echo securepwd | pg_keytool -D data -ws

Both initdb and pg_ctl can substitute the data directory for the %D pattern in
the encryption key command. Thus if the user uses pg_keytool to derive the key
from a password, he can in fact say

	initdb -D data -K "echo securepwd | pg_keytool -D %D  -w"

and

	pg_ctl -D data -K "echo securepwd | pg_keytool -D %D  -w" start

respectively.
---
 src/bin/initdb/initdb.c           |  26 +++-
 src/bin/pg_ctl/pg_ctl.c           |   2 +-
 src/bin/pg_keytool/pg_keytool.c   |  87 +++++++++---
 src/fe_utils/encryption.c         | 273 +++++++++++++++++++++++++++++++++++++-
 src/include/fe_utils/encryption.h |   8 +-
 src/include/storage/encryption.h  |   8 ++
 6 files changed, 379 insertions(+), 25 deletions(-)

diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index f2a582e975..bfa8f5fad4 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -3004,11 +3004,31 @@ initialize_data_directory(void)
 	write_version_file(NULL);
 
 	/*
-	 * If the cluster will be encrypted, run the command to generate the
-	 * encryption key.
+	 * If the cluster will be encrypted, write the KDF file so that encryption
+	 * key can be derived from password.
 	 */
 	if (encryption_key_command)
-		run_encryption_key_command(encryption_key);
+	{
+		/*
+		 * XXX Since execution of encryption_key_command produces the key (as
+		 * opposed to password), we don't know if the command received the key
+		 * itself or a password. If DBA provided initdb with a key, he will
+		 * never use password in the future (there was no KDF so far so the
+		 * key could not be derived from password, and the password can hardly
+		 * be derived from the key), so the KDF file may be useless. We don't
+		 * have enough information to recognize this special case, so just
+		 * initialize and write the KDF unconditionally.
+		 */
+		init_kdf();
+		write_kdf_file(pg_data);
+
+		/*
+		 * The key command is allowed to use pg_keytool, which in turn needs
+		 * the KDF parameters. The KDF parameters are now available so we can
+		 * run the command.
+		 */
+		run_encryption_key_command(encryption_key, pg_data);
+	}
 
 	/* Select suitable configuration settings */
 	set_null_conf();
diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index 7e414ac048..e8e4eee162 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -881,7 +881,7 @@ do_start(void)
 		 * If encryption key is needed, retrieve it before trying to start
 		 * postmaster.
 		 */
-		run_encryption_key_command(encryption_key);
+		run_encryption_key_command(encryption_key, pg_data);
 
 		/*
 		 * Where should the key be sent?
diff --git a/src/bin/pg_keytool/pg_keytool.c b/src/bin/pg_keytool/pg_keytool.c
index 322625da41..c5151ad57c 100644
--- a/src/bin/pg_keytool/pg_keytool.c
+++ b/src/bin/pg_keytool/pg_keytool.c
@@ -43,6 +43,7 @@ usage(const char *progname)
 	printf(_("Usage:\n"));
 	printf(_("  %s [OPTION]...\n"), progname);
 	printf(_("\nOptions:\n"));
+	printf(_("  -D, --pgdata=DATADIR   data directory\n"));
 	/* Display default host */
 	env = getenv("PGHOST");
 	printf(_("  -h, --host=HOSTNAME    database server host or socket directory (default: \"%s\")\n"),
@@ -52,8 +53,9 @@ usage(const char *progname)
 	printf(_("  -p, --port=PORT        database server port (default: \"%s\")\n"),
 			env ? env : DEF_PGPORT_STR);
 	printf(_("  -s,                    send output to database server\n"));
+	printf(_("  -w                     expect password on input, not a key\n"));
 	printf(_("  -?, --help             show this help, then exit\n\n"));
-	printf(_("Key is read from stdin and sent either to stdout or to PostgreSQL server being started\n"));
+	printf(_("Password or key is read from stdin. Key is sent to PostgreSQL server being started\n"));
 }
 #endif							/* USE_ENCRYPTION */
 
@@ -69,9 +71,12 @@ main(int argc, char **argv)
 	int			c;
 	char		*host = NULL;
 	char		*port_str = NULL;
+	char	   *DataDir = NULL;
 	bool		to_server = false;
+	bool		expect_password = false;
 	int			i, n;
 	int			optindex;
+	char		password[ENCRYPTION_PWD_MAX_LENGTH];
 	char		key_chars[ENCRYPTION_KEY_CHARS];
 
 	static struct option long_options[] =
@@ -99,11 +104,15 @@ main(int argc, char **argv)
 		}
 	}
 
-	while ((c = getopt_long(argc, argv, "h:p:s",
+	while ((c = getopt_long(argc, argv, "h:D:p:sw",
 							long_options, &optindex)) != -1)
 	{
 		switch (c)
 		{
+			case 'D':
+				DataDir = optarg;
+				break;
+
 			case 'h':
 				host = pg_strdup(optarg);
 				break;
@@ -116,6 +125,10 @@ main(int argc, char **argv)
 				to_server = true;
 				break;
 
+			case 'w':
+				expect_password = true;
+				break;
+
 			case '?':
 				/* Actual help option given */
 				if (strcmp(argv[optind - 1], "-?") == 0)
@@ -146,34 +159,78 @@ main(int argc, char **argv)
 		exit(1);
 	}
 
-	/* Read the key. */
+	/*
+	 * The KDF file is needed to derive the key from password, and this file
+	 * is located in the data directory.
+	 */
+	if (expect_password && DataDir == NULL)
+	{
+		pg_log_error("%s: no data directory specified", progname);
+		pg_log_error("Try \"%s --help\" for more information.", progname);
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * Read the credentials (key or password).
+	 */
 	n = 0;
 	/* Key length in characters (two characters per hexadecimal digit) */
 	while ((c = getchar()) != EOF && c != '\n')
 	{
-		if (n >= ENCRYPTION_KEY_CHARS)
+		if (!expect_password)
 		{
-			pg_log_error("The key is too long");
-			exit(EXIT_FAILURE);
+			if (n >= ENCRYPTION_KEY_CHARS)
+			{
+				pg_log_error("The key is too long");
+				exit(EXIT_FAILURE);
+			}
+
+			key_chars[n++] = c;
 		}
+		else
+		{
+			if (n >= ENCRYPTION_PWD_MAX_LENGTH)
+			{
+				pg_log_error("The password is too long");
+				exit(EXIT_FAILURE);
+			}
 
-		key_chars[n++] = c;
+			password[n++] = c;
+		}
 	}
 
-	if (n < ENCRYPTION_KEY_CHARS)
+	/* Decode the hexadecimal key, or derive the key from the password. */
+	if (!expect_password)
 	{
-		pg_log_error("The key is too short");
-		exit(EXIT_FAILURE);
-	}
+		if (n < ENCRYPTION_KEY_CHARS)
+		{
+			pg_log_error("The key is too short");
+			exit(EXIT_FAILURE);
+		}
 
-	for (i = 0; i < ENCRYPTION_KEY_LENGTH; i++)
+		for (i = 0; i < ENCRYPTION_KEY_LENGTH; i++)
+		{
+			if (sscanf(key_chars + 2 * i, "%2hhx", encryption_key + i) == 0)
+			{
+				pg_log_error("Invalid character in encryption key at position %d",
+							 2 * i);
+				exit(EXIT_FAILURE);
+			}
+		}
+	}
+	else
 	{
-		if (sscanf(key_chars + 2 * i, "%2hhx", encryption_key + i) == 0)
+		if (n < ENCRYPTION_PWD_MIN_LENGTH)
 		{
-			pg_log_error("Invalid character in encryption key at position %d",
-						 2 * i);
+			pg_log_error("The password is too short");
 			exit(EXIT_FAILURE);
 		}
+
+		/* Read the KDF parameters. */
+		read_kdf_file(DataDir);
+
+		/* Run the KDF. */
+		derive_key_from_password(encryption_key, password, n);
 	}
 
 	/*
diff --git a/src/fe_utils/encryption.c b/src/fe_utils/encryption.c
index ca37c9f373..134b3bde9b 100644
--- a/src/fe_utils/encryption.c
+++ b/src/fe_utils/encryption.c
@@ -21,31 +21,294 @@
 #include "common/file_perm.h"
 #include "common/logging.h"
 #include "fe_utils/encryption.h"
-#include "storage/encryption.h"
 #include "libpq-fe.h"
 #include "libpq-int.h"
 #include "libpq/pqcomm.h"
 
 char	   *encryption_key_command = NULL;
 
+#define KDF_PARAMS_FILE			"global/kdf_params"
+#define KDF_PARAMS_FILE_SIZE	512
+
+/*
+ * Kind of key derivation function; stored in the KDF parameters file.
+ */
+typedef enum KDFKind
+{
+	KDF_OPENSSL_PKCS5_PBKDF2_HMAC_SHA = 0
+} KDFKind;
+
+typedef struct KDFParamsPBKDF2
+{
+	unsigned long int niter;	/* number of PBKDF2 iterations */
+	unsigned char salt[ENCRYPTION_KDF_SALT_LEN];	/* PBKDF2 salt */
+} KDFParamsPBKDF2;
+
+/*
+ * Parameters of the key derivation function.
+ *
+ * The parameters are generated by initdb and stored into a file, which is
+ * then read during PG startup. This is similar to storing various settings in
+ * pg_control. However an existing KDF file is read only, so it does not have
+ * to be stored in shared memory.
+ */
+typedef struct KDFParamsData
+{
+	KDFKind		function;
+
+	/*
+	 * Function-specific parameters.
+	 */
+	union
+	{
+		KDFParamsPBKDF2 pbkdf2;
+	}			data;
+
+	/* CRC of all above ... MUST BE LAST! */
+	pg_crc32c	crc;
+} KDFParamsData;
+
+extern KDFParamsData *KDFParams;
+
+/*
+ * KDF parameters; NULL until init_kdf() or read_kdf_file() allocates them.
+ */
+KDFParamsData *KDFParams = NULL;
+
+/* Allocate KDFParams and set the PBKDF2 salt and iteration count. */
+void
+init_kdf(void)
+{
+	KDFParamsPBKDF2 *params;
+	struct timeval tv;
+	uint64	salt;
+
+	/*
+	 * The initialization should not be repeated.
+	 */
+	Assert(KDFParams == NULL);
+
+	KDFParams = palloc0(KDF_PARAMS_FILE_SIZE);
+	KDFParams->function = KDF_OPENSSL_PKCS5_PBKDF2_HMAC_SHA;
+	params = &KDFParams->data.pbkdf2;
+
+	/*
+	 * Currently we derive the salt in the same way as system identifier,
+	 * however these two values are not supposed to match. XXX Is it worth the
+	 * effort if initdb derives the system identifier, passes it to this
+	 * function and also sends it to the bootstrap process? Not sure.
+	 */
+	gettimeofday(&tv, NULL);
+	salt = ((uint64) tv.tv_sec) << 32;
+	salt |= ((uint64) tv.tv_usec) << 12;
+	salt |= getpid() & 0xFFF;
+
+	memcpy(params->salt, &salt, sizeof(uint64));
+	params->niter = ENCRYPTION_KDF_NITER;
+}
+
+/*
+ * Store CRC in KDFParams and write them to dir/KDF_PARAMS_FILE; exit on error.
+ */
+void
+write_kdf_file(char *dir)
+{
+	char		path[MAXPGPATH];
+	int			fd;
+
+	Assert(KDFParams != NULL);
+
+	/* Account for both file separator and terminating NULL character. */
+	if ((strlen(dir) + 1 + strlen(KDF_PARAMS_FILE) + 1) > MAXPGPATH)
+	{
+		pg_log_fatal("KDF directory is too long");
+		exit(EXIT_FAILURE);
+	}
+
+	snprintf(path, MAXPGPATH, "%s/%s", dir, KDF_PARAMS_FILE);
+
+	/* Contents are protected with a CRC */
+	INIT_CRC32C(KDFParams->crc);
+	COMP_CRC32C(KDFParams->crc,
+				(char *) KDFParams,
+				offsetof(KDFParamsData, crc));
+	FIN_CRC32C(KDFParams->crc);
+
+	fd = open(path, O_WRONLY | O_CREAT | PG_BINARY, pg_file_create_mode);
+	if (fd < 0)
+	{
+		pg_log_fatal("could not create key derivation file \"%s\": %m", path);
+		exit(EXIT_FAILURE);
+	}
+
+	errno = 0;
+	if (write(fd, KDFParams, KDF_PARAMS_FILE_SIZE) != KDF_PARAMS_FILE_SIZE)
+	{
+		/* if write didn't set errno, assume problem is no disk space */
+		if (errno == 0)
+			errno = ENOSPC;
+		pg_log_fatal("could not write to key derivation file \"%s\": %m",
+					 path);
+		exit(EXIT_FAILURE);
+	}
+
+	if (close(fd))
+	{
+		pg_log_fatal("could not close key derivation file \"%s\": %m", path);
+		exit(EXIT_FAILURE);
+	}
+}
+
+/*
+ * Read KDFParamsData from file and store it in local memory.
+ *
+ * If dir is NULL, assume we're in the data directory.
+ *
+ * postmaster should call the function early enough for any other process to
+ * inherit valid pointer to the data.
+ */
+void
+read_kdf_file(char *dir)
+{
+	pg_crc32c	crc;
+	char		path[MAXPGPATH];
+	int			fd;
+
+	/* A NULL dir means the current directory is the data directory. */
+	if (dir == NULL)
+		dir = ".";
+
+	/* Account for both file separator and terminating NULL character. */
+	if ((strlen(dir) + 1 + strlen(KDF_PARAMS_FILE) + 1) > MAXPGPATH)
+	{
+		pg_log_fatal("KDF directory is too long");
+		exit(EXIT_FAILURE);
+	}
+
+	snprintf(path, MAXPGPATH, "%s/%s", dir, KDF_PARAMS_FILE);
+
+	KDFParams = palloc0(KDF_PARAMS_FILE_SIZE);
+	fd = open(path, O_RDONLY | PG_BINARY, S_IRUSR);
+	if (fd < 0)
+	{
+		pg_log_fatal("could not open key setup file \"%s\": %m", path);
+		exit(EXIT_FAILURE);
+	}
+
+	if (read(fd, KDFParams, sizeof(KDFParamsData)) != sizeof(KDFParamsData))
+	{
+		pg_log_fatal("could not read from key setup file \"%s\": %m", path);
+		exit(EXIT_FAILURE);
+	}
+
+	close(fd);
+
+	/* Now check the CRC. */
+	INIT_CRC32C(crc);
+	COMP_CRC32C(crc, (char *) KDFParams, offsetof(KDFParamsData, crc));
+	FIN_CRC32C(crc);
+
+	if (!EQ_CRC32C(crc, KDFParams->crc))
+	{
+		pg_log_fatal("incorrect checksum in key setup file \"%s\"", path);
+		exit(EXIT_FAILURE);
+	}
+
+	if (KDFParams->function != KDF_OPENSSL_PKCS5_PBKDF2_HMAC_SHA)
+	{
+		pg_log_fatal("unsupported KDF function");
+		exit(EXIT_FAILURE);
+	}
+}
+
+/*
+ * Derive encryption_key from password via PBKDF2 (exits on failure); KDFParams must be set.
+ */
+void
+derive_key_from_password(unsigned char *encryption_key, const char *password,
+						 int len)
+{
+	KDFParamsPBKDF2 *params;
+	int			rc;
+
+	params = &KDFParams->data.pbkdf2;
+	rc = PKCS5_PBKDF2_HMAC(password,
+						   len,
+						   params->salt,
+						   ENCRYPTION_KDF_SALT_LEN,
+						   params->niter,
+						   EVP_sha1(),
+						   ENCRYPTION_KEY_LENGTH,
+						   encryption_key);
+
+	if (rc != 1)
+	{
+		pg_log_fatal("failed to derive key from password");
+		exit(EXIT_FAILURE);
+	}
+}
+
 /*
  * Run the command that is supposed to generate encryption key and store it
- * where encryption_key points to.
+ * where encryption_key points to. If valid string is passed for data_dir,
+ * it's used to replace '%D' pattern in the command.
  */
 void
-run_encryption_key_command(unsigned char *encryption_key)
+run_encryption_key_command(unsigned char *encryption_key, char *data_dir)
 {
 	FILE	   *fp;
+	char	cmd[MAXPGPATH];
+	char	*sp, *dp, *endp;
 	char	   *buf;
 	int		read_len, i, c;
 
 	Assert(encryption_key_command != NULL &&
 		   strlen(encryption_key_command) > 0);
 
-	fp = popen(encryption_key_command, "r");
+	/*
+	 * Replace %D pattern in the command with the actual data directory path.
+	 */
+	dp = cmd;
+	endp = cmd + MAXPGPATH - 1;
+	*endp = '\0';
+	for (sp = encryption_key_command; *sp; sp++)
+	{
+		if (*sp == '%')
+		{
+			if (sp[1] == 'D')
+			{
+				if (data_dir == NULL)
+				{
+					pg_log_fatal("data directory is not known, %%D pattern cannot be replaced");
+					exit(EXIT_FAILURE);
+				}
+
+				sp++;
+				strlcpy(dp, data_dir, endp - dp);
+				make_native_path(dp);
+				dp += strlen(dp);
+			}
+			else if (dp < endp)
+				*dp++ = *sp;
+			else
+				break;
+		}
+		else
+		{
+			if (dp < endp)
+				*dp++ = *sp;
+			else
+				break;
+		}
+	}
+	*dp = '\0';
+
+	pg_log_debug("executing encryption key command \"%s\"", cmd);
+
+	fp = popen(cmd, "r");
 	if (fp == NULL)
 	{
-		pg_log_fatal("Failed to execute \"%s\"", encryption_key_command);
+		pg_log_fatal("Failed to execute \"%s\"", cmd);
 		exit(EXIT_FAILURE);
 	}
 
diff --git a/src/include/fe_utils/encryption.h b/src/include/fe_utils/encryption.h
index 7307557ed6..da302fe494 100644
--- a/src/include/fe_utils/encryption.h
+++ b/src/include/fe_utils/encryption.h
@@ -15,6 +15,12 @@
 /* Executable to retrieve the encryption key. */
 extern char *encryption_key_command;
 
-extern void run_encryption_key_command(unsigned char *encryption_key);
+extern void init_kdf(void);
+extern void write_kdf_file(char *dir);
+extern void read_kdf_file(char *dir);
+extern void derive_key_from_password(unsigned char *encryption_key,
+									 const char *password, int len);
+extern void run_encryption_key_command(unsigned char *encryption_key,
+									   char *data_dir);
 extern bool send_key_to_postmaster(const char *host, const char *port,
 								   const unsigned char *encryption_Key);
diff --git a/src/include/storage/encryption.h b/src/include/storage/encryption.h
index 4f7b96d4f3..72bcae0972 100644
--- a/src/include/storage/encryption.h
+++ b/src/include/storage/encryption.h
@@ -57,6 +57,14 @@ typedef enum CipherKind
 	PG_CIPHER_AES_BLOCK_CBC_256_STREAM_CTR_256
 }			CipherKind;
 
+/*
+ * Password length limits and KDF parameters. TODO Tune these values.
+ */
+#define ENCRYPTION_PWD_MIN_LENGTH	8
+#define ENCRYPTION_PWD_MAX_LENGTH	16
+#define ENCRYPTION_KDF_NITER		1048576
+#define	ENCRYPTION_KDF_SALT_LEN		sizeof(uint64)
+
 /* Key to encrypt / decrypt data. */
 extern unsigned char encryption_key[];
 
-- 
2.13.7

