From 5868d43af039b86a9b0cc6303646b7782a20f77b Mon Sep 17 00:00:00 2001
From: Antonin Houska <ah@cybertec.at>
Date: Fri, 5 Jul 2019 16:24:01 +0200
Subject: [PATCH 09/17] Enable encryption of relations.

---
 src/backend/access/heap/rewriteheap.c |  16 ++++++
 src/backend/access/nbtree/nbtsort.c   |   2 +
 src/backend/commands/dbcommands.c     |  17 +++++-
 src/backend/replication/basebackup.c  |   2 +-
 src/backend/storage/buffer/bufmgr.c   |  15 +++++
 src/backend/storage/buffer/localbuf.c |   8 +++
 src/backend/storage/file/copydir.c    |  99 ++++++++++++++++++++++++++------
 src/backend/storage/file/encryption.c | 104 ++++++++++++++++++++++++++++++++++
 src/backend/storage/file/reinit.c     |  53 ++++++++++++-----
 src/backend/storage/page/bufpage.c    |  26 +--------
 src/backend/storage/smgr/md.c         |  41 +++++++++++++-
 src/include/storage/copydir.h         |   6 +-
 src/include/storage/encryption.h      |  22 ++++++-
 src/include/storage/reinit.h          |   3 +-
 14 files changed, 350 insertions(+), 64 deletions(-)

diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index 369694fa2e..37aba1eabe 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -330,18 +330,27 @@ end_heap_rewrite(RewriteState state)
 	/* Write the last page, if any */
 	if (state->rs_buffer_valid)
 	{
+		bool	lsn_is_fake = false;
+
 		if (state->rs_use_wal)
 			log_newpage(&state->rs_new_rel->rd_node,
 						MAIN_FORKNUM,
 						state->rs_blockno,
 						state->rs_buffer,
 						true);
+		else if (data_encrypted)
+			lsn_is_fake = EnforceLSNUpdateForEncryption((char *)
+														state->rs_buffer);
+
 		RelationOpenSmgr(state->rs_new_rel);
 
 		PageSetChecksumInplace(state->rs_buffer, state->rs_blockno);
 
 		smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM, state->rs_blockno,
 				   (char *) state->rs_buffer, true);
+
+		if (lsn_is_fake)
+			RestoreInvalidLSN((char *) state->rs_buffer);
 	}
 
 	/*
@@ -692,6 +701,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 
 		if (len + saveFreeSpace > pageFreeSpace)
 		{
+			bool	lsn_is_fake = false;
+
 			/* Doesn't fit, so write out the existing page */
 
 			/* XLOG stuff */
@@ -701,6 +712,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 							state->rs_blockno,
 							page,
 							true);
+			else if (data_encrypted)
+				lsn_is_fake = EnforceLSNUpdateForEncryption((char *) page);
 
 			/*
 			 * Now write the page. We say isTemp = true even if it's not a
@@ -715,6 +728,9 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 			smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM,
 					   state->rs_blockno, (char *) page, true);
 
+			if (lsn_is_fake)
+				RestoreInvalidLSN((char *) page);
+
 			state->rs_blockno++;
 			state->rs_buffer_valid = false;
 		}
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index d0b9013caf..e451d6054f 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -660,6 +660,8 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
 		/* We use the heap NEWPAGE record type for this */
 		log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page, true);
 	}
+	else if (data_encrypted)
+		EnforceLSNUpdateForEncryption((char *) page);
 
 	/*
 	 * If we have to write pages nonsequentially, fill in the space with
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 863f89f19d..7ea783d096 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -609,6 +609,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 			Oid			dsttablespace;
 			char	   *srcpath;
 			char	   *dstpath;
+			RelFileNode src_node = {srctablespace, src_dboid, InvalidOid};
+			RelFileNode dst_node;
 			struct stat st;
 
 			/* No need to copy global tablespace */
@@ -630,6 +632,10 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 			else
 				dsttablespace = srctablespace;
 
+			dst_node.spcNode = dsttablespace;
+			dst_node.dbNode = dboid;
+			dst_node.relNode= InvalidOid;
+
 			dstpath = GetDatabasePath(dboid, dsttablespace);
 
 			/*
@@ -637,7 +643,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 			 *
 			 * We don't need to copy subdirectories
 			 */
-			copydir(srcpath, dstpath, false);
+			copydir(srcpath, dstpath, &src_node, &dst_node);
 
 			/* Record the filesystem change in XLOG */
 			{
@@ -1272,10 +1278,13 @@ movedb(const char *dbname, const char *tblspcname)
 	PG_ENSURE_ERROR_CLEANUP(movedb_failure_callback,
 							PointerGetDatum(&fparms));
 	{
+		RelFileNode src_node = {src_tblspcoid, db_id, InvalidOid};
+		RelFileNode dst_node = {dst_tblspcoid, db_id, InvalidOid};
+
 		/*
 		 * Copy files from the old tablespace to the new one
 		 */
-		copydir(src_dbpath, dst_dbpath, false);
+		copydir(src_dbpath, dst_dbpath, &src_node, &dst_node);
 
 		/*
 		 * Record the filesystem change in XLOG
@@ -2109,6 +2118,8 @@ dbase_redo(XLogReaderState *record)
 		char	   *src_path;
 		char	   *dst_path;
 		struct stat st;
+		RelFileNode src_node = {xlrec->src_tablespace_id, xlrec->src_db_id, InvalidOid};
+		RelFileNode dst_node = {xlrec->tablespace_id, xlrec->db_id, InvalidOid};
 
 		src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id);
 		dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
@@ -2138,7 +2149,7 @@ dbase_redo(XLogReaderState *record)
 		 *
 		 * We don't need to copy subdirectories
 		 */
-		copydir(src_path, dst_path, false);
+		copydir(src_path, dst_path, &src_node, &dst_node);
 	}
 	else if (info == XLOG_DBASE_DROP)
 	{
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 7a1b38466b..678fdfa5a2 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -1104,7 +1104,7 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
 		/* Exclude all forks for unlogged tables except the init fork */
 		if (isDbDir &&
 			parse_filename_for_nontemp_relation(de->d_name, &relOidChars,
-												&relForkNum))
+												&relForkNum, NULL))
 		{
 			/* Never exclude init forks */
 			if (relForkNum != INIT_FORKNUM)
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 7332e6b590..a36fb7bea7 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -2678,6 +2678,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
 	Block		bufBlock;
 	char	   *bufToWrite;
 	uint32		buf_state;
+	bool	lsn_is_fake = false;
 
 	/*
 	 * Acquire the buffer's io_in_progress lock.  If StartBufferIO returns
@@ -2742,6 +2743,9 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
 	 */
 	bufBlock = BufHdrGetBlock(buf);
 
+	if (data_encrypted && !(buf_state & BM_PERMANENT))
+		lsn_is_fake = EnforceLSNUpdateForEncryption(bufBlock);
+
 	/*
 	 * Update page checksum if desired.  Since we have only shared lock on the
 	 * buffer, other processes might be updating hint bits in it, so we must
@@ -2771,6 +2775,9 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
 
 	pgBufferUsage.shared_blks_written++;
 
+	if (lsn_is_fake)
+		RestoreInvalidLSN(bufBlock);
+
 	/*
 	 * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
 	 * end the io_in_progress state.
@@ -3209,6 +3216,7 @@ FlushRelationBuffers(Relation rel)
 			{
 				ErrorContextCallback errcallback;
 				Page		localpage;
+				bool	lsn_is_fake = false;
 
 				localpage = (char *) LocalBufHdrGetBlock(bufHdr);
 
@@ -3218,6 +3226,10 @@ FlushRelationBuffers(Relation rel)
 				errcallback.previous = error_context_stack;
 				error_context_stack = &errcallback;
 
+				if (data_encrypted)
+					lsn_is_fake = EnforceLSNUpdateForEncryption((char *)
+																localpage);
+
 				PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
 
 				smgrwrite(rel->rd_smgr,
@@ -3226,6 +3238,9 @@ FlushRelationBuffers(Relation rel)
 						  localpage,
 						  false);
 
+				if (lsn_is_fake)
+					RestoreInvalidLSN((char *) localpage);
+
 				buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
 				pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
 
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 391b6d6e16..cd36ec78a7 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -20,6 +20,7 @@
 #include "executor/instrument.h"
 #include "storage/buf_internals.h"
 #include "storage/bufmgr.h"
+#include "storage/encryption.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
 #include "utils/resowner_private.h"
@@ -207,10 +208,14 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 	{
 		SMgrRelation oreln;
 		Page		localpage = (char *) LocalBufHdrGetBlock(bufHdr);
+		bool	lsn_is_fake = false;
 
 		/* Find smgr relation for buffer */
 		oreln = smgropen(bufHdr->tag.rnode, MyBackendId);
 
+		if (data_encrypted)
+			lsn_is_fake = EnforceLSNUpdateForEncryption((char *) localpage);
+
 		PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
 
 		/* And write... */
@@ -220,6 +225,9 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 				  localpage,
 				  false);
 
+		if (lsn_is_fake)
+			RestoreInvalidLSN(localpage);
+
 		/* Mark not-dirty now in case we error out below */
 		buf_state &= ~BM_DIRTY;
 		pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c
index 30f6200a86..b98e9c10d8 100644
--- a/src/backend/storage/file/copydir.c
+++ b/src/backend/storage/file/copydir.c
@@ -23,24 +23,29 @@
 #include <sys/stat.h>
 
 #include "storage/copydir.h"
+#include "storage/encryption.h"
 #include "storage/fd.h"
+#include "storage/reinit.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 
 /*
  * copydir: copy a directory
  *
- * If recurse is false, subdirectories are ignored.  Anything that's not
- * a directory or a regular file is ignored.
+ * RelFileNode values must specify tablespace and database oids for source
+ * and target to support re-encryption if necessary. relNode value in provided
+ * structs will be clobbered.
  */
 void
-copydir(char *fromdir, char *todir, bool recurse)
+copydir(char *fromdir, char *todir, RelFileNode *fromNode, RelFileNode *toNode)
 {
 	DIR		   *xldir;
 	struct dirent *xlde;
 	char		fromfile[MAXPGPATH * 2];
 	char		tofile[MAXPGPATH * 2];
 
+	Assert(!data_encrypted || (fromNode != NULL && toNode != NULL));
+
 	if (MakePGDirectory(todir) != 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
@@ -67,14 +72,32 @@ copydir(char *fromdir, char *todir, bool recurse)
 					(errcode_for_file_access(),
 					 errmsg("could not stat file \"%s\": %m", fromfile)));
 
-		if (S_ISDIR(fst.st_mode))
+		if (S_ISREG(fst.st_mode))
 		{
-			/* recurse to handle subdirectories */
-			if (recurse)
-				copydir(fromfile, tofile, true);
+			int			oidchars;
+			ForkNumber	forkNum;
+			int			segment;
+
+			/*
+			 * For encrypted databases we need to reencrypt files with new
+			 * tweaks.
+			 */
+			if (data_encrypted &&
+				parse_filename_for_nontemp_relation(xlde->d_name,
+													&oidchars, &forkNum, &segment))
+			{
+				char		oidbuf[OIDCHARS + 1];
+
+				memcpy(oidbuf, xlde->d_name, oidchars);
+				oidbuf[oidchars] = '\0';
+
+				/* We scribble over the provided RelFileNodes here */
+				fromNode->relNode = toNode->relNode = atooid(oidbuf);
+				copy_file(fromfile, tofile, fromNode, toNode, forkNum, forkNum, segment);
+			}
+			else
+				copy_file(fromfile, tofile, NULL, NULL, 0, 0, 0);
 		}
-		else if (S_ISREG(fst.st_mode))
-			copy_file(fromfile, tofile);
 	}
 	FreeDir(xldir);
 
@@ -121,17 +144,22 @@ copydir(char *fromdir, char *todir, bool recurse)
 }
 
 /*
- * copy one file
+ * copy one file. If decryption and reencryption may be needed specify
+ * relfilenodes for source and target.
  */
 void
-copy_file(char *fromfile, char *tofile)
+copy_file(char *fromfile, char *tofile, RelFileNode *fromNode,
+		  RelFileNode *toNode, ForkNumber fromForkNum, ForkNumber toForkNum,
+		  int segment)
 {
 	char	   *buffer;
 	int			srcfd;
 	int			dstfd;
 	int			nbytes;
+	int			bytesread;
 	off_t		offset;
 	off_t		flush_offset;
+	BlockNumber blockNum = segment * RELSEG_SIZE;
 
 	/* Size of copy buffer (read and write requests) */
 #define COPY_BUF_SIZE (8 * BLCKSZ)
@@ -186,15 +214,50 @@ copy_file(char *fromfile, char *tofile)
 			flush_offset = offset;
 		}
 
-		pgstat_report_wait_start(WAIT_EVENT_COPY_FILE_READ);
-		nbytes = read(srcfd, buffer, COPY_BUF_SIZE);
-		pgstat_report_wait_end();
-		if (nbytes < 0)
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", fromfile)));
+		/*
+		 * Try to read as much as fits in the buffer so that we deal with
+		 * complete blocks if we need to reencrypt.
+		 */
+		nbytes = 0;
+		while (nbytes < COPY_BUF_SIZE)
+		{
+			pgstat_report_wait_start(WAIT_EVENT_COPY_FILE_READ);
+			bytesread = read(srcfd, buffer + nbytes, COPY_BUF_SIZE - nbytes);
+			pgstat_report_wait_end();
+			if (bytesread < 0)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not read file \"%s\": %m", fromfile)));
+			nbytes += bytesread;
+			if (bytesread == 0)
+				break;
+		}
 		if (nbytes == 0)
 			break;
+
+		/*
+		 * If the database is encrypted we need to decrypt the data here and
+		 * reencrypt it to adjust the tweak values of blocks.
+		 */
+		if (data_encrypted)
+		{
+			if (fromNode != NULL)
+			{
+				Assert(toNode != NULL);
+
+				/*
+				 * There's no reason not to have a whole number of pages read.
+				 * The computation of the number of blocks below relies on
+				 * this fact.
+				 */
+				Assert(nbytes % BLCKSZ == 0);
+
+				blockNum = ReencryptBlock(buffer, nbytes / BLCKSZ,
+										  fromNode, toNode, fromForkNum,
+										  toForkNum, blockNum);
+			}
+		}
+
 		errno = 0;
 		pgstat_report_wait_start(WAIT_EVENT_COPY_FILE_WRITE);
 		if ((int) write(dstfd, buffer, nbytes) != nbytes)
diff --git a/src/backend/storage/file/encryption.c b/src/backend/storage/file/encryption.c
index 971b6928e4..e79f967fbb 100644
--- a/src/backend/storage/file/encryption.c
+++ b/src/backend/storage/file/encryption.c
@@ -63,6 +63,7 @@ char encryption_verification[ENCRYPTION_SAMPLE_SIZE];
 
 bool	encryption_setup_done = false;
 
+PGAlignedBlock encrypt_buf;
 char	   *encrypt_buf_xlog = NULL;
 
 #ifdef USE_ENCRYPTION
@@ -425,3 +426,106 @@ XLogEncryptionTweak(char *tweak, TimeLineID timeline, XLogSegNo segment,
 	tweak += sizeof(XLogSegNo);
 	memcpy(tweak, &offset, sizeof(offset));
 }
+
+/*
+ * Copying a relation to another tablespace/database changes the tweak of each
+ * of its blocks. Transcode "blocks" consecutive blocks of "buffer" in place,
+ * the first one being blockNum. Returns the block number following the last.
+ */
+BlockNumber
+ReencryptBlock(char *buffer, int blocks,
+			   RelFileNode *srcNode, RelFileNode *dstNode,
+			   ForkNumber srcForkNum, ForkNumber dstForkNum,
+			   BlockNumber blockNum)
+{
+	char		oldTweak[TWEAK_SIZE];
+	char		newTweak[TWEAK_SIZE];
+	char	   *page = buffer;
+	int			i;
+
+	for (i = 0; i < blocks; i++, blockNum++, page += BLCKSZ)
+	{
+		mdtweak(oldTweak, srcNode, srcForkNum, blockNum);
+		mdtweak(newTweak, dstNode, dstForkNum, blockNum);
+		decrypt_block(page, page, BLCKSZ, oldTweak, false);
+		encrypt_block(page, page, BLCKSZ, newTweak, false);
+	}
+	return blockNum;
+}
+
+/*
+ * md files are encrypted one block at a time: the tweak is the RelFileNode
+ * followed by (fork << 24) ^ block, so forks alias once blocknum >= 2^24.
+ */
+void
+mdtweak(char *tweak, RelFileNode *relnode, ForkNumber forknum, BlockNumber blocknum)
+{
+	uint32		fork_and_block = ((uint32) forknum << 24) ^ blocknum;
+
+	memcpy(tweak, relnode, sizeof(RelFileNode));
+	memcpy(tweak + sizeof(RelFileNode), &fork_and_block, sizeof(fork_and_block));
+}
+
+#ifndef FRONTEND
+/*
+ * When page is encrypted using a cipher in the cipher-block chaining (CBC)
+ * mode, the fact that the page starts with LSN makes it harder for adversary
+ * to see which part of the plain (unencrypted) page changed: the LSN changes
+ * even if only the plain data at the end of the page changed, and, due to the
+ * chaining, the encrypted page becomes completely different.  Although no LSN
+ * is needed for unlogged tables, we still want to keep the advantage
+ * described here. So we set pd_lsn to "fake LSN" before each write.
+ *
+ * Note that caller needs to restore InvalidXLogRecPtr after the write so that
+ * it can be recognized later that the page needs to be treated specially.
+ *
+ * LW_SHARED on the buffer contents is sufficient because pd_lsn is not
+ * expected to be used for other purposes.
+ *
+ * Returns true iff the LSN was updated.
+ */
+bool
+EnforceLSNUpdateForEncryption(char	*buf_contents)
+{
+	PageHeader	hdr = (PageHeader) buf_contents;
+	XLogRecPtr	recptr;
+
+	/* Failure indicates incorrect use of the function. */
+	Assert(data_encrypted);
+
+	/*
+	 * All-zero page shouldn't be encrypted, so don't care about the fake LSN.
+	 */
+	if (PageIsNew(hdr))
+		return false;
+
+	/*
+	 * Valid LSN indicates that the relation is either logged or it's a page
+	 * of unlogged GIST index. In either case we must not touch the LSN.
+	 */
+	recptr = PageXLogRecPtrGet(hdr->pd_lsn);
+	if (!XLogRecPtrIsInvalid(recptr))
+		return false;
+
+	/* PageXLogRecPtrSet() evaluates its LSN argument twice: use a local. */
+	recptr = GetFakeLSNForUnloggedRel();
+	PageXLogRecPtrSet(hdr->pd_lsn, recptr);
+	return true;
+}
+
+/*
+ * If EnforceLSNUpdateForEncryption() returned true and the same buffer can be
+ * passed to it again later, use this function to restore the
+ * InvalidXLogRecPtr value.
+ */
+void
+RestoreInvalidLSN(char	*buf_contents)
+{
+	PageHeader	page_hdr;
+
+	/* Callers may only get here when data encryption is enabled. */
+	Assert(data_encrypted);
+	page_hdr = (PageHeader) buf_contents;
+	PageXLogRecPtrSet(page_hdr->pd_lsn, (XLogRecPtr) InvalidXLogRecPtr);
+}
+#endif	/* FRONTEND */
diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c
index ccdf80d614..6af20cfe64 100644
--- a/src/backend/storage/file/reinit.c
+++ b/src/backend/storage/file/reinit.c
@@ -16,6 +16,7 @@
 
 #include <unistd.h>
 
+#include "catalog/pg_tablespace.h"
 #include "common/relpath.h"
 #include "storage/copydir.h"
 #include "storage/fd.h"
@@ -24,9 +25,9 @@
 #include "utils/memutils.h"
 
 static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname,
-												  int op);
+									  int op, Oid spcOid);
 static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname,
-											   int op);
+								   int op, Oid spcOid, Oid dbOid);
 
 typedef struct
 {
@@ -68,7 +69,7 @@ ResetUnloggedRelations(int op)
 	/*
 	 * First process unlogged files in pg_default ($PGDATA/base)
 	 */
-	ResetUnloggedRelationsInTablespaceDir("base", op);
+	ResetUnloggedRelationsInTablespaceDir("base", op, DEFAULTTABLESPACE_OID);
 
 	/*
 	 * Cycle through directories for all non-default tablespaces.
@@ -77,13 +78,16 @@ ResetUnloggedRelations(int op)
 
 	while ((spc_de = ReadDir(spc_dir, "pg_tblspc")) != NULL)
 	{
+		Oid			spcOid;
+
 		if (strcmp(spc_de->d_name, ".") == 0 ||
 			strcmp(spc_de->d_name, "..") == 0)
 			continue;
 
 		snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s",
 				 spc_de->d_name, TABLESPACE_VERSION_DIRECTORY);
-		ResetUnloggedRelationsInTablespaceDir(temp_path, op);
+		spcOid = atooid(spc_de->d_name);	/* OIDs are unsigned */
+		ResetUnloggedRelationsInTablespaceDir(temp_path, op, spcOid);
 	}
 
 	FreeDir(spc_dir);
@@ -99,7 +103,8 @@ ResetUnloggedRelations(int op)
  * Process one tablespace directory for ResetUnloggedRelations
  */
 static void
-ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
+ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op,
+									  Oid spcOid)
 {
 	DIR		   *ts_dir;
 	struct dirent *de;
@@ -126,6 +131,8 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
 
 	while ((de = ReadDir(ts_dir, tsdirname)) != NULL)
 	{
+		Oid			dbOid;
+
 		/*
 		 * We're only interested in the per-database directories, which have
 		 * numeric names.  Note that this code will also (properly) ignore "."
@@ -134,9 +141,10 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
 		if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
 			continue;
 
+		dbOid = atooid(de->d_name);	/* OIDs are unsigned */
 		snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
 				 tsdirname, de->d_name);
-		ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
+		ResetUnloggedRelationsInDbspaceDir(dbspace_path, op, spcOid, dbOid);
 	}
 
 	FreeDir(ts_dir);
@@ -146,7 +154,8 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
  * Process one per-dbspace directory for ResetUnloggedRelations
  */
 static void
-ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
+ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op,
+								   Oid spcOid, Oid dbOid)
 {
 	DIR		   *dbspace_dir;
 	struct dirent *de;
@@ -187,7 +196,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 
 			/* Skip anything that doesn't look like a relation data file. */
 			if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
-													 &forkNum))
+													 &forkNum, NULL))
 				continue;
 
 			/* Also skip it unless this is the init fork. */
@@ -229,7 +238,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 
 			/* Skip anything that doesn't look like a relation data file. */
 			if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
-													 &forkNum))
+													 &forkNum, NULL))
 				continue;
 
 			/* We never remove the init fork. */
@@ -279,13 +288,14 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 		{
 			ForkNumber	forkNum;
 			int			oidchars;
+			int			segment;
 			char		oidbuf[OIDCHARS + 1];
 			char		srcpath[MAXPGPATH * 2];
 			char		dstpath[MAXPGPATH];
 
 			/* Skip anything that doesn't look like a relation data file. */
 			if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
-													 &forkNum))
+													 &forkNum, &segment))
 				continue;
 
 			/* Also skip it unless this is the init fork. */
@@ -305,7 +315,13 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 
 			/* OK, we're ready to perform the actual copy. */
 			elog(DEBUG2, "copying %s to %s", srcpath, dstpath);
-			copy_file(srcpath, dstpath);
+			{
+				RelFileNode srcNode = {spcOid, dbOid, atooid(oidbuf)};
+				RelFileNode dstNode = srcNode;
+
+				copy_file(srcpath, dstpath, &srcNode, &dstNode,
+						  INIT_FORKNUM, MAIN_FORKNUM, segment);
+			}
 		}
 
 		FreeDir(dbspace_dir);
@@ -327,7 +343,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 
 			/* Skip anything that doesn't look like a relation data file. */
 			if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
-													 &forkNum))
+													 &forkNum, NULL))
 				continue;
 
 			/* Also skip it unless this is the init fork. */
@@ -372,9 +388,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
  */
 bool
 parse_filename_for_nontemp_relation(const char *name, int *oidchars,
-									ForkNumber *fork)
+									ForkNumber *fork, int *segment)
 {
 	int			pos;
+	int			segstart = 0;
 
 	/* Look for a non-empty string of digits (that isn't too long). */
 	for (pos = 0; isdigit((unsigned char) name[pos]); ++pos)
@@ -401,6 +418,7 @@ parse_filename_for_nontemp_relation(const char *name, int *oidchars,
 	{
 		int			segchar;
 
+		segstart = pos + 1;
 		for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar)
 			;
 		if (segchar <= 1)
@@ -411,5 +429,14 @@ parse_filename_for_nontemp_relation(const char *name, int *oidchars,
 	/* Now we should be at the end. */
 	if (name[pos] != '\0')
 		return false;
+
+	if (segment != NULL)
+	{
+		if (segstart == 0)
+			*segment = 0;
+		else
+			*segment = atoi(name + segstart);
+	}
+
 	return true;
 }
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index 6b49810e37..b6c358a642 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -17,6 +17,7 @@
 #include "access/htup_details.h"
 #include "access/itup.h"
 #include "access/xlog.h"
+#include "common/string.h"
 #include "pgstat.h"
 #include "storage/checksum.h"
 #include "utils/memdebug.h"
@@ -82,11 +83,8 @@ bool
 PageIsVerified(Page page, BlockNumber blkno)
 {
 	PageHeader	p = (PageHeader) page;
-	size_t	   *pagebytes;
-	int			i;
 	bool		checksum_failure = false;
 	bool		header_sane = false;
-	bool		all_zeroes = false;
 	uint16		checksum = 0;
 
 	/*
@@ -119,26 +117,8 @@ PageIsVerified(Page page, BlockNumber blkno)
 			return true;
 	}
 
-	/*
-	 * Check all-zeroes case. Luckily BLCKSZ is guaranteed to always be a
-	 * multiple of size_t - and it's much faster to compare memory using the
-	 * native word size.
-	 */
-	StaticAssertStmt(BLCKSZ == (BLCKSZ / sizeof(size_t)) * sizeof(size_t),
-					 "BLCKSZ has to be a multiple of sizeof(size_t)");
-
-	all_zeroes = true;
-	pagebytes = (size_t *) page;
-	for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)
-	{
-		if (pagebytes[i] != 0)
-		{
-			all_zeroes = false;
-			break;
-		}
-	}
-
-	if (all_zeroes)
+	/* Check all-zeroes case */
+	if (IsAllZero((char *) page, BLCKSZ))
 		return true;
 
 	/*
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 64acc3fa43..1b986eac01 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -32,6 +32,7 @@
 #include "postmaster/bgwriter.h"
 #include "storage/fd.h"
 #include "storage/bufmgr.h"
+#include "storage/encryption.h"
 #include "storage/md.h"
 #include "storage/relfilenode.h"
 #include "storage/smgr.h"
@@ -86,6 +87,7 @@ typedef struct _MdfdVec
 
 static MemoryContext MdCxt;		/* context for all MdfdVec objects */
 
+static char *md_encryption_tweak;
 
 /* Populate a file tag describing an md.c segment file. */
 #define INIT_MD_FILETAG(a,xx_rnode,xx_forknum,xx_segno) \
@@ -139,6 +141,8 @@ static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno,
 static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
 							  MdfdVec *seg);
 
+static void mdencrypt(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer);
+static void mddecrypt(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer);
 
 /*
  *	mdinit() -- Initialize private state for magnetic disk storage manager.
@@ -149,6 +153,8 @@ mdinit(void)
 	MdCxt = AllocSetContextCreate(TopMemoryContext,
 								  "MdSmgr",
 								  ALLOCSET_DEFAULT_SIZES);
+
+	md_encryption_tweak = MemoryContextAllocZero(MdCxt, TWEAK_SIZE);
 }
 
 /*
@@ -401,6 +407,12 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
 	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
 
+	if (data_encrypted)
+	{
+		mdencrypt(reln, forknum, blocknum, buffer);
+		buffer = encrypt_buf.data;
+	}
+
 	if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
 	{
 		if (nbytes < 0)
@@ -587,6 +599,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	off_t		seekpos;
 	int			nbytes;
 	MdfdVec    *v;
+	char	   *buffer_read = buffer;
 
 	TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
 										reln->smgr_rnode.node.spcNode,
@@ -601,7 +614,10 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
 	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
 
-	nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
+	if (data_encrypted)
+		buffer_read = encrypt_buf.data;
+
+	nbytes = FileRead(v->mdfd_vfd, buffer_read, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
 
 	TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
 									   reln->smgr_rnode.node.spcNode,
@@ -636,6 +652,8 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 							blocknum, FilePathName(v->mdfd_vfd),
 							nbytes, BLCKSZ)));
 	}
+	else if (data_encrypted)
+		mddecrypt(reln, forknum, blocknum, buffer);
 }
 
 /*
@@ -671,6 +689,11 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
 	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
 
+	if (data_encrypted)
+	{
+		mdencrypt(reln, forknum, blocknum, buffer);
+		buffer = encrypt_buf.data;
+	}
 	nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_WRITE);
 
 	TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
@@ -1248,6 +1271,22 @@ _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 	return (BlockNumber) (len / BLCKSZ);
 }
 
+
+/* mdencrypt: buffer -> encrypt_buf; mddecrypt: encrypt_buf -> dest. */
+static void
+mdencrypt(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer)
+{
+	mdtweak(md_encryption_tweak, &reln->smgr_rnode.node, forknum, blocknum);
+	encrypt_block(buffer, encrypt_buf.data, BLCKSZ, md_encryption_tweak, false);
+}
+
+static void
+mddecrypt(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *dest)
+{
+	mdtweak(md_encryption_tweak, &reln->smgr_rnode.node, forknum, blocknum);
+	decrypt_block(encrypt_buf.data, dest, BLCKSZ, md_encryption_tweak, false);
+}
+
 /*
  * Sync a file to disk, given a file tag.  Write the path into an output
  * buffer so the caller can use it in error messages.
diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h
index 525cc6203e..21c18bb6c3 100644
--- a/src/include/storage/copydir.h
+++ b/src/include/storage/copydir.h
@@ -13,7 +13,9 @@
 #ifndef COPYDIR_H
 #define COPYDIR_H
 
-extern void copydir(char *fromdir, char *todir, bool recurse);
-extern void copy_file(char *fromfile, char *tofile);
+#include "storage/relfilenode.h"
+
+extern void copydir(char *fromdir, char *todir, RelFileNode *fromNode, RelFileNode *toNode);
+extern void copy_file(char *fromfile, char *tofile, RelFileNode *fromNode, RelFileNode *toNode, ForkNumber fromForkNum, ForkNumber toForkNum, int segment);
 
 #endif							/* COPYDIR_H */
diff --git a/src/include/storage/encryption.h b/src/include/storage/encryption.h
index c6ad9b43bd..eab8b50e98 100644
--- a/src/include/storage/encryption.h
+++ b/src/include/storage/encryption.h
@@ -117,8 +117,15 @@ extern char encryption_verification[];
 extern bool	encryption_setup_done;
 
 /*
- * XLOG encryption/decryption buffer. This buffer spans multiple pages, in
- * order to reduce the number of syscalls when doing I/O.
+ * In some cases we need a separate copy of the data because encryption
+ * in-place (typically in the shared buffers) would make the data unusable for
+ * backends.
+ */
+extern PGAlignedBlock encrypt_buf;
+
+/*
+ * The same for XLOG. This buffer spans multiple pages, in order to reduce the
+ * number of syscalls when doing I/O.
  *
  * XXX Fine tune the buffer size.
  */
@@ -144,5 +151,16 @@ extern void encryption_error(bool fatal, char *message);
 
 extern void XLogEncryptionTweak(char *tweak, TimeLineID timeline,
 					XLogSegNo segment, uint32 offset);
+extern BlockNumber ReencryptBlock(char *buffer, int blocks,
+			   RelFileNode *srcNode, RelFileNode *dstNode,
+			   ForkNumber srcForkNum, ForkNumber dstForkNum,
+			   BlockNumber blockNum);
+extern void mdtweak(char *tweak, RelFileNode *relnode, ForkNumber forknum,
+		BlockNumber blocknum);
+
+#ifndef FRONTEND
+extern bool EnforceLSNUpdateForEncryption(char	*buf_contents);
+extern void RestoreInvalidLSN(char	*buf_contents);
+#endif	/* FRONTEND */
 
 #endif							/* ENCRYPTION_H */
diff --git a/src/include/storage/reinit.h b/src/include/storage/reinit.h
index 63d0b56d48..0e028cac4d 100644
--- a/src/include/storage/reinit.h
+++ b/src/include/storage/reinit.h
@@ -20,7 +20,8 @@
 
 extern void ResetUnloggedRelations(int op);
 extern bool parse_filename_for_nontemp_relation(const char *name,
-												int *oidchars, ForkNumber *fork);
+									int *oidchars, ForkNumber *fork,
+									int *segment);
 
 #define UNLOGGED_RELATION_CLEANUP		0x0001
 #define UNLOGGED_RELATION_INIT			0x0002
-- 
2.13.7

