From 958130099bc76534031acc4d3f2e94ccbcfa5451 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Tue, 15 Aug 2023 15:09:28 +0900
Subject: [PATCH] Add test to emulate random garbage data during WAL replay,
 doing OOMs

This requires an extra flush wait at the end of LogLogicalMessage() to
make the test stable.
---
 src/backend/replication/logical/message.c |  7 ++-
 src/test/recovery/t/038_wal_invalid.pl    | 69 +++++++++++++++++++++++
 2 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 src/test/recovery/t/038_wal_invalid.pl

diff --git a/src/backend/replication/logical/message.c b/src/backend/replication/logical/message.c
index c5de14afc6..cebe6c5d05 100644
--- a/src/backend/replication/logical/message.c
+++ b/src/backend/replication/logical/message.c
@@ -47,6 +47,7 @@ LogLogicalMessage(const char *prefix, const char *message, size_t size,
 				  bool transactional)
 {
 	xl_logical_message xlrec;
+	XLogRecPtr	lsn;
 
 	/*
 	 * Force xid to be allocated if we're emitting a transactional message.
@@ -71,7 +72,11 @@ LogLogicalMessage(const char *prefix, const char *message, size_t size,
 	/* allow origin filtering */
 	XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
 
-	return XLogInsert(RM_LOGICALMSG_ID, XLOG_LOGICAL_MESSAGE);
+	lsn = XLogInsert(RM_LOGICALMSG_ID, XLOG_LOGICAL_MESSAGE);
+
+	/* Make sure that the message hits disk before leaving */
+	XLogFlush(lsn);
+	return lsn;
 }
 
 /*
diff --git a/src/test/recovery/t/038_wal_invalid.pl b/src/test/recovery/t/038_wal_invalid.pl
new file mode 100644
index 0000000000..87a6e93734
--- /dev/null
+++ b/src/test/recovery/t/038_wal_invalid.pl
@@ -0,0 +1,69 @@
+
+# Copyright (c) 2021-2023, PostgreSQL Global Development Group
+
+# Test for code paths involving detection of incorrect WAL records.
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Initialize and start node with wal_level = minimal and some
+# configuration aimed at minimizing the WAL activity of the cluster.
+my $node = PostgreSQL::Test::Cluster->new('primary');
+$node->init;
+$node->append_conf('postgresql.conf', q[
+wal_level = minimal
+autovacuum = off
+checkpoint_timeout = '30min'
+]);
+$node->start;
+
+# Generate two WAL records that will be used for the follow-up test,
+# using pg_logical_emit_message.  This function returns the end LSN
+# of the record inserted, and the LSN of the first record is what
+# we use to invalidate the beginning of the second record.  This
+# emulates some historical patterns where a record is read on a
+# page read from a recycled segment at the end of WAL.  The second
+# record could be anything, but pg_logical_emit_message() does not
+# impact the cluster consistency.
+my $result = $node->safe_psql('postgres',
+  q{SELECT * FROM pg_walfile_name_offset(pg_logical_emit_message(false, 'a', 'a'))});
+my ($wal_segment_name, $lsn_offset) = split /\|/, $result;
+$lsn_offset = int($lsn_offset);
+
+$node->safe_psql('postgres',
+  q{SELECT pg_logical_emit_message(false, 'a', 'a')});
+
+# Stop the node without a clean shutdown, so that the next startup
+# has to replay the previous records.
+$node->stop('immediate');
+
+# Add 4 bytes of garbage at the beginning of the second record, to
+# emulate the case of a recycled page.  The previous offset points
+# at the beginning of the second record's xl_tot_len.
+my $pgdata = $node->data_dir;
+
+open(my $fh, '+<', "$pgdata/pg_wal/$wal_segment_name")
+    or BAIL_OUT("open failed: $!");
+# WAL is binary data, so make sure the handle is in binary mode.
+binmode $fh;
+sysseek($fh, $lsn_offset, 0) or BAIL_OUT("sysseek failed: $!");
+syswrite($fh, pack("L", 0xFFFFFFFF)) or BAIL_OUT("syswrite failed: $!");
+close($fh) or BAIL_OUT("close failed: $!");
+
+# The node should be able to start.
+my $log_location = -s $node->logfile;
+$node->start;
+$result = $node->safe_psql('postgres', q(SELECT 1));
+is($result, "1", "node is up and running");
+
+# Confirm that recovery has finished with the expected error.
+my $logfile = slurp_file($node->logfile, $log_location);
+like(
+	$logfile,
+	qr/LOG: .* invalid magic number 0000 in WAL segment $wal_segment_name/,
+	"recovery completed up to the invalid record");
+
+done_testing();
-- 
2.40.1

