diff --git a/contrib/test_decoding/expected/toast.out b/contrib/test_decoding/expected/toast.out
index 735b14c..389748f 100644
--- a/contrib/test_decoding/expected/toast.out
+++ b/contrib/test_decoding/expected/toast.out
@@ -292,6 +292,64 @@ SELECT substr(data, 1, 200) FROM pg_logical_slot_get_changes('regression_slot',
  COMMIT
 (235 rows)
 
+-- test we can decode "old" tuples bigger than the max heap tuple size correctly
+DROP TABLE IF EXISTS toasted_several;
+NOTICE:  table "toasted_several" does not exist, skipping
+CREATE TABLE toasted_several (
+    id serial unique not null,
+    toasted_key text primary key,
+    toasted_col1 text,
+    toasted_col2 text
+);
+ALTER TABLE toasted_several REPLICA IDENTITY FULL;
+ALTER TABLE toasted_several ALTER COLUMN toasted_key SET STORAGE EXTERNAL;
+ALTER TABLE toasted_several ALTER COLUMN toasted_col1 SET STORAGE EXTERNAL;
+ALTER TABLE toasted_several ALTER COLUMN toasted_col2 SET STORAGE EXTERNAL;
+INSERT INTO toasted_several(toasted_key) VALUES(repeat('9876543210', 2000));
+SELECT regexp_replace(data, '^(.{100}).*(.{100})$', '\1..\2') FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+                                                                                               regexp_replace                                                                                               
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ BEGIN
+ table public.toasted_several: INSERT: id[integer]:1 toasted_key[text]:'98765432109876543210987654321..098765432109876543210987654321098765432109876543210' toasted_col1[text]:null toasted_col2[text]:null
+ COMMIT
+(3 rows)
+
+-- test update of a toasted key without changing it
+UPDATE toasted_several SET toasted_col1 = toasted_key;
+UPDATE toasted_several SET toasted_col2 = toasted_col1;
+SELECT regexp_replace(data, '^(.{100}).*(.{100})$', '\1..\2') FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+                                                                                               regexp_replace                                                                                               
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ BEGIN
+ table public.toasted_several: INSERT: id[integer]:1 toasted_key[text]:'98765432109876543210987654321..098765432109876543210987654321098765432109876543210' toasted_col1[text]:null toasted_col2[text]:null
+ COMMIT
+ BEGIN
+ table public.toasted_several: UPDATE: old-key: id[integer]:1 toasted_key[text]:'98765432109876543210..432109876543210987654321098765432109876543210987654321098765432109876543210' toasted_col2[text]:null
+ COMMIT
+ BEGIN
+ table public.toasted_several: UPDATE: old-key: id[integer]:1 toasted_key[text]:'98765432109876543210..876543210987654321098765432109876543210987654321098765432109876543210987654321098765432109876543210'
+ COMMIT
+(9 rows)
+
+/*
+ * update with large tuplebuf, in a transaction large enough to force to spool to disk
+ */
+BEGIN;
+INSERT INTO toasted_several(toasted_key) SELECT * FROM generate_series(1, 10234);
+UPDATE toasted_several SET toasted_col1 = toasted_col2 WHERE id = 1;
+DELETE FROM toasted_several WHERE id = 1;
+COMMIT;
+DROP TABLE toasted_several;
+SELECT regexp_replace(data, '^(.{100}).*(.{100})$', '\1..\2') FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1')
+WHERE data NOT LIKE '%INSERT: %';
+                                                                                               regexp_replace                                                                                               
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ BEGIN
+ table public.toasted_several: UPDATE: old-key: id[integer]:1 toasted_key[text]:'98765432109876543210..7654321098765432109876543210987654321098765432109876543210' toasted_col2[text]:unchanged-toast-datum
+ table public.toasted_several: DELETE: id[integer]:1 toasted_key[text]:'98765432109876543210987654321..876543210987654321098765432109876543210987654321098765432109876543210987654321098765432109876543210'
+ COMMIT
+(4 rows)
+
 SELECT pg_drop_replication_slot('regression_slot');
  pg_drop_replication_slot 
 --------------------------
diff --git a/contrib/test_decoding/sql/toast.sql b/contrib/test_decoding/sql/toast.sql
index 26d6b4f..dcb74e3 100644
--- a/contrib/test_decoding/sql/toast.sql
+++ b/contrib/test_decoding/sql/toast.sql
@@ -265,4 +265,41 @@ ALTER TABLE toasted_copy ALTER COLUMN data SET STORAGE EXTERNAL;
 203	untoasted200
 \.
 SELECT substr(data, 1, 200) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+
+-- test we can decode "old" tuples bigger than the max heap tuple size correctly
+DROP TABLE IF EXISTS toasted_several;
+CREATE TABLE toasted_several (
+    id serial unique not null,
+    toasted_key text primary key,
+    toasted_col1 text,
+    toasted_col2 text
+);
+ALTER TABLE toasted_several REPLICA IDENTITY FULL;
+ALTER TABLE toasted_several ALTER COLUMN toasted_key SET STORAGE EXTERNAL;
+ALTER TABLE toasted_several ALTER COLUMN toasted_col1 SET STORAGE EXTERNAL;
+ALTER TABLE toasted_several ALTER COLUMN toasted_col2 SET STORAGE EXTERNAL;
+
+INSERT INTO toasted_several(toasted_key) VALUES(repeat('9876543210', 2000));
+
+SELECT regexp_replace(data, '^(.{100}).*(.{100})$', '\1..\2') FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+
+-- test update of a toasted key without changing it
+UPDATE toasted_several SET toasted_col1 = toasted_key;
+UPDATE toasted_several SET toasted_col2 = toasted_col1;
+
+SELECT regexp_replace(data, '^(.{100}).*(.{100})$', '\1..\2') FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+
+/*
+ * update with large tuplebuf, in a transaction large enough to force to spool to disk
+ */
+BEGIN;
+INSERT INTO toasted_several(toasted_key) SELECT * FROM generate_series(1, 10234);
+UPDATE toasted_several SET toasted_col1 = toasted_col2 WHERE id = 1;
+DELETE FROM toasted_several WHERE id = 1;
+COMMIT;
+
+DROP TABLE toasted_several;
+
+SELECT regexp_replace(data, '^(.{100}).*(.{100})$', '\1..\2') FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1')
+WHERE data NOT LIKE '%INSERT: %';
 SELECT pg_drop_replication_slot('regression_slot');
diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index 56be1ed..c62e453 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -78,6 +78,14 @@ static void DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tup);
  * Take every XLogReadRecord()ed record and perform the actions required to
  * decode it using the output plugin already setup in the logical decoding
  * context.
+ *
+ * NB: Every record's xid needs to be processed by the reorderbuffer (xids
+ * contained in the content of records are not relevant for this rule).
+ * That means that for records which would otherwise not go through the
+ * reorderbuffer, ReorderBufferProcessXid() has to be called. We don't want
+ * to call ReorderBufferProcessXid() for each record type by default,
+ * because e.g. empty xacts can be handled more efficiently if there's no
+ * previous state for them.
  */
 void
 LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogReaderState *record)
@@ -135,6 +143,9 @@ LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogReaderState *recor
 		case RM_BRIN_ID:
 		case RM_COMMIT_TS_ID:
 		case RM_REPLORIGIN_ID:
+			/* just deal with xid, and done */
+			ReorderBufferProcessXid(ctx->reorder, XLogRecGetXid(record),
+									buf.origptr);
 			break;
 		case RM_NEXT_ID:
 			elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) XLogRecGetRmid(buf.record));
@@ -150,6 +161,9 @@ DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	SnapBuild  *builder = ctx->snapshot_builder;
 	uint8		info = XLogRecGetInfo(buf->record) & ~XLR_INFO_MASK;
 
+	ReorderBufferProcessXid(ctx->reorder, XLogRecGetXid(buf->record),
+							buf->origptr);
+
 	switch (info)
 	{
 			/* this is also used in END_OF_RECOVERY checkpoints */
@@ -260,6 +274,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 			 * transactions in the changestream allowing for a kind of
 			 * distributed 2PC.
 			 */
+			ReorderBufferProcessXid(reorder, XLogRecGetXid(r), buf->origptr);
 			break;
 		default:
 			elog(ERROR, "unexpected RM_XACT_ID record type: %u", info);
@@ -276,6 +291,8 @@ DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	XLogReaderState *r = buf->record;
 	uint8		info = XLogRecGetInfo(r) & ~XLR_INFO_MASK;
 
+	ReorderBufferProcessXid(ctx->reorder, XLogRecGetXid(r), buf->origptr);
+
 	switch (info)
 	{
 		case XLOG_RUNNING_XACTS:
@@ -313,6 +330,8 @@ DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	TransactionId xid = XLogRecGetXid(buf->record);
 	SnapBuild  *builder = ctx->snapshot_builder;
 
+	ReorderBufferProcessXid(ctx->reorder, xid, buf->origptr);
+
 	/* no point in doing anything yet */
 	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
 		return;
@@ -366,6 +385,8 @@ DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	TransactionId xid = XLogRecGetXid(buf->record);
 	SnapBuild  *builder = ctx->snapshot_builder;
 
+	ReorderBufferProcessXid(ctx->reorder, xid, buf->origptr);
+
 	/* no point in doing anything yet */
 	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
 		return;
@@ -584,7 +605,8 @@ DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 		Size		tuplelen;
 		char	   *tupledata = XLogRecGetBlockData(r, 0, &tuplelen);
 
-		change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
+		change->data.tp.newtuple =
+			ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
 
 		DecodeXLogTuple(tupledata, tuplelen, change->data.tp.newtuple);
 	}
@@ -630,7 +652,8 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	{
 		data = XLogRecGetBlockData(r, 0, &datalen);
 
-		change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
+		change->data.tp.newtuple =
+			ReorderBufferGetTupleBuf(ctx->reorder, datalen);
 
 		DecodeXLogTuple(data, datalen, change->data.tp.newtuple);
 	}
@@ -641,7 +664,8 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 		data = XLogRecGetData(r) + SizeOfHeapUpdate;
 		datalen = XLogRecGetDataLen(r) - SizeOfHeapUpdate;
 
-		change->data.tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder);
+		change->data.tp.oldtuple =
+			ReorderBufferGetTupleBuf(ctx->reorder, datalen);
 
 		DecodeXLogTuple(data, datalen, change->data.tp.oldtuple);
 	}
@@ -691,13 +715,15 @@ DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	/* old primary key stored */
 	if (xlrec->flags & XLH_DELETE_CONTAINS_OLD)
 	{
+		Size	len = XLogRecGetDataLen(r) - SizeOfHeapDelete;
+
 		Assert(XLogRecGetDataLen(r) > (SizeOfHeapDelete + SizeOfHeapHeader));
 
-		change->data.tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder);
+		change->data.tp.oldtuple =
+			ReorderBufferGetTupleBuf(ctx->reorder, len);
 
 		DecodeXLogTuple((char *) xlrec + SizeOfHeapDelete,
-						XLogRecGetDataLen(r) - SizeOfHeapDelete,
-						change->data.tp.oldtuple);
+						len, change->data.tp.oldtuple);
 	}
 
 	change->data.tp.clear_toast_afterwards = true;
@@ -757,35 +783,36 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 		 */
 		if (xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE)
 		{
-			change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
+			xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(data);
+			data = ((char *) xlhdr) + SizeOfMultiInsertTuple;
+			datalen = xlhdr->datalen;
+
+			change->data.tp.newtuple =
+				ReorderBufferGetTupleBuf(ctx->reorder, datalen);
 
 			tuple = change->data.tp.newtuple;
 
 			/* not a disk based tuple */
 			ItemPointerSetInvalid(&tuple->tuple.t_self);
 
-			xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(data);
-			data = ((char *) xlhdr) + SizeOfMultiInsertTuple;
-			datalen = xlhdr->datalen;
-
 			/*
 			 * We can only figure this out after reassembling the
 			 * transactions.
 			 */
 			tuple->tuple.t_tableOid = InvalidOid;
-			tuple->tuple.t_data = &tuple->t_data.header;
+			tuple->tuple.t_data = &tuple->header;
 			tuple->tuple.t_len = datalen + SizeofHeapTupleHeader;
 
-			memset(&tuple->t_data.header, 0, SizeofHeapTupleHeader);
+			memset(&tuple->header, 0, SizeofHeapTupleHeader);
 
-			memcpy((char *) &tuple->t_data.header + SizeofHeapTupleHeader,
+			memcpy((char *) &tuple->header + SizeofHeapTupleHeader,
 				   (char *) data,
 				   datalen);
 			data += datalen;
 
-			tuple->t_data.header.t_infomask = xlhdr->t_infomask;
-			tuple->t_data.header.t_infomask2 = xlhdr->t_infomask2;
-			tuple->t_data.header.t_hoff = xlhdr->t_hoff;
+			tuple->header.t_infomask = xlhdr->t_infomask;
+			tuple->header.t_infomask2 = xlhdr->t_infomask2;
+			tuple->header.t_hoff = xlhdr->t_hoff;
 		}
 
 		/*
@@ -853,7 +880,6 @@ DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple)
 	int			datalen = len - SizeOfHeapHeader;
 
 	Assert(datalen >= 0);
-	Assert(datalen <= MaxHeapTupleSize);
 
 	tuple->tuple.t_len = datalen + SizeofHeapTupleHeader;
 
@@ -862,20 +888,20 @@ DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple)
 
 	/* we can only figure this out after reassembling the transactions */
 	tuple->tuple.t_tableOid = InvalidOid;
-	tuple->tuple.t_data = &tuple->t_data.header;
+	tuple->tuple.t_data = &tuple->header;
 
 	/* data is not stored aligned, copy to aligned storage */
 	memcpy((char *) &xlhdr,
 		   data,
 		   SizeOfHeapHeader);
 
-	memset(&tuple->t_data.header, 0, SizeofHeapTupleHeader);
+	memset(&tuple->header, 0, SizeofHeapTupleHeader);
 
-	memcpy((char *) &tuple->t_data.header + SizeofHeapTupleHeader,
+	memcpy((char *) &tuple->header + SizeofHeapTupleHeader,
 		   data + SizeOfHeapHeader,
 		   datalen);
 
-	tuple->t_data.header.t_infomask = xlhdr.t_infomask;
-	tuple->t_data.header.t_infomask2 = xlhdr.t_infomask2;
-	tuple->t_data.header.t_hoff = xlhdr.t_hoff;
+	tuple->header.t_infomask = xlhdr.t_infomask;
+	tuple->header.t_infomask2 = xlhdr.t_infomask2;
+	tuple->header.t_hoff = xlhdr.t_hoff;
 }
diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 78acced..8d7c2f6 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -444,13 +444,31 @@ ReorderBufferReturnChange(ReorderBuffer *rb, ReorderBufferChange *change)
 
 
 /*
- * Get an unused, possibly preallocated, ReorderBufferTupleBuf
+ * Get an unused, possibly preallocated, ReorderBufferTupleBuf fitting at
+ * least a tuple of size tuplelen.
  */
 ReorderBufferTupleBuf *
-ReorderBufferGetTupleBuf(ReorderBuffer *rb)
+ReorderBufferGetTupleBuf(ReorderBuffer *rb, Size tuplelen)
 {
 	ReorderBufferTupleBuf *tuple;
 
+	tuplelen += SizeofHeapTupleHeader;
+
+	/*
+	 * Most tuples are below MaxHeapTupleSize, but the ones generated for
+	 * oldtuples can be bigger, as they don't have out-of-line toast
+	 * columns. These we allocate individually.
+	 */
+	if (tuplelen > MaxHeapTupleSize)
+	{
+		tuple = (ReorderBufferTupleBuf *)
+			MemoryContextAlloc(rb->context,
+							   SizeofReorderBufferTupleBuf
+							   + tuplelen);
+		tuple->alloc_tuple_size = tuplelen;
+		return tuple;
+	}
+
 	/* check the slab cache */
 	if (rb->nr_cached_tuplebufs)
 	{
@@ -458,13 +476,17 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb)
 		tuple = slist_container(ReorderBufferTupleBuf, node,
 								slist_pop_head_node(&rb->cached_tuplebufs));
 #ifdef USE_ASSERT_CHECKING
-		memset(tuple, 0xa9, sizeof(ReorderBufferTupleBuf));
+		memset(&tuple->tuple, 0xa9, sizeof(HeapTupleData));
+		memset(&tuple->header, 0xa9, MaxHeapTupleSize);
 #endif
 	}
 	else
 	{
 		tuple = (ReorderBufferTupleBuf *)
-			MemoryContextAlloc(rb->context, sizeof(ReorderBufferTupleBuf));
+			MemoryContextAlloc(rb->context,
+							   SizeofReorderBufferTupleBuf
+							   + MaxHeapTupleSize);
+		tuple->alloc_tuple_size = MaxHeapTupleSize;
 	}
 
 	return tuple;
@@ -479,13 +501,22 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb)
 void
 ReorderBufferReturnTupleBuf(ReorderBuffer *rb, ReorderBufferTupleBuf *tuple)
 {
+	/* never use the slab cache for oversized tuples */
+	if (tuple->alloc_tuple_size > MaxHeapTupleSize)
+	{
+		pfree(tuple);
+		return;
+	}
+
 	/* check whether to put into the slab cache */
 	if (rb->nr_cached_tuplebufs < max_cached_tuplebufs)
 	{
 		rb->nr_cached_tuplebufs++;
 		slist_push_head(&rb->cached_tuplebufs, &tuple->node);
-		VALGRIND_MAKE_MEM_UNDEFINED(tuple, sizeof(ReorderBufferTupleBuf));
+		VALGRIND_MAKE_MEM_UNDEFINED(&tuple->header, tuple->alloc_tuple_size);
+		VALGRIND_MAKE_MEM_UNDEFINED(tuple, SizeofReorderBufferTupleBuf);
 		VALGRIND_MAKE_MEM_DEFINED(&tuple->node, sizeof(tuple->node));
+		VALGRIND_MAKE_MEM_DEFINED(&tuple->alloc_tuple_size, sizeof(tuple->alloc_tuple_size));
 	}
 	else
 	{
@@ -1739,18 +1770,15 @@ ReorderBufferForget(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 	ReorderBufferCleanupTXN(rb, txn);
 }
 
-
 /*
- * Check whether a transaction is already known in this module.xs
+ * Make the reorderbuffer aware of a record's xid (no-op for an invalid xid).
  */
-bool
-ReorderBufferIsXidKnown(ReorderBuffer *rb, TransactionId xid)
+void
+ReorderBufferProcessXid(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 {
-	ReorderBufferTXN *txn;
-
-	txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
-								false);
-	return txn != NULL;
+	/* many records won't have an xid assigned, centralize check here */
+	if (xid != InvalidTransactionId)
+		ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
 }
 
 /*
@@ -2087,11 +2115,11 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
 				newtup = change->data.tp.newtuple;
 
 				if (oldtup)
-					oldlen = offsetof(ReorderBufferTupleBuf, t_data) +
+					oldlen = SizeofReorderBufferTupleBuf +
 						oldtup->tuple.t_len;
 
 				if (newtup)
-					newlen = offsetof(ReorderBufferTupleBuf, t_data) +
+					newlen = SizeofReorderBufferTupleBuf +
 						newtup->tuple.t_len;
 
 				sz += oldlen;
@@ -2330,29 +2358,34 @@ ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
 		case REORDER_BUFFER_CHANGE_UPDATE:
 		case REORDER_BUFFER_CHANGE_DELETE:
 		case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT:
-			if (change->data.tp.newtuple)
-			{
-				Size		len = offsetof(ReorderBufferTupleBuf, t_data) +
-				((ReorderBufferTupleBuf *) data)->tuple.t_len;
-
-				change->data.tp.newtuple = ReorderBufferGetTupleBuf(rb);
-				memcpy(change->data.tp.newtuple, data, len);
-				change->data.tp.newtuple->tuple.t_data =
-					&change->data.tp.newtuple->t_data.header;
-				data += len;
-			}
-
 			if (change->data.tp.oldtuple)
 			{
-				Size		len = offsetof(ReorderBufferTupleBuf, t_data) +
-				((ReorderBufferTupleBuf *) data)->tuple.t_len;
+				Size		tuplelen =
+					((ReorderBufferTupleBuf *) data)->tuple.t_len;
+				Size		len = SizeofReorderBufferTupleBuf + tuplelen;
 
-				change->data.tp.oldtuple = ReorderBufferGetTupleBuf(rb);
+				change->data.tp.oldtuple =
+					ReorderBufferGetTupleBuf(rb, tuplelen - SizeofHeapTupleHeader);
 				memcpy(change->data.tp.oldtuple, data, len);
 				change->data.tp.oldtuple->tuple.t_data =
-					&change->data.tp.oldtuple->t_data.header;
+					&change->data.tp.oldtuple->header;
 				data += len;
 			}
+
+			if (change->data.tp.newtuple)
+			{
+				Size		tuplelen =
+					((ReorderBufferTupleBuf *) data)->tuple.t_len;
+				Size		len = SizeofReorderBufferTupleBuf + tuplelen;
+
+				change->data.tp.newtuple =
+					ReorderBufferGetTupleBuf(rb, tuplelen - SizeofHeapTupleHeader);
+				memcpy(change->data.tp.newtuple, data, len);
+				change->data.tp.newtuple->tuple.t_data =
+					&change->data.tp.newtuple->header;
+				data += len;
+			}
+
 			break;
 		case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT:
 			{
@@ -2729,7 +2762,7 @@ ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
 	 */
 	tmphtup = heap_form_tuple(desc, attrs, isnull);
 	Assert(newtup->tuple.t_len <= MaxHeapTupleSize);
-	Assert(&newtup->t_data.header == newtup->tuple.t_data);
+	Assert(&newtup->header == newtup->tuple.t_data);
 
 	memcpy(newtup->tuple.t_data, tmphtup->t_data, tmphtup->t_len);
 	newtup->tuple.t_len = tmphtup->t_len;
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index ed823ec..179b85a 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -635,8 +635,6 @@ SnapBuildClearExportedSnapshot(void)
 bool
 SnapBuildProcessChange(SnapBuild *builder, TransactionId xid, XLogRecPtr lsn)
 {
-	bool		is_old_tx;
-
 	/*
 	 * We can't handle data in transactions if we haven't built a snapshot
 	 * yet, so don't store them.
@@ -657,9 +655,7 @@ SnapBuildProcessChange(SnapBuild *builder, TransactionId xid, XLogRecPtr lsn)
 	 * If the reorderbuffer doesn't yet have a snapshot, add one now, it will
 	 * be needed to decode the change we're currently processing.
 	 */
-	is_old_tx = ReorderBufferIsXidKnown(builder->reorder, xid);
-
-	if (!is_old_tx || !ReorderBufferXidHasBaseSnapshot(builder->reorder, xid))
+	if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, xid))
 	{
 		/* only build a new snapshot if we don't have a prebuilt one */
 		if (builder->snapshot == NULL)
diff --git a/src/include/replication/reorderbuffer.h b/src/include/replication/reorderbuffer.h
index d33ea27..f7e4c51 100644
--- a/src/include/replication/reorderbuffer.h
+++ b/src/include/replication/reorderbuffer.h
@@ -23,16 +23,18 @@ typedef struct ReorderBufferTupleBuf
 	/* position in preallocated list */
 	slist_node	node;
 
+	/* pre-allocated size of tuple buffer, different from tuple size */
+	Size	alloc_tuple_size;
+
 	/* tuple, stored sequentially */
 	HeapTupleData tuple;
-	union
-	{
-		HeapTupleHeaderData header;
-		char		data[MaxHeapTupleSize];
-		double		align_it;	/* ensure t_data is MAXALIGN'd */
-	}			t_data;
+	HeapTupleHeaderData header;
+
+	/* actual tuple data follows */
 } ReorderBufferTupleBuf;
 
+#define SizeofReorderBufferTupleBuf offsetof(ReorderBufferTupleBuf, header)
+
 /*
  * Types of the change passed to a 'change' callback.
  *
@@ -341,7 +343,7 @@ struct ReorderBuffer
 ReorderBuffer *ReorderBufferAllocate(void);
 void		ReorderBufferFree(ReorderBuffer *);
 
-ReorderBufferTupleBuf *ReorderBufferGetTupleBuf(ReorderBuffer *);
+ReorderBufferTupleBuf *ReorderBufferGetTupleBuf(ReorderBuffer *, Size len);
 void		ReorderBufferReturnTupleBuf(ReorderBuffer *, ReorderBufferTupleBuf *tuple);
 ReorderBufferChange *ReorderBufferGetChange(ReorderBuffer *);
 void		ReorderBufferReturnChange(ReorderBuffer *, ReorderBufferChange *);
@@ -366,7 +368,7 @@ void ReorderBufferAddNewTupleCids(ReorderBuffer *, TransactionId, XLogRecPtr lsn
 						 CommandId cmin, CommandId cmax, CommandId combocid);
 void ReorderBufferAddInvalidations(ReorderBuffer *, TransactionId, XLogRecPtr lsn,
 							  Size nmsgs, SharedInvalidationMessage *msgs);
-bool		ReorderBufferIsXidKnown(ReorderBuffer *, TransactionId xid);
+void		ReorderBufferProcessXid(ReorderBuffer *, TransactionId xid, XLogRecPtr lsn);
 void		ReorderBufferXidSetCatalogChanges(ReorderBuffer *, TransactionId xid, XLogRecPtr lsn);
 bool		ReorderBufferXidHasCatalogChanges(ReorderBuffer *, TransactionId xid);
 bool		ReorderBufferXidHasBaseSnapshot(ReorderBuffer *, TransactionId xid);
