From c3aeed33b351475896c2b4002cff125f1edeeb42 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 14 Jun 2024 16:25:04 -0500 Subject: [PATCH v3 1/1] Use CREATE DATABASE ... STRATEGY = FILE_COPY in pg_upgrade. While the strategy is considered very costly due to expensive checkpoints when the target system has configured large shared_buffers, the issues that those checkpoints protect against don't apply in a pg_upgrade system. By skipping the checkpoints during CREATE DATABASE ... STRATEGY=FILE_COPY and using that strategy for pg_upgrade, we can save a lot of time and IO that would otherwise be spent on reading pages from disk, generating WAL records, writing those WAL records to disk, and then writing the pages to disk - which has the potential of at least double the IO of FILE_COPY. Co-authored-by: Matthias van de Meent Reviewed-by: Ranier Vilela, Dilip Kumar Discussion: https://postgr.es/m/Zl9ta3FtgdjizkJ5%40nathan --- src/backend/commands/dbcommands.c | 18 +++++++++++++++--- src/bin/pg_dump/pg_dump.c | 9 ++++++++- src/bin/pg_upgrade/pg_upgrade.c | 12 ++++++++++++ 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index be629ea92c..99d7a9ba3c 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -563,9 +563,14 @@ CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid, * happened while we're copying files, a file might be deleted just when * we're about to copy it, causing the lstat() call in copydir() to fail * with ENOENT. + * + * In binary upgrade mode, we can skip this checkpoint because we are + * careful to ensure that template0 is fully written to disk prior to any + * CREATE DATABASE commands. */ - RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | - CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL); + if (!IsBinaryUpgrade) + RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | + CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL); /* * Iterate through all tablespaces of the template database, and copy each @@ -657,10 +662,17 @@ CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid, * seem to be much we can do about that except document it as a * limitation. * + * In binary upgrade mode, we can skip this checkpoint because neither of + * these problems applies: we don't ever replay the WAL generated during + * pg_upgrade, and we don't concurrently modify template0 (not to mention + * that trying to take a backup during pg_upgrade is pointless). + * * See CreateDatabaseUsingWalLog() for a less cheesy CREATE DATABASE * strategy that avoids these problems. */ - RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT); + if (!IsBinaryUpgrade) + RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | + CHECKPOINT_WAIT); } /* diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index e324070828..1e51a63442 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -3123,10 +3123,17 @@ dumpDatabase(Archive *fout) * since those can't be altered later. Other DB properties are left to * the DATABASE PROPERTIES entry, so that they can be applied after * reconnecting to the target DB. + * + * For binary upgrade, we use the FILE_COPY strategy because testing has + * shown it to be faster. When the server is in binary upgrade mode, it + * will also skip the checkpoints normally triggered by CREATE DATABASE + * STRATEGY = FILE_COPY. */ if (dopt->binary_upgrade) { - appendPQExpBuffer(creaQry, "CREATE DATABASE %s WITH TEMPLATE = template0 OID = %u", + appendPQExpBuffer(creaQry, + "CREATE DATABASE %s WITH TEMPLATE = template0 " + "OID = %u STRATEGY = FILE_COPY", qdatname, dbCatId.oid); } else diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index af370768b6..45f095a230 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -534,9 +534,21 @@ static void create_new_objects(void) { int dbnum; + PGconn *conn_new_template1; prep_status_progress("Restoring database schemas in the new cluster"); + /* + * Ensure that any changes to template0 are fully written out to disk + * prior to restoring the databases. This is necessary because we use the + * FILE_COPY strategy to create the databases (which testing has shown to + * be faster), and we skip the checkpoints that this strategy ordinarily + * incurs. + */ + conn_new_template1 = connectToServer(&new_cluster, "template1"); + PQclear(executeQueryOrDie(conn_new_template1, "CHECKPOINT")); + PQfinish(conn_new_template1); + /* * We cannot process the template1 database concurrently with others, * because when it's transiently dropped, connection attempts would fail. -- 2.39.3 (Apple Git-146)