From 6e6d5345d45092bc0acc5ca31c7d7d663fe2ccad Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Tue, 1 Apr 2025 20:46:24 -0500 Subject: [PATCH v12n4 2/3] pg_dump: Reduce memory usage of dumps with statistics. Right now, pg_dump stores all generated commands for statistics in memory. These commands can be quite large and therefore can significantly increase pg_dump's memory footprint. To fix, wait until we are about to write out the commands before generating them, and be sure to free the commands after writing. This is implemented via a new defnDumper callback that works much like the dataDumper one but is specially designed for TOC entries. Custom dumps that include data might write the TOC twice (to update data offset information), which would ordinarily cause pg_dump to run the attribute statistics queries twice. However, as a hack, we save the length of the written-out entry in the first pass, and we skip over it in the second. While there is no known technical problem with executing the queries multiple times and rewriting the results, it's expensive and feels risky, so it seems prudent to avoid it. Author: Corey Huinker Reviewed-by: Jeff Davis Discussion: https://postgr.es/m/CADkLM%3Dc%2Br05srPy9w%2B-%2BnbmLEo15dKXYQ03Q_xyK%2BriJerigLQ%40mail.gmail.com --- src/bin/pg_dump/pg_backup.h | 1 + src/bin/pg_dump/pg_backup_archiver.c | 44 ++++++++++++++++++++++++++-- src/bin/pg_dump/pg_backup_archiver.h | 6 ++++ src/bin/pg_dump/pg_dump.c | 44 ++++++++++++++++++++-------- 4 files changed, 81 insertions(+), 14 deletions(-) diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index 658986de6f8..781f8fa1cc9 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -285,6 +285,7 @@ typedef int DumpId; * Function pointer prototypes for assorted callback methods. */ +typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg); typedef int (*DataDumperPtr) (Archive *AH, const void *userArg); typedef void (*SetupWorkerPtrType) (Archive *AH); diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 1d131e5a57d..334b5dedfd7 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -1266,6 +1266,9 @@ ArchiveEntry(Archive *AHX, CatalogId catalogId, DumpId dumpId, newToc->dataDumperArg = opts->dumpArg; newToc->hadDumper = opts->dumpFn ? true : false; + newToc->defnDumper = opts->defnFn; + newToc->defnDumperArg = opts->defnArg; + newToc->formatData = NULL; newToc->dataLength = 0; @@ -2621,7 +2624,33 @@ WriteToc(ArchiveHandle *AH) WriteStr(AH, te->tag); WriteStr(AH, te->desc); WriteInt(AH, te->section); - WriteStr(AH, te->defn); + + if (te->defnLen) + { + /* + * We only set defnLen when a definition is generated by the + * defnDumper during WriteToc(), so this must be a second + * WriteToc() pass. The defnDumper might execute queries, and + * while running the same queries twice should in theory work + * fine, it's expensive and feels risky. So, we just seek through + * those entries. Presently, the only time we do a second + * WriteToc() pass is for custom-format dumps when we've already + * verified fseeko() works, so we can use it without checking. + * We'll need to figure out something else if this changes. + */ + if (fseeko(AH->FH, te->defnLen, SEEK_CUR) != 0) + pg_fatal("error during file seek: %m"); + } + else if (te->defnDumper) + { + char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg); + + te->defnLen = WriteStr(AH, defn); + pg_free(defn); + } + else + WriteStr(AH, te->defn); + WriteStr(AH, te->dropStmt); WriteStr(AH, te->copyStmt); WriteStr(AH, te->namespace); @@ -3849,7 +3878,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) /* * Actually print the definition. Normally we can just print the defn - * string if any, but we have three special cases: + * string if any, but we have four special cases: * * 1. A crude hack for suppressing AUTHORIZATION clause that old pg_dump * versions put into CREATE SCHEMA. Don't mutate the variant for schema @@ -3862,6 +3891,10 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) * 3. ACL LARGE OBJECTS entries need special processing because they * contain only one copy of the ACL GRANT/REVOKE commands, which we must * apply to each large object listed in the associated BLOB METADATA. + * + * 4. Entries with a defnDumper need to call it to generate the + * definition. This is primarily intended to provide a way to save memory + * for objects that need a lot of it (e.g., statistics data). */ if (ropt->noOwner && strcmp(te->desc, "SCHEMA") == 0 && strncmp(te->defn, "--", 2) != 0) @@ -3877,6 +3910,13 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) { IssueACLPerBlob(AH, te); } + else if (te->defnDumper) + { + char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg); + + ahprintf(AH, "%s\n\n", defn); + pg_free(defn); + } else if (te->defn && strlen(te->defn) > 0) { ahprintf(AH, "%s\n\n", te->defn); diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h index a2064f471ed..b7ebc2b39cd 100644 --- a/src/bin/pg_dump/pg_backup_archiver.h +++ b/src/bin/pg_dump/pg_backup_archiver.h @@ -368,6 +368,10 @@ struct _tocEntry const void *dataDumperArg; /* Arg for above routine */ void *formatData; /* TOC Entry data specific to file format */ + DefnDumperPtr defnDumper; /* routine to dump definition statement */ + const void *defnDumperArg; /* arg for above routine */ + size_t defnLen; /* length of dumped definition */ + /* working state while dumping/restoring */ pgoff_t dataLength; /* item's data size; 0 if none or unknown */ int reqs; /* do we need schema and/or data of object @@ -407,6 +411,8 @@ typedef struct _archiveOpts int nDeps; DataDumperPtr dumpFn; const void *dumpArg; + DefnDumperPtr defnFn; + const void *defnArg; } ArchiveOpts; #define ARCHIVE_OPTS(...) &(ArchiveOpts){__VA_ARGS__} /* Called to add a TOC entry */ diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 4ca34be230c..46fb70e0a8b 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -10560,13 +10560,16 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname, } /* - * dumpRelationStats -- + * dumpRelationStats_dumper -- * - * Dump command to import stats into the relation on the new database. + * Generate command to import stats into the relation on the new database. + * This routine is called by the Archiver when it wants the statistics to be + * dumped. */ -static void -dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) +static char * +dumpRelationStats_dumper(Archive *fout, const void *userArg) { + const RelStatsInfo *rsinfo = (RelStatsInfo *) userArg; const DumpableObject *dobj = &rsinfo->dobj; PGresult *res; PQExpBuffer query; @@ -10586,10 +10589,7 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) int i_range_length_histogram; int i_range_empty_frac; int i_range_bounds_histogram; - - /* nothing to do if we are not dumping statistics */ - if (!fout->dopt->dumpStatistics) - return; + char *ret; query = createPQExpBuffer(); if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS]) @@ -10770,17 +10770,37 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) PQclear(res); + destroyPQExpBuffer(query); + ret = out->data; + pg_free(out); + return ret; +} + +/* + * dumpRelationStats -- + * + * Make an ArchiveEntry for the relation statistics. The Archiver will take + * care of gathering the statistics and generating the restore commands when + * they are needed. + */ +static void +dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) +{ + const DumpableObject *dobj = &rsinfo->dobj; + + /* nothing to do if we are not dumping statistics */ + if (!fout->dopt->dumpStatistics) + return; + ArchiveEntry(fout, nilCatalogId, createDumpId(), ARCHIVE_OPTS(.tag = dobj->name, .namespace = dobj->namespace->dobj.name, .description = "STATISTICS DATA", .section = rsinfo->section, - .createStmt = out->data, + .defnFn = dumpRelationStats_dumper, + .defnArg = rsinfo, .deps = dobj->dependencies, .nDeps = dobj->nDeps)); - - destroyPQExpBuffer(out); - destroyPQExpBuffer(query); } /* -- 2.39.5 (Apple Git-154)