From 420bd1d32782a713f5ccbc60f15aa93a3a277f7a Mon Sep 17 00:00:00 2001
From: Nathan Bossart
Date: Wed, 2 Apr 2025 20:55:12 -0500
Subject: [PATCH v12n5 3/3] pg_dump: Retrieve attribute statistics in batches.

Currently, pg_dump gathers attribute statistics with a query per relation,
which can cause pg_dump to take significantly longer, especially when there
are many tables.  This commit improves matters by gathering attribute
statistics for 64 relations at a time.  Some simple testing showed this was
the ideal batch size, but performance may vary depending on workload.  This
change increases the memory usage of pg_dump a bit, but that isn't expected
to be too egregious and is arguably well worth the trade-off.

Our lookahead code for determining the next batch of relations for which to
gather attribute statistics is simple: we walk the TOC sequentially looking
for eligible entries.  However, the assumption that we will dump all such
entries in this order doesn't hold up for dump formats that use
RestoreArchive().  This is because RestoreArchive() does multiple passes
through the TOC and selectively dumps certain entries each time.  This is
particularly troublesome for index stats and a subset of matview stats; both
are in SECTION_POST_DATA, but matview stats that depend on matview data are
dumped in RESTORE_PASS_POST_ACL, while all other statistics data is dumped
in RESTORE_PASS_MAIN.  To deal with this, this commit moves all statistics
data entries in SECTION_POST_DATA to RESTORE_PASS_POST_ACL, which ensures
that we always dump statistics data entries in TOC order.  One convenient
side effect of this change is that we can revert a decent chunk of commit
a0a4601765.

Author: Corey Huinker
Co-authored-by: Nathan Bossart
Reviewed-by: Jeff Davis
Discussion: https://postgr.es/m/CADkLM%3Dc%2Br05srPy9w%2B-%2BnbmLEo15dKXYQ03Q_xyK%2BriJerigLQ%40mail.gmail.com
---
 src/bin/pg_dump/pg_backup.h          |   5 +-
 src/bin/pg_dump/pg_backup_archiver.c |  56 +++++-----
 src/bin/pg_dump/pg_dump.c            | 146 +++++++++++++++++++++++----
 3 files changed, 155 insertions(+), 52 deletions(-)
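To illustrate the change in query shape, the dumper previously issued one
prepared-statement execution per relation:

    EXECUTE getAttributeStats('public', 't1');
    EXECUTE getAttributeStats('public', 't2');

With this patch, fetchAttributeStats() issues one execution per batch of up
to MAX_ATTR_STATS_RELS (64) relations, passing parallel arrays of schema and
relation names:

    EXECUTE getAttributeStats('{public,public}'::pg_catalog.name[],
                              '{t1,t2}'::pg_catalog.name[]);

(The schema and relation names above are examples only; the real statements
are built from fmtId()-quoted identifiers taken from the TOC entries.)
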
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index 781f8fa1cc9..9005b4253b4 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -285,7 +285,10 @@ typedef int DumpId;
 /*
  * Function pointer prototypes for assorted callback methods.
  */
-typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg);
+/* forward declaration to avoid including pg_backup_archiver.h here */
+typedef struct _tocEntry TocEntry;
+
+typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg, const TocEntry *te);
 typedef int (*DataDumperPtr) (Archive *AH, const void *userArg);
 
 typedef void (*SetupWorkerPtrType) (Archive *AH);
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index d7c64583242..7343867584a 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -72,7 +72,7 @@ static void processEncodingEntry(ArchiveHandle *AH, TocEntry *te);
 static void processStdStringsEntry(ArchiveHandle *AH, TocEntry *te);
 static void processSearchPathEntry(ArchiveHandle *AH, TocEntry *te);
 static int  _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH);
-static RestorePass _tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te);
+static RestorePass _tocEntryRestorePass(TocEntry *te);
 static bool _tocEntryIsACL(TocEntry *te);
 static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
 static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
@@ -102,8 +102,7 @@ static void pending_list_append(TocEntry *l, TocEntry *te);
 static void pending_list_remove(TocEntry *te);
 static int  TocEntrySizeCompareQsort(const void *p1, const void *p2);
 static int  TocEntrySizeCompareBinaryheap(void *p1, void *p2, void *arg);
-static void move_to_ready_heap(ArchiveHandle *AH,
-                               TocEntry *pending_list,
+static void move_to_ready_heap(TocEntry *pending_list,
                                binaryheap *ready_heap,
                                RestorePass pass);
 static TocEntry *pop_next_work_item(binaryheap *ready_heap,
@@ -749,7 +748,7 @@ RestoreArchive(Archive *AHX)
             if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) == 0)
                 continue;       /* ignore if not to be dumped at all */
 
-            switch (_tocEntryRestorePass(AH, te))
+            switch (_tocEntryRestorePass(te))
             {
                 case RESTORE_PASS_MAIN:
                     (void) restore_toc_entry(AH, te, false);
@@ -768,7 +767,7 @@ RestoreArchive(Archive *AHX)
             for (te = AH->toc->next; te != AH->toc; te = te->next)
             {
                 if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 &&
-                    _tocEntryRestorePass(AH, te) == RESTORE_PASS_ACL)
+                    _tocEntryRestorePass(te) == RESTORE_PASS_ACL)
                     (void) restore_toc_entry(AH, te, false);
             }
         }
@@ -778,7 +777,7 @@ RestoreArchive(Archive *AHX)
             for (te = AH->toc->next; te != AH->toc; te = te->next)
             {
                 if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 &&
-                    _tocEntryRestorePass(AH, te) == RESTORE_PASS_POST_ACL)
+                    _tocEntryRestorePass(te) == RESTORE_PASS_POST_ACL)
                     (void) restore_toc_entry(AH, te, false);
             }
         }
@@ -2650,7 +2649,7 @@ WriteToc(ArchiveHandle *AH)
         }
         else if (te->defnDumper)
         {
-            char       *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
+            char       *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te);
 
             te->defnLen = WriteStr(AH, defn);
             pg_free(defn);
@@ -3256,7 +3255,7 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
  * See notes with the RestorePass typedef in pg_backup_archiver.h.
  */
 static RestorePass
-_tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te)
+_tocEntryRestorePass(TocEntry *te)
 {
     /* "ACL LANGUAGE" was a crock emitted only in PG 7.4 */
     if (strcmp(te->desc, "ACL") == 0 ||
@@ -3279,23 +3278,17 @@ _tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te)
 
     /*
      * If statistics data is dependent on materialized view data, it must be
-     * deferred to RESTORE_PASS_POST_ACL.
+     * deferred to RESTORE_PASS_POST_ACL.  Those entries are marked with
+     * SECTION_POST_DATA already, and some other stats entries (e.g., stats
+     * for indexes) will also be marked SECTION_POST_DATA.  Furthermore, our
+     * lookahead code in fetchAttributeStats() assumes we dump all statistics
+     * data entries in TOC order.  To ensure this assumption holds, we move
+     * all statistics data entries in SECTION_POST_DATA to
+     * RESTORE_PASS_POST_ACL.
      */
-    if (strcmp(te->desc, "STATISTICS DATA") == 0)
-    {
-        for (int i = 0; i < te->nDeps; i++)
-        {
-            DumpId      depid = te->dependencies[i];
-
-            if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL)
-            {
-                TocEntry   *otherte = AH->tocsByDumpId[depid];
-
-                if (strcmp(otherte->desc, "MATERIALIZED VIEW DATA") == 0)
-                    return RESTORE_PASS_POST_ACL;
-            }
-        }
-    }
+    if (strcmp(te->desc, "STATISTICS DATA") == 0 &&
+        te->section == SECTION_POST_DATA)
+        return RESTORE_PASS_POST_ACL;
 
     /* All else can be handled in the main pass. */
     return RESTORE_PASS_MAIN;
@@ -3945,7 +3938,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
     }
     else if (te->defnDumper)
     {
-        char       *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
+        char       *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te);
 
         te->defnLen = ahprintf(AH, "%s\n\n", defn);
         pg_free(defn);
@@ -4343,7 +4336,7 @@ restore_toc_entries_prefork(ArchiveHandle *AH, TocEntry *pending_list)
          * not set skipped_some in this case, since by assumption no main-pass
          * items could depend on these.
          */
-        if (_tocEntryRestorePass(AH, next_work_item) != RESTORE_PASS_MAIN)
+        if (_tocEntryRestorePass(next_work_item) != RESTORE_PASS_MAIN)
             do_now = false;
 
         if (do_now)
@@ -4425,7 +4418,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate,
      * process in the current restore pass.
      */
     AH->restorePass = RESTORE_PASS_MAIN;
-    move_to_ready_heap(AH, pending_list, ready_heap, AH->restorePass);
+    move_to_ready_heap(pending_list, ready_heap, AH->restorePass);
 
     /*
      * main parent loop
@@ -4474,7 +4467,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate,
             /* Advance to next restore pass */
             AH->restorePass++;
             /* That probably allows some stuff to be made ready */
-            move_to_ready_heap(AH, pending_list, ready_heap, AH->restorePass);
+            move_to_ready_heap(pending_list, ready_heap, AH->restorePass);
             /* Loop around to see if anything's now ready */
             continue;
         }
@@ -4645,8 +4638,7 @@ TocEntrySizeCompareBinaryheap(void *p1, void *p2, void *arg)
  * which applies the same logic one-at-a-time.)
  */
 static void
-move_to_ready_heap(ArchiveHandle *AH,
-                   TocEntry *pending_list,
+move_to_ready_heap(TocEntry *pending_list,
                    binaryheap *ready_heap,
                    RestorePass pass)
 {
@@ -4659,7 +4651,7 @@ move_to_ready_heap(ArchiveHandle *AH,
         next_te = te->pending_next;
 
         if (te->depCount == 0 &&
-            _tocEntryRestorePass(AH, te) == pass)
+            _tocEntryRestorePass(te) == pass)
         {
             /* Remove it from pending_list ... */
             pending_list_remove(te);
@@ -5053,7 +5045,7 @@ reduce_dependencies(ArchiveHandle *AH, TocEntry *te,
          * memberships changed.
          */
         if (otherte->depCount == 0 &&
-            _tocEntryRestorePass(AH, otherte) == AH->restorePass &&
+            _tocEntryRestorePass(otherte) == AH->restorePass &&
             otherte->pending_prev != NULL &&
             ready_heap != NULL)
         {
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 026c8d2779c..1b8303cd0ce 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -209,6 +209,9 @@ static int nbinaryUpgradeClassOids = 0;
 static SequenceItem *sequences = NULL;
 static int  nsequences = 0;
 
+/* Maximum number of relations to fetch in a fetchAttributeStats() call. */
+#define MAX_ATTR_STATS_RELS 64
+
 /*
  * The default number of rows per INSERT when
  * --inserts is specified without --rows-per-insert
@@ -10553,6 +10556,78 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
     appendPQExpBuffer(out, "::%s", argtype);
 }
 
+/*
+ * fetchAttributeStats --
+ *
+ * Fetch next batch of rows for getAttributeStats().
+ */
+static PGresult *
+fetchAttributeStats(Archive *fout)
+{
+    ArchiveHandle *AH = (ArchiveHandle *) fout;
+    PQExpBuffer nspnames = createPQExpBuffer();
+    PQExpBuffer relnames = createPQExpBuffer();
+    int         count = 0;
+    PGresult   *res = NULL;
+    static TocEntry *te;
+    static bool restarted;
+
+    /* If we're just starting, set our TOC pointer. */
+    if (!te)
+        te = AH->toc->next;
+
+    /*
+     * We can't avoid a second TOC scan for the tar format because it writes
+     * restore.sql separately, which means we must execute all of our queries
+     * a second time.  This feels risky, but there is no known reason it
+     * should generate different output than the first pass.  Even if it does,
+     * the worst case is that restore.sql might have different statistics data
+     * than the archive.
+     */
+    if (!restarted && te == AH->toc && AH->format == archTar)
+    {
+        te = AH->toc->next;
+        restarted = true;
+    }
+
+    /*
+     * Scan the TOC for the next set of relevant stats entries.  We assume
+     * that statistics are dumped in the order they are listed in the TOC.
+     * This is perhaps not the sturdiest assumption, so we verify it matches
+     * reality in dumpRelationStats_dumper().
+     */
+    for (; te != AH->toc && count < MAX_ATTR_STATS_RELS; te = te->next)
+    {
+        if (te->reqs && strcmp(te->desc, "STATISTICS DATA") == 0)
+        {
+            RelStatsInfo *rsinfo = (RelStatsInfo *) te->defnDumperArg;
+
+            appendPQExpBuffer(nspnames, "%s%s", count ? "," : "",
+                              fmtId(rsinfo->dobj.namespace->dobj.name));
+            appendPQExpBuffer(relnames, "%s%s", count ? "," : "",
+                              fmtId(rsinfo->dobj.name));
+            count++;
+        }
+    }
+
+    /* Execute the query for the next batch of relations. */
+    if (count > 0)
+    {
+        PQExpBuffer query = createPQExpBuffer();
+
+        appendPQExpBuffer(query, "EXECUTE getAttributeStats("
+                          "'{%s}'::pg_catalog.name[],"
+                          "'{%s}'::pg_catalog.name[])",
+                          nspnames->data, relnames->data);
+        res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+        destroyPQExpBuffer(query);
+    }
+
+    destroyPQExpBuffer(nspnames);
+    destroyPQExpBuffer(relnames);
+    return res;
+}
+
 /*
  * dumpRelationStats_dumper --
  *
@@ -10561,14 +10636,17 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
  * dumped.
  */
 static char *
-dumpRelationStats_dumper(Archive *fout, const void *userArg)
+dumpRelationStats_dumper(Archive *fout, const void *userArg, const TocEntry *te)
 {
     const RelStatsInfo *rsinfo = (RelStatsInfo *) userArg;
     const DumpableObject *dobj = &rsinfo->dobj;
-    PGresult   *res;
+    static PGresult *res;
+    static int  rownum;
     PQExpBuffer query;
     PQExpBufferData out_data;
     PQExpBuffer out = &out_data;
+    int         i_schemaname;
+    int         i_tablename;
     int         i_attname;
     int         i_inherited;
     int         i_null_frac;
@@ -10584,13 +10662,30 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
     int         i_range_length_histogram;
     int         i_range_empty_frac;
     int         i_range_bounds_histogram;
+    static TocEntry *next_te;
+
+    /*
+     * fetchAttributeStats() assumes that the statistics are dumped in the
+     * order they are listed in the TOC.  We verify that here for safety.
+     */
+    if (!next_te)
+        next_te = ((ArchiveHandle *) fout)->toc;
+
+    next_te = next_te->next;
+    while (!next_te->reqs || strcmp(next_te->desc, "STATISTICS DATA") != 0)
+        next_te = next_te->next;
+
+    if (te != next_te)
+        pg_fatal("stats dumped out of order (current: %d %s %s) (expected: %d %s %s)",
+                 te->dumpId, te->desc, te->tag,
+                 next_te->dumpId, next_te->desc, next_te->tag);
 
     query = createPQExpBuffer();
 
     if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS])
     {
         appendPQExpBufferStr(query,
-                             "PREPARE getAttributeStats(pg_catalog.name, pg_catalog.name) AS\n"
-                             "SELECT s.attname, s.inherited, "
+                             "PREPARE getAttributeStats(pg_catalog.name[], pg_catalog.name[]) AS\n"
+                             "SELECT s.schemaname, s.tablename, s.attname, s.inherited, "
                              "s.null_frac, s.avg_width, s.n_distinct, "
                              "s.most_common_vals, s.most_common_freqs, "
                              "s.histogram_bounds, s.correlation, "
@@ -10608,11 +10703,21 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
                             "NULL AS range_length_histogram,"
                             "NULL AS range_empty_frac,"
                             "NULL AS range_bounds_histogram ");
 
+        /*
+         * The results must be in the order of the relations supplied in the
+         * parameters to ensure we remain in sync as we walk through the TOC.
+         * The redundant filter clause on s.tablename = ANY(...) seems
+         * sufficient to convince the planner to use the
+         * pg_class_relname_nsp_index, which avoids a full scan of pg_stats.
+         * This may not work for all versions.
+         */
         appendPQExpBufferStr(query,
                              "FROM pg_catalog.pg_stats s "
-                             "WHERE s.schemaname = $1 "
-                             "AND s.tablename = $2 "
-                             "ORDER BY s.attname, s.inherited");
+                             "JOIN unnest($1, $2) WITH ORDINALITY AS u (schemaname, tablename, ord) "
+                             "ON s.schemaname = u.schemaname "
+                             "AND s.tablename = u.tablename "
+                             "WHERE s.tablename = ANY($2) "
+                             "ORDER BY u.ord, s.attname, s.inherited");
 
         ExecuteSqlStatement(fout, query->data);
 
@@ -10642,16 +10747,16 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
 
     appendPQExpBufferStr(out, "\n);\n");
 
+    /* Fetch the next batch of attribute statistics if needed. */
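+    /*
+     * Note that res and rownum are static: rows already fetched for the
+     * current batch are kept across calls, each call consumes only the rows
+     * belonging to its own relation, and a new batch is fetched only once
+     * the cached result is exhausted.
+     */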
+    if (rownum >= PQntuples(res))
+    {
+        PQclear(res);
+        res = fetchAttributeStats(fout);
+        rownum = 0;
+    }
 
-    /* fetch attribute stats */
-    appendPQExpBufferStr(query, "EXECUTE getAttributeStats(");
-    appendStringLiteralAH(query, dobj->namespace->dobj.name, fout);
-    appendPQExpBufferStr(query, ", ");
-    appendStringLiteralAH(query, dobj->name, fout);
-    appendPQExpBufferStr(query, ");");
-
-    res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
-
+    i_schemaname = PQfnumber(res, "schemaname");
+    i_tablename = PQfnumber(res, "tablename");
     i_attname = PQfnumber(res, "attname");
     i_inherited = PQfnumber(res, "inherited");
     i_null_frac = PQfnumber(res, "null_frac");
@@ -10669,10 +10774,15 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
     i_range_length_histogram = PQfnumber(res, "range_length_histogram");
     i_range_empty_frac = PQfnumber(res, "range_empty_frac");
     i_range_bounds_histogram = PQfnumber(res, "range_bounds_histogram");
 
     /* restore attribute stats */
-    for (int rownum = 0; rownum < PQntuples(res); rownum++)
+    for (; rownum < PQntuples(res); rownum++)
     {
         const char *attname;
 
+        /* Stop if the next stat row in our cache isn't for this relation. */
+        if (strcmp(dobj->name, PQgetvalue(res, rownum, i_tablename)) != 0 ||
+            strcmp(dobj->namespace->dobj.name, PQgetvalue(res, rownum, i_schemaname)) != 0)
+            break;
+
         appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_attribute_stats(\n");
         appendPQExpBuffer(out, "\t'version', '%u'::integer,\n",
                           fout->remoteVersion);
@@ -10762,8 +10872,6 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
         appendPQExpBufferStr(out, "\n);\n");
     }
 
-    PQclear(res);
-
     destroyPQExpBuffer(query);
     return out->data;
 }
-- 
2.39.5 (Apple Git-154)
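
For readers who want to see the batched query in one piece, the prepared
statement assembled from the string literals in dumpRelationStats_dumper()
above comes out roughly as follows (reformatted here; the version-dependent
columns between s.correlation and the range-statistics columns are elided):

    PREPARE getAttributeStats(pg_catalog.name[], pg_catalog.name[]) AS
    SELECT s.schemaname, s.tablename, s.attname, s.inherited,
           s.null_frac, s.avg_width, s.n_distinct,
           s.most_common_vals, s.most_common_freqs,
           s.histogram_bounds, s.correlation,
           ...
    FROM pg_catalog.pg_stats s
         JOIN unnest($1, $2) WITH ORDINALITY AS u (schemaname, tablename, ord)
           ON s.schemaname = u.schemaname
          AND s.tablename = u.tablename
    WHERE s.tablename = ANY($2)
    ORDER BY u.ord, s.attname, s.inherited;

The ORDER BY u.ord keeps the result rows in the same order as the relations
supplied in the input arrays, which is what lets dumpRelationStats_dumper()
walk the cached result in step with the TOC, while the redundant
s.tablename = ANY($2) filter exists only to encourage the planner to use
pg_class_relname_nsp_index rather than scanning all of pg_stats.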