From 3c7ff6e0eea0ed6435e17160df67227f3d43d7cf Mon Sep 17 00:00:00 2001
From: Nathan Bossart
Date: Tue, 1 Apr 2025 11:35:19 -0500
Subject: [PATCH v12n3 3/3] pg_dump: Retrieve attribute statistics in batches.

Currently, pg_dump gathers attribute statistics with a query per
relation, which can cause pg_dump to take significantly longer,
especially when there are many tables. This commit improves
matters by gathering attribute statistics for 64 relations at a
time. Some simple testing showed this was the ideal batch size,
but performance may vary depending on workload.

To construct the next set of relations for the query, we scan
through the TOC list for relevant entries. Ordinarily, we can
stop issuing queries once we reach the end of the list. However,
custom-format dumps that include data run the statistics queries
twice (thanks to commit XXXXXXXXXX), so we allow a second pass in
that case. Our tests showed that batching more than makes up for
any losses from running the queries twice.

This change increases the memory usage of pg_dump a bit, but that
isn't expected to be too egregious and is arguably well worth the
trade-off.

Author: Corey Huinker
Discussion: https://postgr.es/m/CADkLM%3Dc%2Br05srPy9w%2B-%2BnbmLEo15dKXYQ03Q_xyK%2BriJerigLQ%40mail.gmail.com
---
 src/bin/pg_dump/pg_dump.c | 131 ++++++++++++++++++++++++++++++++------
 1 file changed, 113 insertions(+), 18 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 46fb70e0a8b..5dfa12e2ce9 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -209,6 +209,9 @@ static int	nbinaryUpgradeClassOids = 0;
 static SequenceItem *sequences = NULL;
 static int	nsequences = 0;
 
+/* Maximum number of relations to fetch in a fetchAttributeStats() call. */
+#define MAX_ATTR_STATS_RELS 64
+
 /*
  * The default number of rows per INSERT when
  * --inserts is specified without --rows-per-insert
@@ -10559,6 +10562,90 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
 	appendPQExpBuffer(out, "::%s", argtype);
 }
 
+/*
+ * fetchAttributeStats --
+ *
+ * Fetch next batch of rows for getAttributeStats().
+ *
+ * Each call resumes the TOC scan where the previous call stopped, collects up
+ * to MAX_ATTR_STATS_RELS "STATISTICS DATA" entries, and executes the prepared
+ * getAttributeStats statement for them.  Returns the query result, or NULL if
+ * the scan found no further entries.
+ */
+static PGresult *
+fetchAttributeStats(Archive *fout)
+{
+	ArchiveHandle *AH = (ArchiveHandle *) fout;
+	PQExpBuffer nspnames = createPQExpBuffer();
+	PQExpBuffer relnames = createPQExpBuffer();
+	int			count = 0;
+	PGresult   *res = NULL;
+	static bool restarted;
+	static TocEntry *te;
+
+	/* If we're just starting, set our TOC pointer. */
+	if (!te)
+		te = AH->toc->next;
+
+	/*
+	 * Restart the TOC scan once for custom-format dumps that include data.
+	 * This is necessary because we'll call WriteToc() twice in that case.
+	 */
+	if (!restarted && te == AH->toc &&
+		AH->format == archCustom && fout->dopt->dumpData)
+	{
+		te = AH->toc->next;
+		restarted = true;
+	}
+
+	/*
+	 * Build each name list as an array literal.  appendPGArray() takes care
+	 * of element separators and array-element quoting, but expects the caller
+	 * to supply the enclosing braces.
+	 */
+	appendPQExpBufferChar(nspnames, '{');
+	appendPQExpBufferChar(relnames, '{');
+
+	/* Scan the TOC for the next set of relevant stats entries. */
+	for (; te != AH->toc && count < MAX_ATTR_STATS_RELS; te = te->next)
+	{
+		if (te->reqs && strcmp(te->desc, "STATISTICS DATA") == 0)
+		{
+			RelStatsInfo *rsinfo = (RelStatsInfo *) te->defnDumperArg;
+
+			appendPGArray(nspnames, rsinfo->dobj.namespace->dobj.name);
+			appendPGArray(relnames, rsinfo->dobj.name);
+			count++;
+		}
+	}
+
+	/* Execute the query for the next batch of relations. */
+	if (count > 0)
+	{
+		PQExpBuffer query = createPQExpBuffer();
+
+		appendPQExpBufferChar(nspnames, '}');
+		appendPQExpBufferChar(relnames, '}');
+
+		/*
+		 * Pass the arrays as escaped string literals so that names
+		 * containing single quotes cannot terminate the literal early.
+		 */
+		appendPQExpBufferStr(query, "EXECUTE getAttributeStats(");
+		appendStringLiteralAH(query, nspnames->data, fout);
+		appendPQExpBufferStr(query, "::pg_catalog.name[],");
+		appendStringLiteralAH(query, relnames->data, fout);
+		appendPQExpBufferStr(query, "::pg_catalog.name[])");
+
+		res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+		destroyPQExpBuffer(query);
+	}
+
+	destroyPQExpBuffer(nspnames);
+	destroyPQExpBuffer(relnames);
+	return res;
+}
+
 /*
  * dumpRelationStats_dumper --
  *
@@ -10571,9 +10658,12 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
 {
 	const RelStatsInfo *rsinfo = (RelStatsInfo *) userArg;
 	const DumpableObject *dobj = &rsinfo->dobj;
-	PGresult   *res;
+	static PGresult *res;
+	static int	rownum;
 	PQExpBuffer query;
 	PQExpBuffer out;
+	int			i_schemaname;
+	int			i_tablename;
 	int			i_attname;
 	int			i_inherited;
 	int			i_null_frac;
@@ -10595,8 +10685,8 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
 	if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS])
 	{
 		appendPQExpBufferStr(query,
-							 "PREPARE getAttributeStats(pg_catalog.name, pg_catalog.name) AS\n"
-							 "SELECT s.attname, s.inherited, "
+							 "PREPARE getAttributeStats(pg_catalog.name[], pg_catalog.name[]) AS\n"
+							 "SELECT s.schemaname, s.tablename, s.attname, s.inherited, "
 							 "s.null_frac, s.avg_width, s.n_distinct, "
 							 "s.most_common_vals, s.most_common_freqs, "
 							 "s.histogram_bounds, s.correlation, "
@@ -10616,9 +10706,11 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
 
 		appendPQExpBufferStr(query,
 							 "FROM pg_catalog.pg_stats s "
-							 "WHERE s.schemaname = $1 "
-							 "AND s.tablename = $2 "
-							 "ORDER BY s.attname, s.inherited");
+							 "JOIN unnest($1, $2) WITH ORDINALITY AS u (schemaname, tablename, ord) "
+							 "ON s.schemaname = u.schemaname "
+							 "AND s.tablename = u.tablename "
+							 "WHERE s.tablename = ANY($2) "
+							 "ORDER BY u.ord, s.attname, s.inherited");
 
 		ExecuteSqlStatement(fout, query->data);
 
@@ -10648,16 +10740,16 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
 
 	appendPQExpBufferStr(out, "\n);\n");
 
+	/* Fetch the next batch of attribute statistics if needed. */
+	if (rownum >= PQntuples(res))
+	{
+		PQclear(res);
+		res = fetchAttributeStats(fout);
+		rownum = 0;
+	}
 
-	/* fetch attribute stats */
-	appendPQExpBufferStr(query, "EXECUTE getAttributeStats(");
-	appendStringLiteralAH(query, dobj->namespace->dobj.name, fout);
-	appendPQExpBufferStr(query, ", ");
-	appendStringLiteralAH(query, dobj->name, fout);
-	appendPQExpBufferStr(query, ");");
-
-	res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
-
+	i_schemaname = PQfnumber(res, "schemaname");
+	i_tablename = PQfnumber(res, "tablename");
 	i_attname = PQfnumber(res, "attname");
 	i_inherited = PQfnumber(res, "inherited");
 	i_null_frac = PQfnumber(res, "null_frac");
@@ -10675,10 +10767,15 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
 	i_range_bounds_histogram = PQfnumber(res, "range_bounds_histogram");
 
 	/* restore attribute stats */
-	for (int rownum = 0; rownum < PQntuples(res); rownum++)
+	for (; rownum < PQntuples(res); rownum++)
 	{
 		const char *attname;
 
+		/* Stop if the next stat row in our cache isn't for this relation. */
+		if (strcmp(dobj->name, PQgetvalue(res, rownum, i_tablename)) != 0 ||
+			strcmp(dobj->namespace->dobj.name, PQgetvalue(res, rownum, i_schemaname)) != 0)
+			break;
+
 		appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_attribute_stats(\n");
 		appendPQExpBuffer(out, "\t'version', '%u'::integer,\n",
 						  fout->remoteVersion);
@@ -10768,8 +10865,6 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
 		appendPQExpBufferStr(out, "\n);\n");
 	}
 
-	PQclear(res);
-
 	destroyPQExpBuffer(query);
 	ret = out->data;
 	pg_free(out);
-- 
2.39.5 (Apple Git-154)