From 299c7b6d0f5b009d42ee5a4a28278ab3c1bc725c Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Tue, 1 Apr 2025 21:00:24 -0500 Subject: [PATCH v12n4 3/3] pg_dump: Retrieve attribute statistics in batches. Currently, pg_dump gathers attribute statistics with a query per relation, which can cause pg_dump to take significantly longer, especially when there are many tables. This commit improves matters by gathering attribute statistics for 64 relations at a time. Some simple testing showed this was the ideal batch size, but performance may vary depending on workload. This change increases the memory usage of pg_dump a bit, but that isn't expected to be too egregious and is arguably well worth the trade-off. Author: Corey Huinker Reviewed-by: Jeff Davis Discussion: https://postgr.es/m/CADkLM%3Dc%2Br05srPy9w%2B-%2BnbmLEo15dKXYQ03Q_xyK%2BriJerigLQ%40mail.gmail.com --- src/bin/pg_dump/pg_dump.c | 104 +++++++++++++++++++++++++++++++------- 1 file changed, 86 insertions(+), 18 deletions(-) diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 46fb70e0a8b..9d60c77865f 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -209,6 +209,9 @@ static int nbinaryUpgradeClassOids = 0; static SequenceItem *sequences = NULL; static int nsequences = 0; +/* Maximum number of relations to fetch in a fetchAttributeStats() call. */ +#define MAX_ATTR_STATS_RELS 64 + /* * The default number of rows per INSERT when * --inserts is specified without --rows-per-insert @@ -10559,6 +10562,63 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname, appendPQExpBuffer(out, "::%s", argtype); } +/* + * fetchAttributeStats -- + * + * Fetch next batch of rows for getAttributeStats(). + */ +static PGresult * +fetchAttributeStats(Archive *fout) +{ + ArchiveHandle *AH = (ArchiveHandle *) fout; + PQExpBuffer nspnames = createPQExpBuffer(); + PQExpBuffer relnames = createPQExpBuffer(); + int count = 0; + PGresult *res = NULL; + static TocEntry *te; + + /* If we're just starting, set our TOC pointer. */ + if (!te) + te = AH->toc->next; + + /* + * Scan the TOC for the next set of relevant stats entries. We assume + * that statistics are dumped in the order they are listed in the TOC. + * This is perhaps not the sturdiest assumption, but it appears to + * presently be true in all cases. + */ + for (; te != AH->toc && count < MAX_ATTR_STATS_RELS; te = te->next) + { + if (te->reqs && strcmp(te->desc, "STATISTICS DATA") == 0) + { + RelStatsInfo *rsinfo = (RelStatsInfo *) te->defnDumperArg; + + appendPQExpBuffer(nspnames, "%s%s", count ? "," : "", + fmtId(rsinfo->dobj.namespace->dobj.name)); + appendPQExpBuffer(relnames, "%s%s", count ? "," : "", + fmtId(rsinfo->dobj.name)); + count++; + } + } + + /* Execute the query for the next batch of relations. */ + if (count > 0) + { + PQExpBuffer query = createPQExpBuffer(); + + appendPQExpBuffer(query, "EXECUTE getAttributeStats(" + "'{%s}'::pg_catalog.name[]," + "'{%s}'::pg_catalog.name[])", + nspnames->data, relnames->data); + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + destroyPQExpBuffer(query); + } + + destroyPQExpBuffer(nspnames); + destroyPQExpBuffer(relnames); + return res; +} + /* * dumpRelationStats_dumper -- * @@ -10571,9 +10631,12 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg) { const RelStatsInfo *rsinfo = (RelStatsInfo *) userArg; const DumpableObject *dobj = &rsinfo->dobj; - PGresult *res; + static PGresult *res; + static int rownum; PQExpBuffer query; PQExpBuffer out; + int i_schemaname; + int i_tablename; int i_attname; int i_inherited; int i_null_frac; @@ -10595,8 +10658,8 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg) if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS]) { appendPQExpBufferStr(query, - "PREPARE getAttributeStats(pg_catalog.name, pg_catalog.name) AS\n" - "SELECT s.attname, s.inherited, " + "PREPARE getAttributeStats(pg_catalog.name[], pg_catalog.name[]) AS\n" + "SELECT s.schemaname, s.tablename, s.attname, s.inherited, " "s.null_frac, s.avg_width, s.n_distinct, " "s.most_common_vals, s.most_common_freqs, " "s.histogram_bounds, s.correlation, " @@ -10616,9 +10679,11 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg) appendPQExpBufferStr(query, "FROM pg_catalog.pg_stats s " - "WHERE s.schemaname = $1 " - "AND s.tablename = $2 " - "ORDER BY s.attname, s.inherited"); + "JOIN unnest($1, $2) WITH ORDINALITY AS u (schemaname, tablename, ord) " + "ON s.schemaname = u.schemaname " + "AND s.tablename = u.tablename " + "WHERE s.tablename = ANY($2) " + "ORDER BY u.ord, s.attname, s.inherited"); ExecuteSqlStatement(fout, query->data); @@ -10648,16 +10713,16 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg) appendPQExpBufferStr(out, "\n);\n"); + /* Fetch the next batch of attribute statistics if needed. */ + if (rownum >= PQntuples(res)) + { + PQclear(res); + res = fetchAttributeStats(fout); + rownum = 0; + } - /* fetch attribute stats */ - appendPQExpBufferStr(query, "EXECUTE getAttributeStats("); - appendStringLiteralAH(query, dobj->namespace->dobj.name, fout); - appendPQExpBufferStr(query, ", "); - appendStringLiteralAH(query, dobj->name, fout); - appendPQExpBufferStr(query, ");"); - - res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); - + i_schemaname = PQfnumber(res, "schemaname"); + i_tablename = PQfnumber(res, "tablename"); i_attname = PQfnumber(res, "attname"); i_inherited = PQfnumber(res, "inherited"); i_null_frac = PQfnumber(res, "null_frac"); @@ -10675,10 +10740,15 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg) i_range_bounds_histogram = PQfnumber(res, "range_bounds_histogram"); /* restore attribute stats */ - for (int rownum = 0; rownum < PQntuples(res); rownum++) + for (; rownum < PQntuples(res); rownum++) { const char *attname; + /* Stop if the next stat row in our cache isn't for this relation. */ + if (strcmp(dobj->name, PQgetvalue(res, rownum, i_tablename)) != 0 || + strcmp(dobj->namespace->dobj.name, PQgetvalue(res, rownum, i_schemaname)) != 0) + break; + appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_attribute_stats(\n"); appendPQExpBuffer(out, "\t'version', '%u'::integer,\n", fout->remoteVersion); @@ -10768,8 +10838,6 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg) appendPQExpBufferStr(out, "\n);\n"); } - PQclear(res); - destroyPQExpBuffer(query); ret = out->data; pg_free(out); -- 2.39.5 (Apple Git-154)