From 99d653f59e3ed7a7f5809e6546f0514ea8f0beda Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 24 Jan 2025 09:29:30 -0600 Subject: [PATCH v1 1/2] vacuumdb: Save catalog query results for --analyze-in-stages. Presently, each call to vacuum_one_database() for each stage of --analyze-in-stages mode performs the catalog query to retrieve the list of tables to process. A proposed follow-up commit would add a "missing only" feature to --analyze-in-stages, which requires us to save the results of the catalog query (since tables without statistics would have them after the first stage). This commit adds this ability via a new parameter for vacuum_one_database() that specifies either a previously-retrieved list to process or a place to store the results of the catalog query for later use. This commit also makes use of this new parameter for --analyze-in-stages. The trade-offs of this approach are increased memory usage and less responsiveness to concurrent catalog changes in later stages, neither of which is expected to bother anyone. Author: Corey Huinker --- src/bin/scripts/vacuumdb.c | 316 +++++++++++++++++++++---------------- 1 file changed, 180 insertions(+), 136 deletions(-) diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c index 3dc428674ae..88ceb42746d 100644 --- a/src/bin/scripts/vacuumdb.c +++ b/src/bin/scripts/vacuumdb.c @@ -62,10 +62,16 @@ typedef enum static VacObjFilter objfilter = OBJFILTER_NONE; +static SimpleStringList *retrieve_objects(PGconn *conn, + vacuumingOptions *vacopts, + SimpleStringList *objects, + bool echo); + static void vacuum_one_database(ConnParams *cparams, vacuumingOptions *vacopts, int stage, SimpleStringList *objects, + SimpleStringList **found_objs, int concurrentCons, const char *progname, bool echo, bool quiet); @@ -400,12 +406,13 @@ main(int argc, char *argv[]) if (analyze_in_stages) { int stage; + SimpleStringList *found_objs = NULL; for (stage = 0; stage < ANALYZE_NUM_STAGES; stage++) { vacuum_one_database(&cparams, &vacopts, stage, - &objects, + &objects, &found_objs, concurrentCons, progname, echo, quiet); } @@ -413,7 +420,7 @@ main(int argc, char *argv[]) else vacuum_one_database(&cparams, &vacopts, ANALYZE_NO_STAGE, - &objects, + &objects, NULL, concurrentCons, progname, echo, quiet); } @@ -461,8 +468,17 @@ escape_quotes(const char *src) /* * vacuum_one_database * - * Process tables in the given database. If the 'objects' list is empty, - * process all tables in the database. + * Process tables in the given database. + * + * 'found_objs' should be a fully qualified list of objects to process, as + * returned by a previous call to vacuum_one_database(). If *found_objs is + * NULL, it is set to the results of the catalog query discussed below. If + * found_objs is NULL, the results of the catalog query are not returned. + * + * If *found_objs is NULL, this function performs a catalog query to retrieve + * the list of tables to process. When 'objects' is NULL, all tables in the + * database are processed. Otherwise, the catalog query performs a lookup for + * the objects listed in 'objects'. * * Note that this function is only concerned with running exactly one stage * when in analyze-in-stages mode; caller must iterate on us if necessary. @@ -475,22 +491,18 @@ vacuum_one_database(ConnParams *cparams, vacuumingOptions *vacopts, int stage, SimpleStringList *objects, + SimpleStringList **found_objs, int concurrentCons, const char *progname, bool echo, bool quiet) { PQExpBufferData sql; - PQExpBufferData buf; - PQExpBufferData catalog_query; - PGresult *res; PGconn *conn; SimpleStringListCell *cell; ParallelSlotArray *sa; - SimpleStringList dbtables = {NULL, NULL}; - int i; - int ntups; + int ntups = 0; bool failed = false; - bool objects_listed = false; const char *initcmd; + SimpleStringList *ret = NULL; const char *stage_commands[] = { "SET default_statistics_target=1; SET vacuum_cost_delay=0;", "SET default_statistics_target=10; RESET vacuum_cost_delay;", @@ -587,19 +599,155 @@ vacuum_one_database(ConnParams *cparams, } /* - * Prepare the list of tables to process by querying the catalogs. - * - * Since we execute the constructed query with the default search_path - * (which could be unsafe), everything in this query MUST be fully - * qualified. - * - * First, build a WITH clause for the catalog query if any tables were - * specified, with a set of values made of relation names and their - * optional set of columns. This is used to match any provided column - * lists with the generated qualified identifiers and to filter for the - * tables provided via --table. If a listed table does not exist, the - * catalog query will fail. + * If the caller provided the results of a previous catalog query, just + * use that. Otherwise, run the catalog query ourselves and set the + * return variable if provided. + */ + if (found_objs && *found_objs) + ret = *found_objs; + else + { + ret = retrieve_objects(conn, vacopts, objects, echo); + if (found_objs) + *found_objs = ret; + } + + /* + * Count the number of objects in the catalog query result. If there are + * none, we are done. + */ + for (cell = ret ? ret->head : NULL; cell; cell = cell->next) + ntups++; + + if (ntups == 0) + { + PQfinish(conn); + return; + } + + /* + * Ensure concurrentCons is sane. If there are more connections than + * vacuumable relations, we don't need to use them all. + */ + if (concurrentCons > ntups) + concurrentCons = ntups; + if (concurrentCons <= 0) + concurrentCons = 1; + + /* + * All slots need to be prepared to run the appropriate analyze stage, if + * caller requested that mode. We have to prepare the initial connection + * ourselves before setting up the slots. + */ + if (stage == ANALYZE_NO_STAGE) + initcmd = NULL; + else + { + initcmd = stage_commands[stage]; + executeCommand(conn, initcmd, echo); + } + + /* + * Setup the database connections. We reuse the connection we already have + * for the first slot. If not in parallel mode, the first slot in the + * array contains the connection. */ + sa = ParallelSlotsSetup(concurrentCons, cparams, progname, echo, initcmd); + ParallelSlotsAdoptConn(sa, conn); + + initPQExpBuffer(&sql); + + cell = ret->head; + do + { + const char *tabname = cell->val; + ParallelSlot *free_slot; + + if (CancelRequested) + { + failed = true; + goto finish; + } + + free_slot = ParallelSlotsGetIdle(sa, NULL); + if (!free_slot) + { + failed = true; + goto finish; + } + + prepare_vacuum_command(&sql, PQserverVersion(free_slot->connection), + vacopts, tabname); + + /* + * Execute the vacuum. All errors are handled in processQueryResult + * through ParallelSlotsGetIdle. + */ + ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL); + run_vacuum_command(free_slot->connection, sql.data, + echo, tabname); + + cell = cell->next; + } while (cell != NULL); + + if (!ParallelSlotsWaitCompletion(sa)) + { + failed = true; + goto finish; + } + + /* If we used SKIP_DATABASE_STATS, mop up with ONLY_DATABASE_STATS */ + if (vacopts->skip_database_stats && stage == ANALYZE_NO_STAGE) + { + const char *cmd = "VACUUM (ONLY_DATABASE_STATS);"; + ParallelSlot *free_slot = ParallelSlotsGetIdle(sa, NULL); + + if (!free_slot) + { + failed = true; + goto finish; + } + + ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL); + run_vacuum_command(free_slot->connection, cmd, echo, NULL); + + if (!ParallelSlotsWaitCompletion(sa)) + failed = true; + } + +finish: + ParallelSlotsTerminate(sa); + pg_free(sa); + + termPQExpBuffer(&sql); + + if (failed) + exit(1); +} + +/* + * Prepare the list of tables to process by querying the catalogs. + * + * Since we execute the constructed query with the default search_path (which + * could be unsafe), everything in this query MUST be fully qualified. + * + * First, build a WITH clause for the catalog query if any tables were + * specified, with a set of values made of relation names and their optional + * set of columns. This is used to match any provided column lists with the + * generated qualified identifiers and to filter for the tables provided via + * --table. If a listed table does not exist, the catalog query will fail. + */ +static SimpleStringList * +retrieve_objects(PGconn *conn, vacuumingOptions *vacopts, + SimpleStringList *objects, bool echo) +{ + PQExpBufferData buf; + PQExpBufferData catalog_query; + PGresult *res; + SimpleStringListCell *cell; + SimpleStringList *found_objs = palloc0(sizeof(SimpleStringList)); + bool objects_listed = false; + initPQExpBuffer(&catalog_query); for (cell = objects ? objects->head : NULL; cell; cell = cell->next) { @@ -753,23 +901,12 @@ vacuum_one_database(ConnParams *cparams, termPQExpBuffer(&catalog_query); PQclear(executeQuery(conn, ALWAYS_SECURE_SEARCH_PATH_SQL, echo)); - /* - * If no rows are returned, there are no matching tables, so we are done. - */ - ntups = PQntuples(res); - if (ntups == 0) - { - PQclear(res); - PQfinish(conn); - return; - } - /* * Build qualified identifiers for each table, including the column list * if given. */ initPQExpBuffer(&buf); - for (i = 0; i < ntups; i++) + for (int i = 0; i < PQntuples(res); i++) { appendPQExpBufferStr(&buf, fmtQualifiedId(PQgetvalue(res, i, 1), @@ -778,110 +915,13 @@ vacuum_one_database(ConnParams *cparams, if (objects_listed && !PQgetisnull(res, i, 2)) appendPQExpBufferStr(&buf, PQgetvalue(res, i, 2)); - simple_string_list_append(&dbtables, buf.data); + simple_string_list_append(found_objs, buf.data); resetPQExpBuffer(&buf); } termPQExpBuffer(&buf); PQclear(res); - /* - * Ensure concurrentCons is sane. If there are more connections than - * vacuumable relations, we don't need to use them all. - */ - if (concurrentCons > ntups) - concurrentCons = ntups; - if (concurrentCons <= 0) - concurrentCons = 1; - - /* - * All slots need to be prepared to run the appropriate analyze stage, if - * caller requested that mode. We have to prepare the initial connection - * ourselves before setting up the slots. - */ - if (stage == ANALYZE_NO_STAGE) - initcmd = NULL; - else - { - initcmd = stage_commands[stage]; - executeCommand(conn, initcmd, echo); - } - - /* - * Setup the database connections. We reuse the connection we already have - * for the first slot. If not in parallel mode, the first slot in the - * array contains the connection. - */ - sa = ParallelSlotsSetup(concurrentCons, cparams, progname, echo, initcmd); - ParallelSlotsAdoptConn(sa, conn); - - initPQExpBuffer(&sql); - - cell = dbtables.head; - do - { - const char *tabname = cell->val; - ParallelSlot *free_slot; - - if (CancelRequested) - { - failed = true; - goto finish; - } - - free_slot = ParallelSlotsGetIdle(sa, NULL); - if (!free_slot) - { - failed = true; - goto finish; - } - - prepare_vacuum_command(&sql, PQserverVersion(free_slot->connection), - vacopts, tabname); - - /* - * Execute the vacuum. All errors are handled in processQueryResult - * through ParallelSlotsGetIdle. - */ - ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL); - run_vacuum_command(free_slot->connection, sql.data, - echo, tabname); - - cell = cell->next; - } while (cell != NULL); - - if (!ParallelSlotsWaitCompletion(sa)) - { - failed = true; - goto finish; - } - - /* If we used SKIP_DATABASE_STATS, mop up with ONLY_DATABASE_STATS */ - if (vacopts->skip_database_stats && stage == ANALYZE_NO_STAGE) - { - const char *cmd = "VACUUM (ONLY_DATABASE_STATS);"; - ParallelSlot *free_slot = ParallelSlotsGetIdle(sa, NULL); - - if (!free_slot) - { - failed = true; - goto finish; - } - - ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL); - run_vacuum_command(free_slot->connection, cmd, echo, NULL); - - if (!ParallelSlotsWaitCompletion(sa)) - failed = true; - } - -finish: - ParallelSlotsTerminate(sa); - pg_free(sa); - - termPQExpBuffer(&sql); - - if (failed) - exit(1); + return found_objs; } /* @@ -912,6 +952,10 @@ vacuum_all_databases(ConnParams *cparams, if (analyze_in_stages) { + SimpleStringList **found_objs; + + found_objs = palloc0(PQntuples(result) * sizeof(SimpleStringList *)); + /* * When analyzing all databases in stages, we analyze them all in the * fastest stage first, so that initial statistics become available @@ -928,7 +972,7 @@ vacuum_all_databases(ConnParams *cparams, vacuum_one_database(cparams, vacopts, stage, - objects, + objects, &found_objs[i], concurrentCons, progname, echo, quiet); } @@ -942,7 +986,7 @@ vacuum_all_databases(ConnParams *cparams, vacuum_one_database(cparams, vacopts, ANALYZE_NO_STAGE, - objects, + objects, NULL, concurrentCons, progname, echo, quiet); } -- 2.39.5 (Apple Git-154)