CREATE OR REPLACE FUNCTION get_explain(text) RETURNS text LANGUAGE plpgsql AS $$ DECLARE ln TEXT; r TEXT; BEGIN r := ''; FOR ln IN EXECUTE format('explain %s', $1) LOOP r := r || ln || E'\n'; END LOOP; RETURN r; END; $$; CREATE OR REPLACE FUNCTION test_run(join_method TEXT, iterations INTEGER, tables_names TEXT[], result_table TEXT = '_results') RETURNS VOID AS $$ DECLARE _start TIMESTAMPTZ; _end TIMESTAMPTZ; _millseconds DOUBLE PRECISION; _table TEXT; _query TEXT; _filtering_rate INTEGER := 0; BEGIN RAISE NOTICE 'Performing join method %', join_method; -- Run test and collect elapsed time into _results table FOR i IN 2..array_length(tables_names, 1) LOOP _query = FORMAT('SELECT COUNT(*) FROM %I INNER JOIN %I USING (c1);', tables_names[1], tables_names[i]); RAISE NOTICE 'PID % Executing query: %', pg_backend_pid(), _query; RAISE NOTICE '%', get_explain(_query); _filtering_rate = _filtering_rate + 10; FOR j IN 1..iterations LOOP _start = clock_timestamp(); EXECUTE _query; _end = clock_timestamp(); _millseconds = 1000 * ( extract(epoch from _end) - extract(epoch from _start) ); EXECUTE FORMAT('INSERT INTO %I(join_method, rate, iteration, elapsed) VALUES (''%s'', %s, %s, %s)', result_table, join_method, _filtering_rate, i, _millseconds); END LOOP; END LOOP; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION random_series(start INTEGER, stop INTEGER) RETURNS SETOF INTEGER AS $$ SELECT s FROM generate_series(start, stop) AS s ORDER BY random(); $$ LANGUAGE sql; CREATE OR REPLACE FUNCTION create_testing_data() RETURNS VOID AS $$ DECLARE _series_start INTEGER := 1; _series_end INTEGER := SERIES_END; _series_step INTEGER := SERIES_STEP; _tables_names TEXT[] := '{"t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"}'; _table TEXT; _random_series TEXT; BEGIN FOREACH _table IN ARRAY _tables_names LOOP RAISE NOTICE '%', FORMAT('CREATE TABLE IF NOT EXISTS %I(c1 INTEGER NOT NULL, c2 INTEGER NOT NULL, c3 INTEGER NOT NULL, c4 INTEGER NOT NULL, c5 INTEGER NOT NULL);', _table); EXECUTE FORMAT('CREATE TABLE IF NOT EXISTS %I(c1 INTEGER NOT NULL, c2 INTEGER NOT NULL, c3 INTEGER NOT NULL, c4 INTEGER NOT NULL, c5 INTEGER NOT NULL);', _table); _random_series = FORMAT('(%s + random() * %s)::int', _series_start, _series_step); RAISE NOTICE '%', FORMAT('INSERT INTO %I SELECT %s AS c1, %s AS c2, %s AS c3, %s AS c4, %s AS c5 FROM generate_series(1,%s);', _table, _random_series, _random_series, _random_series, _random_series, _random_series, _series_end - _series_start + 1); EXECUTE FORMAT('INSERT INTO %I SELECT %s AS c1, %s AS c2, %s AS c3, %s AS c4, %s AS c5 FROM generate_series(1,%s);', _table, _random_series, _random_series, _random_series, _random_series, _random_series, _series_end - _series_start + 1); RAISE NOTICE '%', FORMAT('INSERT INTO %I VALUES(0, 0, 0, 0, 0);', _table); EXECUTE FORMAT('INSERT INTO %I VALUES(0, 0, 0, 0, 0);', _table); RAISE NOTICE '%', FORMAT('INSERT INTO %I VALUES(20000000, 20000000, 20000000, 20000000, 20000000);', _table); EXECUTE FORMAT('INSERT INTO %I VALUES(20000000, 20000000, 20000000, 20000000, 20000000);', _table); RAISE NOTICE '%', FORMAT('ANALYZE %I;', _table); EXECUTE FORMAT('ANALYZE %I;', _table); RAISE NOTICE 'Table % created with [0], [20000000] and series[%, %]', _table, _series_start, _series_end; _series_start = _series_start + _series_step; _series_end = _series_end + _series_step; END LOOP; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION cleanup_testing_data() RETURNS VOID AS $$ DECLARE _tables_names TEXT[] := '{"t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"}'; _table TEXT; BEGIN -- Test completed, clean up. FOREACH _table IN ARRAY _tables_names LOOP EXECUTE FORMAT('DROP TABLE IF EXISTS %I', _table); END LOOP; DROP TABLE IF EXISTS _results; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION semijoin_performance_test(iterations INTEGER = 1) RETURNS TABLE(join_method TEXT, filtering_rate INTEGER, count INTEGER, avg_exec_time_mills DOUBLE PRECISION) AS $$ DECLARE -- _join_methods TEXT[] := '{"Hash Join", "Merge Join - Default", "Merge Join - Filter Enabled", "Merge Join - Filter Forced"}'; _join_methods TEXT[] := '{"Merge Join - Filter Enabled", "Merge Join - Filter Forced"}'; _join_method TEXT; _series_start INTEGER := 1; _series_end INTEGER := 10000000; _series_step INTEGER := 1000000; _tables_names TEXT[] := '{"t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"}'; _table TEXT; _random_series TEXT; _regular_series TEXT; _filtering_rate INTEGER := 0; BEGIN RAISE NOTICE 'Begin testing iterations = %', iterations; -- Create tables used in the test. CREATE TABLE IF NOT EXISTS _results( id SERIAL PRIMARY KEY, join_method TEXT, rate INTEGER, iteration INTEGER, elapsed DOUBLE PRECISION ); SET enable_nestloop TO OFF; SELECT array_agg(v) INTO _tables_names FROM (SELECT v FROM (SELECT unnest(_tables_names) AS v) foo ORDER BY random()) bar; FOREACH _join_method IN ARRAY _join_methods LOOP IF _join_method = 'Merge Join - Default' THEN SET enable_hashjoin TO OFF; SET enable_mergejoin TO ON; SET enable_mergejoin_semijoin_filter TO OFF; SET force_mergejoin_semijoin_filter TO OFF; ELSIF _join_method = 'Merge Join - Filter Enabled' THEN SET enable_hashjoin TO OFF; SET enable_mergejoin TO ON; SET enable_mergejoin_semijoin_filter TO ON; SET force_mergejoin_semijoin_filter TO OFF; ELSIF _join_method = 'Merge Join - Filter Forced' THEN SET enable_hashjoin TO OFF; SET enable_mergejoin TO ON; SET enable_mergejoin_semijoin_filter TO ON; SET force_mergejoin_semijoin_filter TO ON; ELSIF _join_method = 'Hash Join' THEN SET enable_hashjoin TO ON; SET enable_mergejoin TO OFF; SET enable_mergejoin_semijoin_filter TO OFF; SET force_mergejoin_semijoin_filter TO OFF; ELSE RAISE NOTICE 'Unsupported join method %', _join_method CONTINUE; END IF; perform test_run(_join_method, iterations, _tables_names, '_results'); END LOOP; SET enable_nestloop TO Default; SET enable_hashjoin TO Default; SET enable_mergejoin TO Default; SET enable_mergejoin_semijoin_filter TO Default; SET force_mergejoin_semijoin_filter TO Default; RETURN QUERY SELECT _results.join_method, _results.rate, CAST(COUNT(*) AS INTEGER), avg(elapsed) FROM _results GROUP BY _results.join_method, _results.rate ORDER BY _results.join_method, _results.rate ASC; END; $$ LANGUAGE plpgsql; -- Setup testing data; SELECT * FROM create_testing_data(); SELECT pg_sleep(5); VACUUM ANALYZE; CHECKPOINT; -- Run each query 5 times, and calculate the average execution time. SELECT * FROM semijoin_performance_test(5); -- Cleanup testing data SELECT * FROM cleanup_testing_data();