Re: Using system tables directly takes many hours, using temp tables with no indexes takes a few seconds for geometry_columns view.

From: Lars Aksel Opsahl <Lars(dot)Opsahl(at)nibio(dot)no>
To: Justin Pryzby <pryzby(at)telsasoft(dot)com>
Cc: "pgsql-performance(at)lists(dot)postgresql(dot)org" <pgsql-performance(at)lists(dot)postgresql(dot)org>
Subject: Re: Using system tables directly takes many hours, using temp tables with no indexes takes a few seconds for geometry_columns view.
Date: 2022-03-24 09:39:59
Message-ID: VE1P189MB1037DC6A6E33040D5DD42E929D199@VE1P189MB1037.EURP189.PROD.OUTLOOK.COM
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-performance

________________________________
From: Justin Pryzby <pryzby(at)telsasoft(dot)com>
Sent: Wednesday, March 23, 2022 2:19 PM

>On Wed, Mar 23, 2022 at 09:44:09AM +0000, Lars Aksel Opsahl wrote:
>> Why is temp tables with no indexes much faster system tables with indexes ?
>
>I think the "temp table" way is accidentally faster due to having no
>statistics, not because it has no indexes. If you run ANALYZE, you may hit the
>same issue (or, maybe you just need to VACUUM ANALYZE your system catalogs).

Hi

Sorry I misread your mail you are totally right.

Before I do vacuum we have these execution Time: 9422.964 ms (00:09.423)

The vacuum as you suggested
VACUUM ANALYZE pg_attribute_temp;
VACUUM ANALYZE pg_namespace_temp;
VACUUM ANALYZE pg_type_temp;
VACUUM ANALYZE pg_constraint_temp;

I can wait for 10 minutes and it just hangs, yes so we have the same problem as suggested.

The original query "select * from geometry_columns" finally finished after almost 9 hours .

The plan is here https://explain.depesz.com/s/jGXf

I did some more testing and if remove LEFT JOIN to pg_constraint in runs in less than a minute and return 75219 rows.

WITH geo_column_list AS (SELECT
current_database()::character varying(256) AS f_table_catalog,
n.nspname AS f_table_schema,
n.oid AS n_oid,
c.relname AS f_table_name,
c.oid AS c_oid,
a.attname AS f_geometry_column,
a.attnum AS a_attnum
--COALESCE(postgis_typmod_dims(a.atttypmod), sn.ndims, 2) AS coord_dimension,
--COALESCE(NULLIF(postgis_typmod_srid(a.atttypmod), 0), sr.srid, 0) AS srid,
--replace(replace(COALESCE(NULLIF(upper(postgis_typmod_type(a.atttypmod)), 'GEOMETRY'::text), st.type, 'GEOMETRY'::text), 'ZM'::text, ''::text), 'Z'::text, ''::text)::character varying(30) AS type
FROM pg_class c
JOIN pg_attribute a ON a.attrelid = c.oid AND NOT a.attisdropped
JOIN pg_namespace n ON c.relnamespace = n.oid
JOIN pg_type t ON a.atttypid = t.oid
WHERE (c.relkind = ANY (ARRAY['r'::"char", 'v'::"char", 'm'::"char", 'f'::"char", 'p'::"char"]))
AND NOT c.relname = 'raster_columns'::name AND t.typname = 'geometry'::name
AND NOT pg_is_other_temp_schema(c.relnamespace)
AND has_table_privilege(c.oid, 'SELECT'::text)
)
SELECT * FROM geo_column_list;

But if I try this with LEFT JOIN it hangs for hours it seems like.

WITH geo_column_list AS (SELECT
current_database()::character varying(256) AS f_table_catalog,
n.nspname AS f_table_schema,
n.oid AS n_oid,
c.relname AS f_table_name,
c.oid AS c_oid,
a.attname AS f_geometry_column,
a.attnum AS a_attnum,
a.atttypmod
--COALESCE(NULLIF(postgis_typmod_srid(a.atttypmod), 0), sr.srid, 0) AS srid,
--replace(replace(COALESCE(NULLIF(upper(postgis_typmod_type(a.atttypmod)), 'GEOMETRY'::text), st.type, 'GEOMETRY'::text), 'ZM'::text, ''::text), 'Z'::text, ''::text)::character varying(30) AS type
FROM pg_class c
JOIN pg_attribute a ON a.attrelid = c.oid AND NOT a.attisdropped
JOIN pg_namespace n ON c.relnamespace = n.oid
JOIN pg_type t ON a.atttypid = t.oid
WHERE (c.relkind = ANY (ARRAY['r'::"char", 'v'::"char", 'm'::"char", 'f'::"char", 'p'::"char"]))
AND NOT c.relname = 'raster_columns'::name AND t.typname = 'geometry'::name
AND NOT pg_is_other_temp_schema(c.relnamespace)
AND has_table_privilege(c.oid, 'SELECT'::text)
),
pg_constraint_list AS (SELECT pg_constraint.connamespace,
pg_constraint.conrelid,
pg_constraint.conkey,
pg_get_constraintdef(pg_constraint.oid) AS consrc
FROM pg_constraint, geo_column_list
WHERE connamespace = n_oid AND conrelid = c_oid AND (a_attnum = ANY (conkey))
)
,
geo_column_list_full AS (SELECT * FROM geo_column_list
LEFT JOIN ( SELECT s.connamespace,
s.conrelid,
s.conkey,
replace(split_part(s.consrc, ''''::text, 2), ')'::text, ''::text) AS type
FROM pg_constraint_list s
WHERE s.consrc ~~* '%geometrytype(% = %'::text) st ON TRUE
LEFT JOIN ( SELECT s.connamespace,
s.conrelid,
s.conkey,
replace(split_part(s.consrc, ' = '::text, 2), ')'::text, ''::text)::integer AS ndims
FROM pg_constraint_list s
WHERE s.consrc ~~* '%ndims(% = %'::text) sn ON TRUE
LEFT JOIN ( SELECT s.connamespace,
s.conrelid,
s.conkey,
replace(replace(split_part(s.consrc, ' = '::text, 2), ')'::text, ''::text), '('::text, ''::text)::integer AS srid
FROM pg_constraint_list s
WHERE s.consrc ~~* '%srid(% = %'::text) sr ON TRUE
)
SELECT *,
COALESCE(postgis_typmod_dims(atttypmod), ndims, 2) AS coord_dimension
FROM geo_column_list_full;

but if I try this it return 648 rows in less than second

WITH geo_column_list AS (SELECT
current_database()::character varying(256) AS f_table_catalog,
n.nspname AS f_table_schema,
n.oid AS n_oid,
c.relname AS f_table_name,
c.oid AS c_oid,
a.attname AS f_geometry_column,
a.attnum AS a_attnum,
a.atttypmod
--COALESCE(NULLIF(postgis_typmod_srid(a.atttypmod), 0), sr.srid, 0) AS srid,
--replace(replace(COALESCE(NULLIF(upper(postgis_typmod_type(a.atttypmod)), 'GEOMETRY'::text), st.type, 'GEOMETRY'::text), 'ZM'::text, ''::text), 'Z'::text, ''::text)::character varying(30) AS type
FROM pg_class c
JOIN pg_attribute a ON a.attrelid = c.oid AND NOT a.attisdropped
JOIN pg_namespace n ON c.relnamespace = n.oid
JOIN pg_type t ON a.atttypid = t.oid
WHERE (c.relkind = ANY (ARRAY['r'::"char", 'v'::"char", 'm'::"char", 'f'::"char", 'p'::"char"]))
AND NOT c.relname = 'raster_columns'::name AND t.typname = 'geometry'::name
AND NOT pg_is_other_temp_schema(c.relnamespace)
AND has_table_privilege(c.oid, 'SELECT'::text)
),
pg_constraint_list AS (SELECT pg_constraint.connamespace,
pg_constraint.conrelid,
pg_constraint.conkey,
pg_get_constraintdef(pg_constraint.oid) AS consrc
FROM pg_constraint, geo_column_list
WHERE connamespace = n_oid AND conrelid = c_oid AND (a_attnum = ANY (conkey))
)
SELECT *
FROM pg_constraint_list;

Thanks.

Lars

In response to

Browse pgsql-performance by date

  From Date Subject
Next Message Tomas Vondra 2022-03-24 11:55:41 Re: Performance issue post upgrade on Version 13 - Incorrect Estimation Cost choosing Hash Aggregate-Nested Left Loop Join
Previous Message Justin Pryzby 2022-03-23 15:20:22 Re: High process memory consumption when running sort