Re: Strange behavior of limit clause in complex query

From: Paulo Silva <paulojjs(at)gmail(dot)com>
To: Ranier Vilela <ranier(dot)vf(at)gmail(dot)com>
Cc: Pgsql Performance <pgsql-performance(at)lists(dot)postgresql(dot)org>
Subject: Re: Strange behavior of limit clause in complex query
Date: 2022-06-08 14:07:11
Message-ID: CAHJdQrnehxOumup=qEhs4dGPSH72XurF05w9pXdZe8bLs-mKjw@mail.gmail.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-performance

Hi,

The problem is that the query is generated by the framework, I'm not sure
if I can change anything on it. Any other way to influence planner?

Regards

Ranier Vilela <ranier(dot)vf(at)gmail(dot)com> escreveu no dia quarta, 8/06/2022 à(s)
12:40:

> Em qua., 8 de jun. de 2022 às 05:44, Paulo Silva <paulojjs(at)gmail(dot)com>
> escreveu:
>
>> Hi,
>>
>> I'm using PostgreSQL 14.3 and I'm getting strange behavior in a complex
>> query generated by the Entity Framework.
>>
>> The inner (complex) query has a quick execution time:
>>
>> # SELECT "Extent1"."id", CAST ("Extent1"."date" AS timestamp) AS "C3"
>> FROM "dbo"."ng_content" AS "Extent1"
>> INNER JOIN "dbo"."ng_path_content" AS "Extent2" ON "Extent1"."id"
>> = "Extent2"."id_content"
>> WHERE "Extent1"."date_from" <= CAST (LOCALTIMESTAMP AS timestamp)
>> AND "Extent1"."date_to" >= CAST (LOCALTIMESTAMP AS
>> timestamp)
>> AND 2 = "Extent1"."id_status"
>> AND EXISTS (
>> SELECT 1 AS "C1"
>> FROM (
>> SELECT "Extent3"."TagId" FROM
>> "dbo"."ngx_tag_content" AS "Extent3"
>> WHERE "Extent1"."id" =
>> "Extent3"."ContentId"
>> ) AS "Project1"
>> WHERE EXISTS (
>> SELECT 1 AS "C1" FROM (SELECT 1 AS "C")
>> AS "SingleRowTable1"
>> WHERE "Project1"."TagId" = 337139)
>> )
>> AND ("Extent2"."id_path" IN (27495,27554,27555)
>> AND NOT EXISTS (SELECT 1 AS "C1" FROM (SELECT 1 AS "C") AS
>> "SingleRowTable2" WHERE TRUE = FALSE)
>> );
>> id | C3
>> ----------+---------------------
>> 13505155 | 2021-03-27 12:01:00
>> 13505187 | 2021-03-27 12:03:00
>> 13505295 | 2021-03-27 12:06:00
>> 13505348 | 2021-03-27 12:09:00
>> 13505552 | 2021-03-27 12:11:00
>> (5 rows)
>>
>> *Time: 481.826 ms*
>>
>> If I run the same query as a nested select I get similar results (Q1):
>>
>>
>> *SELECT "Project5".idFROM (*
>> SELECT "Extent1"."id", CAST ("Extent1"."date" AS timestamp) AS "C3"
>> FROM "dbo"."ng_content" AS "Extent1"
>> INNER JOIN "dbo"."ng_path_content" AS "Extent2" ON "Extent1"."id"
>> = "Extent2"."id_content"
>> WHERE "Extent1"."date_from" <= CAST (LOCALTIMESTAMP AS timestamp)
>> AND "Extent1"."date_to" >= CAST (LOCALTIMESTAMP AS
>> timestamp)
>> AND 2 = "Extent1"."id_status"
>> AND EXISTS (
>> SELECT 1 AS "C1"
>> FROM (
>> SELECT "Extent3"."TagId" FROM
>> "dbo"."ngx_tag_content" AS "Extent3"
>> WHERE "Extent1"."id" =
>> "Extent3"."ContentId"
>> ) AS "Project1"
>> WHERE EXISTS (
>> SELECT 1 AS "C1" FROM (SELECT 1 AS "C")
>> AS "SingleRowTable1"
>> WHERE "Project1"."TagId" = 337139)
>> )
>> AND ("Extent2"."id_path" IN (27495,27554,27555)
>> AND NOT EXISTS (SELECT 1 AS "C1" FROM (SELECT 1 AS "C") AS
>> "SingleRowTable2" WHERE TRUE = FALSE)
>> )
>> *) AS "Project5";*
>> id
>> ----------
>> 13505155
>> 13505187
>> 13505295
>> 13505348
>> 13505552
>> (5 rows)
>>
>> *Time: 486.174 ms*
>>
>> But if I add an ORDER BY and a LIMIT something goes very wrong (Q2):
>>
>> # SELECT "Project5".id
>> FROM (
>> SELECT "Extent1"."id", CAST ("Extent1"."date" AS timestamp) AS "C3"
>> FROM "dbo"."ng_content" AS "Extent1"
>> INNER JOIN "dbo"."ng_path_content" AS "Extent2" ON "Extent1"."id"
>> = "Extent2"."id_content"
>> WHERE "Extent1"."date_from" <= CAST (LOCALTIMESTAMP AS timestamp)
>> AND "Extent1"."date_to" >= CAST (LOCALTIMESTAMP AS
>> timestamp)
>> AND 2 = "Extent1"."id_status"
>> AND EXISTS (
>> SELECT 1 AS "C1"
>> FROM (
>> SELECT "Extent3"."TagId" FROM
>> "dbo"."ngx_tag_content" AS "Extent3"
>> WHERE "Extent1"."id" =
>> "Extent3"."ContentId"
>> ) AS "Project1"
>> WHERE EXISTS (
>> SELECT 1 AS "C1" FROM (SELECT 1 AS "C")
>> AS "SingleRowTable1"
>> WHERE "Project1"."TagId" = 337139)
>> )
>> AND ("Extent2"."id_path" IN (27495,27554,27555)
>> AND NOT EXISTS (SELECT 1 AS "C1" FROM (SELECT 1 AS "C") AS
>> "SingleRowTable2" WHERE TRUE = FALSE)
>> )
>> ) AS "Project5" *ORDER BY "Project5"."C3" DESC LIMIT 6*;
>>
> I think that LIMIT is confusing the planner.
> Forcing a path that in the end is not faster.
>
> Can you try something similar to this?
>
> WITH q AS (
> SELECT "Project5".id
> FROM (
> SELECT "Extent1"."id", CAST ("Extent1"."date" AS timestamp) AS "C3"
> FROM "dbo"."ng_content" AS "Extent1"
> INNER JOIN "dbo"."ng_path_content" AS "Extent2" ON "Extent1"."id"
> = "Extent2"."id_content"
> WHERE "Extent1"."date_from" <= CAST (LOCALTIMESTAMP AS timestamp)
> AND "Extent1"."date_to" >= CAST (LOCALTIMESTAMP AS
> timestamp)
> AND 2 = "Extent1"."id_status"
> AND EXISTS (
> SELECT 1 AS "C1"
> FROM (
> SELECT "Extent3"."TagId" FROM
> "dbo"."ngx_tag_content" AS "Extent3"
> WHERE "Extent1"."id" =
> "Extent3"."ContentId"
> ) AS "Project1"
> WHERE EXISTS (
> SELECT 1 AS "C1" FROM (SELECT 1 AS "C") AS
> "SingleRowTable1"
> WHERE "Project1"."TagId" = 337139)
> )
> AND ("Extent2"."id_path" IN (27495,27554,27555)
> AND NOT EXISTS (SELECT 1 AS "C1" FROM (SELECT 1 AS "C") AS
> "SingleRowTable2" WHERE TRUE = FALSE))
> ))
> SELECT * FROM q ORDER BY q.C3 DESC LIMIT 6;
>
> Probably, using CTE, the plan you want.
>
> regards,
> Ranier Vilela
>

--
Paulo Silva <paulojjs(at)gmail(dot)com>

In response to

Browse pgsql-performance by date

  From Date Subject
Next Message Tom Lane 2022-06-08 14:32:03 Re: Adding non-selective key to jsonb query @> reduces performance?
Previous Message Ranier Vilela 2022-06-08 11:40:10 Re: Strange behavior of limit clause in complex query