BUG #14780: PostgreSQL 9.6 selects a wrong plan during aggregation against timestamp columns

From: sogawa(at)yandex(dot)ru
To: pgsql-bugs(at)postgresql(dot)org
Cc: sogawa(at)yandex(dot)ru
Subject: BUG #14780: PostgreSQL 9.6 selects a wrong plan during aggregation against timestamp columns
Date: 2017-08-16 01:47:04
Message-ID: 20170816014704.27360.35941@wrigleys.postgresql.org
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-bugs

The following bug has been logged on the website:

Bug reference: 14780
Logged by: sogawa-sps
Email address: sogawa(at)yandex(dot)ru
PostgreSQL version: 9.6.2
Operating system: Windows 10
Description:

PostgreSQL 9.6 selects a wrong plan during aggregation against indexed
timestamp columns while it's ok for other types.

Given: table “log” that has three columns: user_id, day, hours.

user_id character varying(36) COLLATE pg_catalog."default" NOT NULL,
day timestamp without time zone,
hours double precision

All columns have indexes.

The issue is that aggregation against the 'day' field works extremely slow
because makes a full scan filtering the entries that doesn’t relate to
user_id = 'ab056f5a-390b-41d7-ba56-897c14b679bf'

select min(day) from log where user_id =
'ab056f5a-390b-41d7-ba56-897c14b679bf'

[
{
"Execution Time": 146502.05,
"Planning Time": 0.893,
"Plan": {
"Startup Cost": 789.02,
"Actual Rows": 1,
"Plans": [
{
"Startup Cost": 0.44,
"Actual Rows": 1,
"Plans": [
{
"Index Cond": "(log.day IS NOT NULL)",
"Startup Cost": 0.44,
"Scan Direction": "Forward",
"Plan Width": 8,
"Rows Removed by Index Recheck": 0,
"Actual Rows": 1,
"Node Type": "Index Scan",
"Total Cost": 1395792.54,
"Plan Rows": 1770,
"Relation Name": "log",
"Alias": "log",
"Parallel Aware": false,
"Actual Total Time": 146502.015,
"Output": [
"log.day"
],
"Parent Relationship": "Outer",
"Actual Startup Time": 146502.015,
"Schema": "public",
"Filter": "((log.user_id)::text =
'ab056f5a-390b-41d7-ba56-897c14b679bf'::text)",
"Actual Loops": 1,
"Rows Removed by Filter": 12665610,
"Index Name": "index_log_day"
}
],
"Node Type": "Limit",
"Plan Rows": 1,
"Parallel Aware": false,
"Actual Total Time": 146502.016,
"Output": [
"log.day"
],
"Parent Relationship": "InitPlan",
"Actual Startup Time": 146502.016,
"Plan Width": 8,
"Subplan Name": "InitPlan 1 (returns $0)",
"Actual Loops": 1,
"Total Cost": 789.02
}
],
"Node Type": "Result",
"Plan Rows": 1,
"Parallel Aware": false,
"Actual Total Time": 146502.019,
"Output": [
"$0"
],
"Actual Startup Time": 146502.019,
"Plan Width": 8,
"Actual Loops": 1,
"Total Cost": 789.03
},
"Triggers": []
}
]

However the almost similar query but for the double type has a correct.

select min(hours) from log where user_id =
'ab056f5a-390b-41d7-ba56-897c14b679bf'

Server selects entries for user_id = 'ab056f5a-390b-41d7-ba56-897c14b679bf'
first and then aggregates among them what is correct

[
{
"Execution Time": 5.989,
"Planning Time": 1.186,
"Plan": {
"Partial Mode": "Simple",
"Startup Cost": 6842.66,
"Actual Rows": 1,
"Plans": [
{
"Startup Cost": 66.28,
"Plan Width": 8,
"Rows Removed by Index Recheck": 0,
"Actual Rows": 745,
"Plans": [
{
"Startup Cost": 0,
"Plan Width": 0,
"Actual Rows": 745,
"Node Type": "Bitmap Index Scan",
"Index Cond": "((log.user_id)::text =
'ab056f5a-390b-41d7-ba56-897c14b679bf'::text)",
"Plan Rows": 1770,
"Parallel Aware": false,
"Actual Total Time": 0.25,
"Parent Relationship": "Outer",
"Actual Startup Time": 0.25,
"Total Cost": 65.84,
"Actual Loops": 1,
"Index Name": "index_log_user_id"
}
],
"Recheck Cond": "((log.user_id)::text =
'ab056f5a-390b-41d7-ba56-897c14b679bf'::text)",
"Exact Heap Blocks": 742,
"Node Type": "Bitmap Heap Scan",
"Plan Rows": 1770,
"Relation Name": "log",
"Alias": "log",
"Parallel Aware": false,
"Actual Total Time": 5.793,
"Output": [
"day",
"hours",
"user_id"
],
"Lossy Heap Blocks": 0,
"Parent Relationship": "Outer",
"Actual Startup Time": 0.357,
"Total Cost": 6838.23,
"Actual Loops": 1,
"Schema": "public"
}
],
"Node Type": "Aggregate",
"Strategy": "Plain",
"Plan Rows": 1,
"Parallel Aware": false,
"Actual Total Time": 5.946,
"Output": [
"min(hours)"
],
"Actual Startup Time": 5.946,
"Plan Width": 8,
"Actual Loops": 1,
"Total Cost": 6842.67
},
"Triggers": []
}
]

Optimizer have to select correct plan for the timestamp fields like it does
for double.

WA: Rewrite query into:

select user_id, min(day) from log where user_id =
'ac43a155-4fbb-49eb-a670-02c307eb3d4f' group by user_id

Responses

Browse pgsql-bugs by date

  From Date Subject
Next Message Asif Shaikh 2017-08-16 07:08:38 Postgre driver(9.06.04.00) connected to Amazon redshift, is returning wrong count in SQLRowCount
Previous Message Thomas S. Chin 2017-08-15 15:38:15 Re: BUG #14776: ecpg 4.12.0 issues with macros containing line continued blocks