BUG #8464: Update with subquery with limit occasionally updates too many rows

From: jturkel(at)salsify(dot)com
To: pgsql-bugs(at)postgresql(dot)org
Subject: BUG #8464: Update with subquery with limit occasionally updates too many rows
Date: 2013-09-20 18:04:00
Message-ID: E1VN53g-0002Iy-Il@wrigleys.postgresql.org
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-bugs

The following bug has been logged on the website:

Bug reference: 8464
Logged by: Joel Turkel
Email address: jturkel(at)salsify(dot)com
PostgreSQL version: 9.2.4
Operating system: Ubunutu 12.04 (64-bit)/Kernel version 3.2
Description:

An update statement in my application that uses a limit subquery to select a
single primary key value for a row to update occasionally updates multiple
rows. We've only noticed this problem in our test automation runs and it
only occurs roughly every 25-50 test runs with no discernible pattern for
when it occurs. Attempts to isolate the problem into a consistently
reproducible test case have been unsuccessful. The schema for the relevant
tables in question are:

CREATE TABLE delayed_jobs
(
id serial NOT NULL,
priority integer DEFAULT 0,
attempts integer DEFAULT 0,
handler text,
last_error text,
run_at timestamp without time zone,
locked_at timestamp without time zone,
failed_at timestamp without time zone,
locked_by character varying(255),
queue character varying(255),
created_at timestamp without time zone NOT NULL,
updated_at timestamp without time zone NOT NULL,
job_group_id integer,
blocked boolean NOT NULL DEFAULT false,
CONSTRAINT delayed_jobs_pkey PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);

CREATE INDEX delayed_jobs_priority
ON delayed_jobs
USING btree
(priority, run_at)
WHERE failed_at IS NULL AND blocked = false;

CREATE INDEX index_delayed_jobs_on_job_group_id
ON delayed_jobs
USING btree
(job_group_id);

CREATE TABLE delayed_job_groups
(
id serial NOT NULL,
on_completion_job text,
on_completion_job_options text,
queueing_complete boolean NOT NULL DEFAULT false,
blocked boolean NOT NULL DEFAULT false,
CONSTRAINT delayed_job_groups_pkey PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);

Here's a log of the SQL statements run during a recent test failure (there
were hundreds of tests run before this so including all statements run since
startup would be a bit unwieldy):

SAVEPOINT active_record_1
INSERT INTO "delayed_job_groups" ("blocked", "on_completion_job",
"on_completion_job_options", "queueing_complete") VALUES ($1, $2, $3, $4)
RETURNING "id" [["blocked", false], ["on_completion_job", "---
!ruby/object:CompletionJob {}\n"], ["on_completion_job_options", nil],
["queueing_complete", false]]
RELEASE SAVEPOINT active_record_1

SAVEPOINT active_record_1
INSERT INTO "delayed_jobs" ("attempts", "blocked", "created_at",
"failed_at", "handler", "job_group_id", "last_error", "locked_at",
"locked_by", "priority", "queue", "run_at", "updated_at") VALUES ($1, $2,
$3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) RETURNING "id"
[["attempts", 0], ["blocked", false], ["created_at", Fri, 20 Sep 2013
00:08:47 UTC +00:00], ["failed_at", nil], ["handler", "---
!ruby/object:FailingJob {}\n"], ["job_group_id", 920], ["last_error", nil],
["locked_at", nil], ["locked_by", nil], ["priority", 0], ["queue", nil],
["run_at", Fri, 20 Sep 2013 00:08:47 UTC +00:00], ["updated_at", Fri, 20 Sep
2013 00:08:47 UTC +00:00]]
-- Inserts row with 254
RELEASE SAVEPOINT active_record_1

SAVEPOINT active_record_1
INSERT INTO "delayed_jobs" ("attempts", "blocked", "created_at",
"failed_at", "handler", "job_group_id", "last_error", "locked_at",
"locked_by", "priority", "queue", "run_at", "updated_at") VALUES ($1, $2,
$3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) RETURNING "id" [0m
[["attempts", 0], ["blocked", false], ["created_at", Fri, 20 Sep 2013
00:08:47 UTC +00:00], ["failed_at", nil], ["handler", "---
!ruby/object:NoOpJob {}\n"], ["job_group_id", 920], ["last_error", nil],
["locked_at", nil], ["locked_by", nil], ["priority", 0], ["queue", nil],
["run_at", Fri, 20 Sep 2013 00:08:47 UTC +00:00], ["updated_at", Fri, 20 Sep
2013 00:08:47 UTC +00:00]]
-- Inserts row with id 254
RELEASE SAVEPOINT active_record_1

SAVEPOINT active_record_1
SELECT "delayed_job_groups".* FROM "delayed_job_groups" WHERE
"delayed_job_groups"."id" = $1 LIMIT 1 FOR UPDATE [["id", 920]]
UPDATE "delayed_job_groups" SET "queueing_complete" = 't' WHERE
"delayed_job_groups"."id" = 920
SELECT 1 AS one FROM "delayed_jobs" WHERE "delayed_jobs"."job_group_id" IN
(920) AND "delayed_jobs"."failed_at" IS NULL LIMIT 1
RELEASE SAVEPOINT active_record_1

SELECT COUNT(*) FROM "delayed_jobs" WHERE "delayed_jobs"."failed_at" IS
NULL
SELECT COUNT(*) FROM "delayed_job_groups"

UPDATE "delayed_jobs" SET locked_at = '2013-09-20 00:08:47.540592',
locked_by = 'host:box239 pid:11983' WHERE id IN (SELECT id FROM
"delayed_jobs" WHERE "delayed_jobs"."blocked" = 'f' AND ((run_at <=
'2013-09-20 00:08:47.540120' AND (locked_at IS NULL OR locked_at <
'2013-09-19 20:08:47.540135') OR locked_by = 'host:box239 pid:11983') AND
failed_at IS NULL) ORDER BY priority ASC, run_at ASC LIMIT 1 FOR UPDATE)
RETURNING *
-- Updates both rows 253 and 254 but the limit should result in only one row
being updated

There is no concurrent access to the database during the test in question. I
realize I haven't given you guys a ton of information to work off but any
help in tracking this down would be greatly appreciated.

Thanks,
Joel

Browse pgsql-bugs by date

  From Date Subject
Next Message fburgess 2013-09-20 20:51:43 Installing/Upgrading PostgreSQL 9.1.6 to 9.3 known bugs?
Previous Message John R Pierce 2013-09-20 17:42:39 Re: Known issues for PostgreSQL server 8.1.19