From c6179c3cf1395884d4a42b5ad983542a3fc4887c Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Tue, 31 Oct 2023 03:52:41 -0400 Subject: [PATCH v2] Currently we do not show any examples of using ctid anywhere, nor do we address the often-requested but problematic use case of having a LIMIT clause on UPDATE and DELETE statements. These examples are a subtle way of addressing both those concerns. --- doc/src/sgml/ref/delete.sgml | 29 +++++++++++++++++++++++++++++ doc/src/sgml/ref/select.sgml | 24 ++++++++++++++++++++++++ doc/src/sgml/ref/update.sgml | 23 +++++++++++++++++++++++ 3 files changed, 76 insertions(+) diff --git a/doc/src/sgml/ref/delete.sgml b/doc/src/sgml/ref/delete.sgml index 1b81b4e7d7..4e08c6c85e 100644 --- a/doc/src/sgml/ref/delete.sgml +++ b/doc/src/sgml/ref/delete.sgml @@ -234,6 +234,35 @@ DELETE FROM films In some cases the join style is easier to write or faster to execute than the sub-select style. + + In situations where a single operation would consume too many resources, + either causing the operation to fail or negatively impacting other workloads, + it may be desirable to break up a large DELETE into + multiple separate commands. While doing this will actually increase the + total amount of work performed, it can break the work into chunks that have + a more acceptable impact on other workloads. The + SQL standard does + not define a LIMIT clause for DELETE + operations, but it is possible to get the equivalent functionality through the + USING clause to a + Common Table Expression which identifies + a subset of rows to be deleted, locks those rows, and returns their system + column ctid values: + +WITH delete_batch AS ( + SELECT l.ctid + FROM user_logs AS l + WHERE l.status = 'archived' + ORDER BY l.creation_date + LIMIT 10000 + FOR UPDATE +) +DELETE FROM user_logs AS ul +USING delete_batch AS del +WHERE ul.ctid = del.ctid; + + This allows for flexible search criteria within the CTE and an efficient self-join. 
+ diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml index 42d78913cf..10e10ea249 100644 --- a/doc/src/sgml/ref/select.sgml +++ b/doc/src/sgml/ref/select.sgml @@ -1679,6 +1679,30 @@ SELECT * FROM (SELECT * FROM mytable FOR UPDATE) ss WHERE col1 = 5; condition is not textually within the sub-query. + + In cases where a DML operation involving many rows + must be performed, and the target table experiences numerous other simultaneous + DML operations, a FOR UPDATE clause + used in conjunction with SKIP LOCKED can be useful for + performing partial DML operations: + + +WITH mods AS ( + SELECT ctid FROM mytable + WHERE status = 'active' AND retries > 10 + ORDER BY id FOR UPDATE SKIP LOCKED +) +UPDATE mytable SET status = 'failed' +FROM mods WHERE mytable.ctid = mods.ctid; + + + This allows the DML operation to be performed in parts, avoiding locking, + until such time as the set of rows that remain to be modified is small enough + that the locking will not affect overall performance, at which point the same + statement can be issued without the SKIP LOCKED clause to ensure + that no rows were overlooked. This technique has the additional benefit that it can reduce + the overall bloat of the updated table if the table can be vacuumed in between batch updates. + Previous releases failed to preserve a lock which is upgraded by a later savepoint. 
For example, this code: diff --git a/doc/src/sgml/ref/update.sgml b/doc/src/sgml/ref/update.sgml index 2ab24b0523..c045d5dd49 100644 --- a/doc/src/sgml/ref/update.sgml +++ b/doc/src/sgml/ref/update.sgml @@ -442,6 +442,29 @@ COMMIT; UPDATE films SET kind = 'Dramatic' WHERE CURRENT OF c_films; + + + To break up a large UPDATE into more manageable pieces, + it is possible to do a self-join on the + ctid system column using + a Common Table Expression to limit the + number of rows to be updated: + +WITH exceeded_max_retries AS ( + SELECT w.ctid + FROM work_item AS w + WHERE w.status = 'active' + AND w.num_retries > 10 + ORDER BY w.retry_timestamp + FOR UPDATE + LIMIT 5000 +) +UPDATE work_item +SET status = 'failed' +FROM exceeded_max_retries AS emr +WHERE work_item.ctid = emr.ctid; + + -- 2.41.0