partial vacuum - Mailing list pgsql-hackers

From Satoshi Nagayasu
Subject partial vacuum
Date
Msg-id 42323260.3030209@nttdata.co.jp
Whole thread Raw
Responses Re: partial vacuum  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-hackers
Hi all,

I'm thinking about "partial (or range) vacuum" feature.

As you know, vacuum process scans and re-organizes a whole table,
so huge cpu load will be generated when vacuuming a large table,
and it will take long time (in some cases, it may take 10 minutes
or more).

Huge vacuum process hits a system performance.
Otherwise, dead tuples also hit the performance.

So, I imagine if the vacuum process can be done partially,
the huge vacuum load can be parted, and the performance penalty
of the vacuum can be reduced(parted).

"partial (or range) vacuum" means vacuuming a part of the table.

For example, if you have 10 Gbytes table,
you can execute vacuum partially, 10 times, 1 Gbytes each.

Attached patch extends vacuum syntax and lazy_scan_heap() function.
Backend can process the partial vacuum command as below:

psql$ vacuum table1 (0, 100);

In the above command, "0" means start block number,
and "100" means end block number of the vacuum scan.

Attached image contains three graphs generated with pgstatpage()
function (also attached).

1.) distribution of freespace of the "tellers" table after pgbench.
2.) after partial vacuum, between 200 block and 400 block.
3.) after pgbench running again.

(X-axis: block number, Y-axis: freespace size of a page)

I think the partial vacuum and intelligent pg_autovacuum
makes postgres backend near to vacuum-less.

Is this interesting?  Any comments?
--
NAGAYASU Satoshi <nagayasus@nttdata.co.jp>
OpenSource Development Center,
NTT DATA Corp. http://www.nttdata.co.jp

diff -ru postgresql-7.4.6.orig/src/backend/commands/vacuum.c postgresql-7.4.6/src/backend/commands/vacuum.c
--- postgresql-7.4.6.orig/src/backend/commands/vacuum.c    2003-10-03 08:19:44.000000000 +0900
+++ postgresql-7.4.6/src/backend/commands/vacuum.c    2005-02-28 20:19:55.000000000 +0900
@@ -743,6 +743,12 @@
     Oid            toast_relid;
     bool        result;

+    if ( vacstmt->range )
+        elog(NOTICE, "vacuum_rel(): range %ld...%ld",
+             vacstmt->range->start, vacstmt->range->end);
+    else
+        elog(NOTICE, "vacuum_rel(): no range.");
+
     /* Begin a transaction for vacuuming this relation */
     StartTransactionCommand();
     SetQuerySnapshot();            /* might be needed for functions in
diff -ru postgresql-7.4.6.orig/src/backend/commands/vacuumlazy.c postgresql-7.4.6/src/backend/commands/vacuumlazy.c
--- postgresql-7.4.6.orig/src/backend/commands/vacuumlazy.c    2003-09-25 15:57:59.000000000 +0900
+++ postgresql-7.4.6/src/backend/commands/vacuumlazy.c    2005-02-28 20:26:08.000000000 +0900
@@ -91,7 +91,8 @@

 /* non-export function prototypes */
 static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
-               Relation *Irel, int nindexes);
+               Relation *Irel, int nindexes,
+               BlockNumber startBlock, BlockNumber endBlock);
 static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
 static void lazy_scan_index(Relation indrel, LVRelStats *vacrelstats);
 static void lazy_vacuum_index(Relation indrel, LVRelStats *vacrelstats);
@@ -149,7 +150,12 @@
     hasindex = (nindexes > 0);

     /* Do the vacuuming */
-    lazy_scan_heap(onerel, vacrelstats, Irel, nindexes);
+    if ( vacstmt->range )
+        lazy_scan_heap(onerel, vacrelstats, Irel, nindexes,
+                       vacstmt->range->start, vacstmt->range->end);
+    else
+        lazy_scan_heap(onerel, vacrelstats, Irel, nindexes,
+                       0, RelationGetNumberOfBlocks(onerel));

     /* Done with indexes */
     vac_close_indexes(nindexes, Irel);
@@ -184,7 +190,8 @@
  */
 static void
 lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
-               Relation *Irel, int nindexes)
+               Relation *Irel, int nindexes,
+               BlockNumber startBlock, BlockNumber endBlock)
 {
     BlockNumber nblocks,
                 blkno;
@@ -209,13 +216,20 @@
     empty_pages = 0;
     num_tuples = tups_vacuumed = nkeep = nunused = 0;

-    nblocks = RelationGetNumberOfBlocks(onerel);
+//    nblocks = RelationGetNumberOfBlocks(onerel);
+    if ( endBlock < RelationGetNumberOfBlocks(onerel) )
+        nblocks = endBlock;
+    else
+        nblocks = RelationGetNumberOfBlocks(onerel);
+
     vacrelstats->rel_pages = nblocks;
     vacrelstats->nonempty_pages = 0;

     lazy_space_alloc(vacrelstats, nblocks);

-    for (blkno = 0; blkno < nblocks; blkno++)
+    elog(NOTICE, "lazy_vacuum_heap: range %d...%d", startBlock, nblocks);
+
+    for (blkno = startBlock; blkno < nblocks; blkno++)
     {
         Buffer        buf;
         Page        page;
diff -ru postgresql-7.4.6.orig/src/backend/parser/gram.y postgresql-7.4.6/src/backend/parser/gram.y
--- postgresql-7.4.6.orig/src/backend/parser/gram.y    2003-11-25 01:54:15.000000000 +0900
+++ postgresql-7.4.6/src/backend/parser/gram.y    2005-02-28 20:19:17.000000000 +0900
@@ -3981,6 +3981,7 @@
                     n->freeze = $3;
                     n->verbose = $4;
                     n->relation = NULL;
+                    n->range = NULL;
                     n->va_cols = NIL;
                     $$ = (Node *)n;
                 }
@@ -3993,6 +3994,22 @@
                     n->freeze = $3;
                     n->verbose = $4;
                     n->relation = $5;
+                    n->range = NULL;
+                    n->va_cols = NIL;
+                    $$ = (Node *)n;
+                }
+            | VACUUM opt_full opt_freeze opt_verbose qualified_name '(' Iconst ',' Iconst ')'
+                {
+                    VacuumStmt *n = makeNode(VacuumStmt);
+                    n->vacuum = true;
+                    n->analyze = false;
+                    n->full = $2;
+                    n->freeze = $3;
+                    n->verbose = $4;
+                    n->relation = $5;
+                    n->range = makeNode(VacuumRange);
+                    n->range->start = $7;
+                    n->range->end   = $9;
                     n->va_cols = NIL;
                     $$ = (Node *)n;
                 }
@@ -4003,6 +4020,7 @@
                     n->full = $2;
                     n->freeze = $3;
                     n->verbose |= $4;
+                    n->range = NULL;
                     $$ = (Node *)n;
                 }
         ;
@@ -4017,6 +4035,7 @@
                     n->freeze = false;
                     n->verbose = $2;
                     n->relation = NULL;
+                    n->range = NULL;
                     n->va_cols = NIL;
                     $$ = (Node *)n;
                 }
@@ -4029,6 +4048,7 @@
                     n->freeze = false;
                     n->verbose = $2;
                     n->relation = $3;
+                    n->range = NULL;
                     n->va_cols = $4;
                     $$ = (Node *)n;
                 }
diff -ru postgresql-7.4.6.orig/src/include/nodes/nodes.h postgresql-7.4.6/src/include/nodes/nodes.h
--- postgresql-7.4.6.orig/src/include/nodes/nodes.h    2003-08-18 04:58:06.000000000 +0900
+++ postgresql-7.4.6/src/include/nodes/nodes.h    2005-02-28 19:16:39.000000000 +0900
@@ -223,6 +223,7 @@
     T_CreatedbStmt,
     T_DropdbStmt,
     T_VacuumStmt,
+    T_VacuumRange,
     T_ExplainStmt,
     T_CreateSeqStmt,
     T_AlterSeqStmt,
postgresql-7.4.6/src/include/nodes������ȯ��: nodes.h~
diff -ru postgresql-7.4.6.orig/src/include/nodes/parsenodes.h postgresql-7.4.6/src/include/nodes/parsenodes.h
--- postgresql-7.4.6.orig/src/include/nodes/parsenodes.h    2003-09-17 13:25:29.000000000 +0900
+++ postgresql-7.4.6/src/include/nodes/parsenodes.h    2005-02-28 19:48:36.000000000 +0900
@@ -1507,6 +1507,13 @@
     char       *indexname;        /* original index defined */
 } ClusterStmt;

+typedef struct VacuumRange
+{
+    NodeTag        type;
+    long        start;
+    long        end;
+} VacuumRange;
+
 /* ----------------------
  *        Vacuum and Analyze Statements
  *
@@ -1523,6 +1530,7 @@
     bool        freeze;            /* early-freeze option */
     bool        verbose;        /* print progress info */
     RangeVar   *relation;        /* single table to process, or NULL */
+    VacuumRange *range;
     List       *va_cols;        /* list of column names, or NIL for all */
 } VacuumStmt;


Attachment

pgsql-hackers by date:

Previous
From: Nicolai Tufar
Date:
Subject: Re: [pgsql-hackers-win32] snprintf causes regression tests to fail
Next
From: Tom Lane
Date:
Subject: Re: partial vacuum