Thread: Reduce heap tuple header size

Reduce heap tuple header size

From
Manfred Koizar
Date:
This patch, which is built upon the "HeapTupleHeader accessor macros"
patch from 2002-06-10, is supposed to reduce the heap tuple header size
by four bytes on most architectures.  Of course it changes the on-disk
tuple format and therefore requires initdb.  As I have (once more)
opened my mouth too wide, I'll have to provide a heap file conversion
utility, if this patch gets accepted...  More on this later.

======================
 All 81 tests passed.
======================

It's late now, I'll do more tests tomorrow.

Good night
 Manfred

diff -ru ../orig/src/backend/access/heap/heapam.c src/backend/access/heap/heapam.c
--- ../orig/src/backend/access/heap/heapam.c    2002-06-13 19:34:48.000000000 +0200
+++ src/backend/access/heap/heapam.c    2002-06-13 22:31:42.000000000 +0200
@@ -2204,7 +2204,7 @@
         htup->t_infomask = HEAP_XMAX_INVALID | xlhdr.mask;
         HeapTupleHeaderSetXmin(htup, record->xl_xid);
         HeapTupleHeaderSetCmin(htup, FirstCommandId);
-        HeapTupleHeaderSetXmax(htup, InvalidTransactionId);
+        HeapTupleHeaderSetXmaxInvalid(htup);
         HeapTupleHeaderSetCmax(htup, FirstCommandId);

         offnum = PageAddItem(page, (Item) htup, newlen, offnum,
diff -ru ../orig/src/include/access/htup.h src/include/access/htup.h
--- ../orig/src/include/access/htup.h    2002-06-13 19:34:49.000000000 +0200
+++ src/include/access/htup.h    2002-06-14 01:12:47.000000000 +0200
@@ -57,15 +57,24 @@
  * Also note that we omit the nulls bitmap if t_infomask shows that there
  * are no nulls in the tuple.
  */
+/*
+** We store five "virtual" fields Xmin, Cmin, Xmax, Cmax, and Xvac
+** in three physical fields t_xmin, t_cid, t_xmax:
+** CommandId     Cmin;        insert CID stamp
+** CommandId     Cmax;        delete CommandId stamp
+** TransactionId Xmin;        insert XID stamp
+** TransactionId Xmax;        delete XID stamp
+** TransactionId Xvac;        used by VACUUM
+**
+** This assumes that a CommandId can be stored in a TransactionId.
+*/
 typedef struct HeapTupleHeaderData
 {
     Oid            t_oid;            /* OID of this tuple -- 4 bytes */

-    CommandId    t_cmin;            /* insert CID stamp -- 4 bytes each */
-    CommandId    t_cmax;            /* delete CommandId stamp */
-
-    TransactionId t_xmin;        /* insert XID stamp -- 4 bytes each */
-    TransactionId t_xmax;        /* delete XID stamp */
+    TransactionId t_xmin;        /* Xmin -- 4 bytes each */
+    TransactionId t_cid;        /* Cmin, Cmax, Xvac */
+    TransactionId t_xmax;        /* Xmax, Cmax */

     ItemPointerData t_ctid;        /* current TID of this or newer tuple */

@@ -75,7 +84,7 @@

     uint8        t_hoff;            /* sizeof header incl. bitmap, padding */

-    /* ^ - 31 bytes - ^ */
+    /* ^ - 27 bytes - ^ */

     bits8        t_bits[1];        /* bitmap of NULLs -- VARIABLE LENGTH */

@@ -96,6 +105,8 @@
                                          * attribute(s) */
 #define HEAP_HASEXTENDED        0x000C    /* the two above combined */

+#define HEAP_XMIN_IS_XMAX        0x0040    /* created and deleted in the */
+                                        /* same transaction
*/
 #define HEAP_XMAX_UNLOGGED        0x0080    /* to lock tuple for update */
                                         /* without logging
*/
 #define HEAP_XMIN_COMMITTED        0x0100    /* t_xmin committed */
@@ -108,6 +119,7 @@
                                          * vacuum */
 #define HEAP_MOVED_IN            0x8000    /* moved from another place by
                                          * vacuum */
+#define HEAP_MOVED (HEAP_MOVED_OFF | HEAP_MOVED_IN)

 #define HEAP_XACT_MASK            0xFFF0    /* visibility-related bits */

@@ -116,53 +128,100 @@
 /* HeapTupleHeader accessor macros */

 #define HeapTupleHeaderGetXmin(tup) \
-    ((tup)->t_xmin)
+( \
+    (tup)->t_xmin \
+)

 #define HeapTupleHeaderGetXmax(tup) \
-    ((tup)->t_xmax)
+( \
+    ((tup)->t_infomask & HEAP_XMIN_IS_XMAX) ? \
+        (tup)->t_xmin \
+    : \
+        (tup)->t_xmax \
+)

-/* no AssertMacro, because this is read as a system-defined attribute also */
+/* no AssertMacro, because this is read as a system-defined attribute */
 #define HeapTupleHeaderGetCmin(tup) \
 ( \
-    (tup)->t_cmin \
+    ((tup)->t_infomask & HEAP_MOVED) ? \
+        FirstCommandId \
+    : \
+    ( \
+        ((tup)->t_infomask & (HEAP_XMIN_IS_XMAX | HEAP_XMAX_INVALID)) ? \
+            (CommandId) (tup)->t_cid \
+        : \
+            FirstCommandId \
+    ) \
 )

 #define HeapTupleHeaderGetCmax(tup) \
-    ((tup)->t_cmax)
+( \
+    ((tup)->t_infomask & HEAP_MOVED) ? \
+        FirstCommandId \
+    : \
+    ( \
+        ((tup)->t_infomask & (HEAP_XMIN_IS_XMAX | HEAP_XMAX_INVALID)) ? \
+            (CommandId) (tup)->t_xmax \
+        : \
+            (CommandId) (tup)->t_cid \
+    ) \
+)

 #define HeapTupleHeaderGetXvac(tup) \
 ( \
-    AssertMacro((tup)->t_infomask & (HEAP_MOVED_IN | HEAP_MOVED_OFF)), \
-    (TransactionId) (tup)->t_cmin \
+    AssertMacro((tup)->t_infomask & HEAP_MOVED), \
+    (tup)->t_cid \
 )


 #define HeapTupleHeaderSetXmin(tup, xid) \
-    (TransactionIdStore((xid), &(tup)->t_xmin))
+( \
+    TransactionIdStore((xid), &(tup)->t_xmin) \
+)

 #define HeapTupleHeaderSetXminInvalid(tup) \
-    (StoreInvalidTransactionId(&(tup)->t_xmin))
+do { \
+    (tup)->t_infomask &= ~HEAP_XMIN_IS_XMAX; \
+    StoreInvalidTransactionId(&(tup)->t_xmin); \
+} while (0)

 #define HeapTupleHeaderSetXmax(tup, xid) \
-    (TransactionIdStore((xid), &(tup)->t_xmax))
+do { \
+    if (TransactionIdEquals((tup)->t_xmin, (xid))) \
+        (tup)->t_infomask |= HEAP_XMIN_IS_XMAX; \
+    else \
+    { \
+        (tup)->t_infomask &= ~HEAP_XMIN_IS_XMAX; \
+        TransactionIdStore((xid), &(tup)->t_xmax); \
+    } \
+} while (0)

 #define HeapTupleHeaderSetXmaxInvalid(tup) \
-    (StoreInvalidTransactionId(&(tup)->t_xmax))
+do { \
+    (tup)->t_infomask &= ~HEAP_XMIN_IS_XMAX; \
+    StoreInvalidTransactionId(&(tup)->t_xmax); \
+} while (0)

 #define HeapTupleHeaderSetCmin(tup, cid) \
-( \
-    AssertMacro(!((tup)->t_infomask & (HEAP_MOVED_IN | HEAP_MOVED_OFF))), \
-    (tup)->t_cmin = (cid) \
-)
+do { \
+    Assert(!((tup)->t_infomask & HEAP_MOVED)); \
+    TransactionIdStore((TransactionId) (cid), &(tup)->t_cid); \
+} while (0)

 #define HeapTupleHeaderSetCmax(tup, cid) \
-    ((tup)->t_cmax = (cid))
+do { \
+    Assert(!((tup)->t_infomask & HEAP_MOVED)); \
+    if ((tup)->t_infomask & HEAP_XMIN_IS_XMAX) \
+        TransactionIdStore((TransactionId) (cid), &(tup)->t_xmax); \
+    else \
+        TransactionIdStore((TransactionId) (cid), &(tup)->t_cid); \
+} while (0)

 #define HeapTupleHeaderSetXvac(tup, xid) \
-( \
-    AssertMacro((tup)->t_infomask & (HEAP_MOVED_IN | HEAP_MOVED_OFF)), \
-    TransactionIdStore((xid), (TransactionId *) &((tup)->t_cmin)) \
-)
+do { \
+    Assert((tup)->t_infomask & HEAP_MOVED); \
+    TransactionIdStore((xid), &(tup)->t_cid); \
+} while (0)


 /*


Re: Reduce heap tuple header size

From
Tom Lane
Date:
Manfred Koizar <mkoi-pg@aon.at> writes:
> This patch, which is built upon the "HeapTupleHeader accessor macros"
> patch from 2002-06-10, is supposed to reduce the heap tuple header size
> by four bytes on most architectures.  Of course it changes the on-disk
> tuple format and therefore requires initdb.

As I commented before, I am not in favor of this.  I don't think that a
four-byte savings justifies a forced initdb with no chance of
pg_upgrade, plus loss of redundancy (= reduced chance of detecting or
recovering from corruption), plus significantly slower access to
several critical header fields.  The tqual.c routines are already
hotspots in many scenarios.  I believe this will make them noticeably
slower.

            regards, tom lane

Re: Reduce heap tuple header size

From
Manfred Koizar
Date:
On Fri, 14 Jun 2002 10:16:22 -0400, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>As I commented before, I am not in favor of this.  I don't think that a
>four-byte savings justifies a forced initdb with no chance of
>pg_upgrade,

As I don't know other users' preferences, I cannot comment on this.
I just think that four bytes per tuple can amount to a sum that justifies
this effort.  Disk space is not my argument here, but reduced disk IO is.

>plus significantly slower access to
>several critical header fields.  The tqual.c routines are already
>hotspots in many scenarios.  I believe this will make them noticeably
>slower.

Significantly slower?  I tried to analyze HeapTupleSatisfiesUpdate(),
as I think it is the most complicated of these Satisfies functions
(look for the !! comments):

/*
!! HeapTupleHeaderGetXmin  no slowdown
!! HeapTupleHeaderGetXmax  one infomask compare
!! HeapTupleHeaderGetCmin  two infomask compares
!! HeapTupleHeaderGetCMax  two infomask compares
!! HeapTupleHeaderGetXvac  no slowdown
*/
int
HeapTupleSatisfiesUpdate(HeapTuple htuple, CommandId curcid)
{
    HeapTupleHeader tuple = htuple->t_data;

    if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
    {
        if (tuple->t_infomask & HEAP_XMIN_INVALID)
            /*
            !! no slowdown
            */
            return HeapTupleInvisible;

        if (tuple->t_infomask & HEAP_MOVED_OFF)
        {
            if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXvac(tuple)))
                /*
                !! no slowdown
                */
                return HeapTupleInvisible;
            if (!TransactionIdIsInProgress(HeapTupleHeaderGetXvac(tuple)))
            {
                if (TransactionIdDidCommit(HeapTupleHeaderGetXvac(tuple)))
                {
                    tuple->t_infomask |= HEAP_XMIN_INVALID;
                    /*
                    !! no slowdown
                    */
                    return HeapTupleInvisible;
                }
                tuple->t_infomask |= HEAP_XMIN_COMMITTED;
            }
        }
        else if (tuple->t_infomask & HEAP_MOVED_IN)
        {
            if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXvac(tuple)))
            {
                if (TransactionIdIsInProgress(HeapTupleHeaderGetXvac(tuple)))
                    /*
                    !! no slowdown
                    */
                    return HeapTupleInvisible;
                if (TransactionIdDidCommit(HeapTupleHeaderGetXvac(tuple)))
                    tuple->t_infomask |= HEAP_XMIN_COMMITTED;
                else
                {
                    tuple->t_infomask |= HEAP_XMIN_INVALID;
                    /*
                    !! no slowdown
                    */
                    return HeapTupleInvisible;
                }
            }
        }
        /*
        !! no slowdown up to here
        */
        else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple)))
        {
            /*
            !! GetCmin does 2 infomask compares
            */
            if (HeapTupleHeaderGetCmin(tuple) >= curcid)
                /*
                !! returning with 2 additional infomask compares
                */
                return HeapTupleInvisible;        /* inserted after scan
                                                 * started */

            if (tuple->t_infomask & HEAP_XMAX_INVALID)        /* xid invalid */
                /*
                !! returning with 2 additional infomask compares
                */
                return HeapTupleMayBeUpdated;

            /*
            !! assertions turned off in production: no slowdown
            */
            Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)));

            if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE)
                /*
                !! returning with 2 additional infomask compares
                */
                return HeapTupleMayBeUpdated;

            /*
            !! GetCmax does 2 infomask compares
            */
            if (HeapTupleHeaderGetCmax(tuple) >= curcid)
                /*
                !! returning with 4 additional infomask compares
                */
                return HeapTupleSelfUpdated;    /* updated after scan
                                                 * started */
            else
                /*
                !! returning with 4 additional infomask compares
                */
                return HeapTupleInvisible;        /* updated before scan
                                                 * started */
        }
        /*
        !! no slowdown up to here
        */
        else if (!TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
        {
            if (TransactionIdDidAbort(HeapTupleHeaderGetXmin(tuple)))
                tuple->t_infomask |= HEAP_XMIN_INVALID;    /* aborted */
            /*
            !! no slowdown
            */
            return HeapTupleInvisible;
        }
        else
            tuple->t_infomask |= HEAP_XMIN_COMMITTED;
    }

    /*
    !! no slowdown
    */
    /* by here, the inserting transaction has committed */

    if (tuple->t_infomask & HEAP_XMAX_INVALID)        /* xid invalid or aborted */
        /*
        !! no slowdown
        */
        return HeapTupleMayBeUpdated;

    if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
    {
        if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE)
            /*
            !! no slowdown
            ?? BTW, shouldn't we set HEAP_MAX_INVALID here?
            */
            return HeapTupleMayBeUpdated;
        /*
        !! no slowdown
        */
        return HeapTupleUpdated;    /* updated by other */
    }

    /*
    !! no slowdown up to here,
    !! one infomask compare to follow in GetXmax(),
    !! additional waste of precious CPU cycles could be avoided by:
    !! TransactionId xmax = HeapTupleHeaderGetXmax(tuple);
    */
    if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
    {
        if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE)
            /*
            !! returning with 1 additional infomask compare
            */
            return HeapTupleMayBeUpdated;
        /*
        !! GetCmax does 2 infomask compares
        */
        if (HeapTupleHeaderGetCmax(tuple) >= curcid)
            /*
            !! returning with 3 additional infomask compares
            */
            return HeapTupleSelfUpdated;        /* updated after scan
                                                 * started */
        else
            /*
            !! returning with 3 additional infomask compares
            */
            return HeapTupleInvisible;    /* updated before scan started */
    }

    /*
    !! 1 infomask compare up to here,
    !! another infomask compare ...
    !! or use xmax
    */
    if (!TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
    {
        /*
        !! and a third infomask compare
        */
        if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple)))
        {
            tuple->t_infomask |= HEAP_XMAX_INVALID;        /* aborted */
            /*
            !! returning with 1 or 3 additional infomask compares
            */
            return HeapTupleMayBeUpdated;
        }
        /* running xact */
        /*
        !! returning with 1 or 3 additional infomask compares
        */
        return HeapTupleBeingUpdated;    /* in updation by other */
    }

    /*
    !! 2 (or 1 with a local xmax) infomask compares up to here
    */
    /* xmax transaction committed */
    tuple->t_infomask |= HEAP_XMAX_COMMITTED;

    if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE)
        /*
        ?? BTW, shouldn't we set HEAP_MAX_INVALID here?
        */
        return HeapTupleMayBeUpdated;

    return HeapTupleUpdated;    /* updated by other */
}

So in the worst case we return after having done four more
compares than without the patch.  Note that in the most common
cases there is no additional cost at all.  If you still think
we have a performance problem here, we could replace GetCmin
and GetCmax by cheaper macros:

/*
 * Cheaper variant of HeapTupleHeaderGetCmin for callers that already
 * know HEAP_MOVED is not set: it costs only one t_infomask compare.
 * Fix: the AssertMacro line was missing its trailing backslash, so the
 * #define ended prematurely and the rest of the expression was left as
 * stray tokens outside the macro.
 */
#define HeapTupleHeaderGetCminKnowingThatNotMoved(tup) \
( \
    AssertMacro(!((tup)->t_infomask & HEAP_MOVED)), \
    ( \
        ((tup)->t_infomask & (HEAP_XMIN_IS_XMAX | HEAP_XMAX_INVALID)) ? \
            (CommandId) (tup)->t_cid \
        : \
            FirstCommandId \
    ) \
)

thus reducing the additional cost to one t_infomask compare,
because the Satisfies functions only access Cmin and Cmax,
when HEAP_MOVED is known to be not set.

OTOH experimenting with a moderatly sized "out of production"
database I got the following results:
                         | pages | pages |
relkind | count | tuples | before| after | savings
--------+-------+--------+-------+-------+--------
i       |    31 | 808146 |  8330 |  8330 |   0.00%
r       |    32 | 612968 | 13572 | 13184 |   2.86%
all     |    63 |        | 21902 | 21514 |   1.77%

2.86% fewer heap pages mean 2.86% less disk IO caused by heap pages.
Considering that index pages tend to benefit more from caching
we conclude that heap pages contribute more to the overall
IO load, so the total savings in the number of disk IOs should
be better than the 1.77% shown in the table above.  I think
this outweighs a few CPU cycles now and then.

Servus
 Manfred

Re: Reduce heap tuple header size

From
Bruce Momjian
Date:
Your patch has been added to the PostgreSQL unapplied patches list at:

    http://candle.pha.pa.us/cgi-bin/pgpatches

I will try to apply it within the next 48 hours.

---------------------------------------------------------------------------


Manfred Koizar wrote:
> This patch, which is built upon the "HeapTupleHeader accessor macros"
> patch from 2002-06-10, is supposed to reduce the heap tuple header size
> by four bytes on most architectures.  Of course it changes the on-disk
> tuple format and therefore requires initdb.  As I have (once more)
> opened my mouth too wide, I'll have to provide a heap file conversion
> utility, if this patch gets accepted...  More on this later.
>
> ======================
>  All 81 tests passed.
> ======================
>
> It's late now, I'll do more tests tomorrow.
>
> Good night
>  Manfred
>
> diff -ru ../orig/src/backend/access/heap/heapam.c src/backend/access/heap/heapam.c
> --- ../orig/src/backend/access/heap/heapam.c    2002-06-13 19:34:48.000000000 +0200
> +++ src/backend/access/heap/heapam.c    2002-06-13 22:31:42.000000000 +0200
> @@ -2204,7 +2204,7 @@
>          htup->t_infomask = HEAP_XMAX_INVALID | xlhdr.mask;
>          HeapTupleHeaderSetXmin(htup, record->xl_xid);
>          HeapTupleHeaderSetCmin(htup, FirstCommandId);
> -        HeapTupleHeaderSetXmax(htup, InvalidTransactionId);
> +        HeapTupleHeaderSetXmaxInvalid(htup);
>          HeapTupleHeaderSetCmax(htup, FirstCommandId);
>
>          offnum = PageAddItem(page, (Item) htup, newlen, offnum,
> diff -ru ../orig/src/include/access/htup.h src/include/access/htup.h
> --- ../orig/src/include/access/htup.h    2002-06-13 19:34:49.000000000 +0200
> +++ src/include/access/htup.h    2002-06-14 01:12:47.000000000 +0200
> @@ -57,15 +57,24 @@
>   * Also note that we omit the nulls bitmap if t_infomask shows that there
>   * are no nulls in the tuple.
>   */
> +/*
> +** We store five "virtual" fields Xmin, Cmin, Xmax, Cmax, and Xvac
> +** in three physical fields t_xmin, t_cid, t_xmax:
> +** CommandId     Cmin;        insert CID stamp
> +** CommandId     Cmax;        delete CommandId stamp
> +** TransactionId Xmin;        insert XID stamp
> +** TransactionId Xmax;        delete XID stamp
> +** TransactionId Xvac;        used by VACUUM
> +**
> +** This assumes that a CommandId can be stored in a TransactionId.
> +*/
>  typedef struct HeapTupleHeaderData
>  {
>      Oid            t_oid;            /* OID of this tuple -- 4 bytes */
>
> -    CommandId    t_cmin;            /* insert CID stamp -- 4 bytes each */
> -    CommandId    t_cmax;            /* delete CommandId stamp */
> -
> -    TransactionId t_xmin;        /* insert XID stamp -- 4 bytes each */
> -    TransactionId t_xmax;        /* delete XID stamp */
> +    TransactionId t_xmin;        /* Xmin -- 4 bytes each */
> +    TransactionId t_cid;        /* Cmin, Cmax, Xvac */
> +    TransactionId t_xmax;        /* Xmax, Cmax */
>
>      ItemPointerData t_ctid;        /* current TID of this or newer tuple */
>
> @@ -75,7 +84,7 @@
>
>      uint8        t_hoff;            /* sizeof header incl. bitmap, padding */
>
> -    /* ^ - 31 bytes - ^ */
> +    /* ^ - 27 bytes - ^ */
>
>      bits8        t_bits[1];        /* bitmap of NULLs -- VARIABLE LENGTH */
>
> @@ -96,6 +105,8 @@
>                                           * attribute(s) */
>  #define HEAP_HASEXTENDED        0x000C    /* the two above combined */
>
> +#define HEAP_XMIN_IS_XMAX        0x0040    /* created and deleted in the */
> +                                        /* same transaction
> */
>  #define HEAP_XMAX_UNLOGGED        0x0080    /* to lock tuple for update */
>                                          /* without logging
> */
>  #define HEAP_XMIN_COMMITTED        0x0100    /* t_xmin committed */
> @@ -108,6 +119,7 @@
>                                           * vacuum */
>  #define HEAP_MOVED_IN            0x8000    /* moved from another place by
>                                           * vacuum */
> +#define HEAP_MOVED (HEAP_MOVED_OFF | HEAP_MOVED_IN)
>
>  #define HEAP_XACT_MASK            0xFFF0    /* visibility-related bits */
>
> @@ -116,53 +128,100 @@
>  /* HeapTupleHeader accessor macros */
>
>  #define HeapTupleHeaderGetXmin(tup) \
> -    ((tup)->t_xmin)
> +( \
> +    (tup)->t_xmin \
> +)
>
>  #define HeapTupleHeaderGetXmax(tup) \
> -    ((tup)->t_xmax)
> +( \
> +    ((tup)->t_infomask & HEAP_XMIN_IS_XMAX) ? \
> +        (tup)->t_xmin \
> +    : \
> +        (tup)->t_xmax \
> +)
>
> -/* no AssertMacro, because this is read as a system-defined attribute also */
> +/* no AssertMacro, because this is read as a system-defined attribute */
>  #define HeapTupleHeaderGetCmin(tup) \
>  ( \
> -    (tup)->t_cmin \
> +    ((tup)->t_infomask & HEAP_MOVED) ? \
> +        FirstCommandId \
> +    : \
> +    ( \
> +        ((tup)->t_infomask & (HEAP_XMIN_IS_XMAX | HEAP_XMAX_INVALID)) ? \
> +            (CommandId) (tup)->t_cid \
> +        : \
> +            FirstCommandId \
> +    ) \
>  )
>
>  #define HeapTupleHeaderGetCmax(tup) \
> -    ((tup)->t_cmax)
> +( \
> +    ((tup)->t_infomask & HEAP_MOVED) ? \
> +        FirstCommandId \
> +    : \
> +    ( \
> +        ((tup)->t_infomask & (HEAP_XMIN_IS_XMAX | HEAP_XMAX_INVALID)) ? \
> +            (CommandId) (tup)->t_xmax \
> +        : \
> +            (CommandId) (tup)->t_cid \
> +    ) \
> +)
>
>  #define HeapTupleHeaderGetXvac(tup) \
>  ( \
> -    AssertMacro((tup)->t_infomask & (HEAP_MOVED_IN | HEAP_MOVED_OFF)), \
> -    (TransactionId) (tup)->t_cmin \
> +    AssertMacro((tup)->t_infomask & HEAP_MOVED), \
> +    (tup)->t_cid \
>  )
>
>
>  #define HeapTupleHeaderSetXmin(tup, xid) \
> -    (TransactionIdStore((xid), &(tup)->t_xmin))
> +( \
> +    TransactionIdStore((xid), &(tup)->t_xmin) \
> +)
>
>  #define HeapTupleHeaderSetXminInvalid(tup) \
> -    (StoreInvalidTransactionId(&(tup)->t_xmin))
> +do { \
> +    (tup)->t_infomask &= ~HEAP_XMIN_IS_XMAX; \
> +    StoreInvalidTransactionId(&(tup)->t_xmin); \
> +} while (0)
>
>  #define HeapTupleHeaderSetXmax(tup, xid) \
> -    (TransactionIdStore((xid), &(tup)->t_xmax))
> +do { \
> +    if (TransactionIdEquals((tup)->t_xmin, (xid))) \
> +        (tup)->t_infomask |= HEAP_XMIN_IS_XMAX; \
> +    else \
> +    { \
> +        (tup)->t_infomask &= ~HEAP_XMIN_IS_XMAX; \
> +        TransactionIdStore((xid), &(tup)->t_xmax); \
> +    } \
> +} while (0)
>
>  #define HeapTupleHeaderSetXmaxInvalid(tup) \
> -    (StoreInvalidTransactionId(&(tup)->t_xmax))
> +do { \
> +    (tup)->t_infomask &= ~HEAP_XMIN_IS_XMAX; \
> +    StoreInvalidTransactionId(&(tup)->t_xmax); \
> +} while (0)
>
>  #define HeapTupleHeaderSetCmin(tup, cid) \
> -( \
> -    AssertMacro(!((tup)->t_infomask & (HEAP_MOVED_IN | HEAP_MOVED_OFF))), \
> -    (tup)->t_cmin = (cid) \
> -)
> +do { \
> +    Assert(!((tup)->t_infomask & HEAP_MOVED)); \
> +    TransactionIdStore((TransactionId) (cid), &(tup)->t_cid); \
> +} while (0)
>
>  #define HeapTupleHeaderSetCmax(tup, cid) \
> -    ((tup)->t_cmax = (cid))
> +do { \
> +    Assert(!((tup)->t_infomask & HEAP_MOVED)); \
> +    if ((tup)->t_infomask & HEAP_XMIN_IS_XMAX) \
> +        TransactionIdStore((TransactionId) (cid), &(tup)->t_xmax); \
> +    else \
> +        TransactionIdStore((TransactionId) (cid), &(tup)->t_cid); \
> +} while (0)
>
>  #define HeapTupleHeaderSetXvac(tup, xid) \
> -( \
> -    AssertMacro((tup)->t_infomask & (HEAP_MOVED_IN | HEAP_MOVED_OFF)), \
> -    TransactionIdStore((xid), (TransactionId *) &((tup)->t_cmin)) \
> -)
> +do { \
> +    Assert((tup)->t_infomask & HEAP_MOVED); \
> +    TransactionIdStore((xid), &(tup)->t_cid); \
> +} while (0)
>
>
>  /*
>
>
> ---------------------------(end of broadcast)---------------------------
> TIP 3: if posting/reading through Usenet, please send an appropriate
> subscribe-nomail command to majordomo@postgresql.org so that your
> message can get through to the mailing list cleanly
>

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

Re: Reduce heap tuple header size

From
Tom Lane
Date:
Bruce Momjian <pgman@candle.pha.pa.us> writes:
> Your patch has been added to the PostgreSQL unapplied patches list at:
>     http://candle.pha.pa.us/cgi-bin/pgpatches
> I will try to apply it within the next 48 hours.

Are you planning to ignore my objections to it?

            regards, tom lane

Re: Reduce heap tuple header size

From
Bruce Momjian
Date:
Tom Lane wrote:
> Bruce Momjian <pgman@candle.pha.pa.us> writes:
> > Your patch has been added to the PostgreSQL unapplied patches list at:
> >     http://candle.pha.pa.us/cgi-bin/pgpatches
> > I will try to apply it within the next 48 hours.
>
> Are you planning to ignore my objections to it?

The author replied addressing your objections and I saw no reply from you
on that.

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

Re: Reduce heap tuple header size

From
Bruce Momjian
Date:
Tom Lane wrote:
> Bruce Momjian <pgman@candle.pha.pa.us> writes:
> > Tom Lane wrote:
> >> Are you planning to ignore my objections to it?
>
> > The author replied addressing your objections and I saw no reply from on
> > on that.
>
> He replied stating his opinion; my opinion didn't change.  I was waiting
> for some other people to weigh in with their opinions.  As far as I've
> seen, no one else has commented at all.
>
> If I get voted down on the point after suitable discussion, so be it.
> But I will strongly object to you applying the patch just because it's
> next in your inbox.

Well, we have three votes, yours, the author, and mine.  That's a vote.

If you want to make a pitch for more votes, go ahead.  I can wait.

The thread went on for quite a while and no one else gave an opinion, as
I remember, though there may have been a few positive ones for the patch
that I forgot about.

I don't care if you object, strongly object, or jump up and down; you
have one vote.  Please stop the posturing.

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

Re: Reduce heap tuple header size

From
Tom Lane
Date:
Bruce Momjian <pgman@candle.pha.pa.us> writes:
> Tom Lane wrote:
>> Are you planning to ignore my objections to it?

> The author replied addressing your objections and I saw no reply from on
> on that.

He replied stating his opinion; my opinion didn't change.  I was waiting
for some other people to weigh in with their opinions.  As far as I've
seen, no one else has commented at all.

If I get voted down on the point after suitable discussion, so be it.
But I will strongly object to you applying the patch just because it's
next in your inbox.

            regards, tom lane

Re: Reduce heap tuple header size

From
Bruce Momjian
Date:
Tom Lane wrote:
> Bruce Momjian <pgman@candle.pha.pa.us> writes:
> > Tom Lane wrote:
> >> Are you planning to ignore my objections to it?
>
> > The author replied addressing your objections and I saw no reply from on
> > on that.
>
> He replied stating his opinion; my opinion didn't change.  I was waiting
> for some other people to weigh in with their opinions.  As far as I've
> seen, no one else has commented at all.
>
> If I get voted down on the point after suitable discussion, so be it.
> But I will strongly object to you applying the patch just because it's
> next in your inbox.

Tom, I have reviewed your objections:

> As I commented before, I am not in favor of this.  I don't think that a
> four-byte savings justifies a forced initdb with no chance of
> pg_upgrade, plus loss of redundancy (= reduced chance of detecting or
> recovering from corruption), plus significantly slower access to
> several critical header fields.  The tqual.c routines are already
> hotspots in many scenarios.  I believe this will make them noticeably
> slower.

I don't think enough people use pg_upgrade to make it a reason to keep
an extra four bytes of tuple overhead.  I realize 8-byte aligned systems
don't benefit, but most of our platforms are 4-byte aligned.  I don't
consider redundancy a valid reason either.  We just don't have many
table corruption complaints, and the odds that having an extra 4 bytes
is going to make detection or correction better is unlikely.

The author addressed the slowness complaint and seemed to refute the
idea it would be slower.

Is there something I am missing?

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

Re: Reduce heap tuple header size

From
Jan Wieck
Date:
Bruce Momjian wrote:
>
> Tom Lane wrote:
> > Bruce Momjian <pgman@candle.pha.pa.us> writes:
> > > Tom Lane wrote:
> > >> Are you planning to ignore my objections to it?
> >
> > > The author replied addressing your objections and I saw no reply from on
> > > on that.
> >
> > He replied stating his opinion; my opinion didn't change.  I was waiting
> > for some other people to weigh in with their opinions.  As far as I've
> > seen, no one else has commented at all.
> >
> > If I get voted down on the point after suitable discussion, so be it.
> > But I will strongly object to you applying the patch just because it's
> > next in your inbox.
>
> Tom, I have reviewed your objections:
>
> > As I commented before, I am not in favor of this.  I don't think that a
> > four-byte savings justifies a forced initdb with no chance of
> > pg_upgrade, plus loss of redundancy (= reduced chance of detecting or
> > recovering from corruption), plus significantly slower access to
> > several critical header fields.  The tqual.c routines are already
> > hotspots in many scenarios.  I believe this will make them noticeably
> > slower.
>
> I don't think enough people use pg_upgrade to make it a reason to keep
> an extra four bytes of tuple overhead.  I realize 8-byte aligned systems
> don't benefit, but most of our platforms are 4-byte aligned.  I don't
> consider redundency a valid reason either.  We just don't have many
> table corruption complaints, and the odds that having an extra 4 bytes
> is going to make detection or correction better is unlikely.

The non-overwriting storage management (which is one reason why we need
all these header fields) causes over 30 bytes of row overhead anyway. I
am with Tom here, 4 bytes per row isn't worth making the tuple header
variable length size.

> The author addressed the slowness complaint and seemed to refute the
> idea it would be slower.

Do we have any hard numbers on that? Is it just access to the header
fields, or do we lose the offset cacheability of all fixed size fields
at the beginning of a row? In the latter case count me into the
slowness-believer camp.


Jan

--

#======================================================================#
# It's easier to get forgiveness for being wrong than for being right. #
# Let's break this rule - forgive me.                                  #
#================================================== JanWieck@Yahoo.com #

Re: Reduce heap tuple header size

From
Bruce Momjian
Date:
Jan Wieck wrote:
> > I don't think enough people use pg_upgrade to make it a reason to keep
> > an extra four bytes of tuple overhead.  I realize 8-byte aligned systems
> > don't benefit, but most of our platforms are 4-byte aligned.  I don't
> > consider redundency a valid reason either.  We just don't have many
> > table corruption complaints, and the odds that having an extra 4 bytes
> > is going to make detection or correction better is unlikely.
>
> The non-overwriting storage management (which is one reason why whe need
> all these header fields) causes over 30 bytes of row overhead anyway. I
> am with Tom here, 4 bytes per row isn't worth making the tuple header
> variable length size.

Woh, I didn't see anything about making the header variable size.  The
issue was that on 8-byte machines, structure alignment will not allow
any savings.  However, on 4-byte machines, it will be a savings of ~11%
in the tuple header.

> > The author addressed the slowness complaint and seemed to refute the
> > idea it would be slower.
>
> Do we have any hard numbers on that? Is it just access to the header
> fields, or do we loose the offset cacheability of all fixed size fields
> at the beginning of a row? In the latter case count me into the
> slowness-believer camp.

No other slowdown except access to the tuple header requires a little
more smarts. As the author mentions, the increased number of tuples per
page more than offset that.  In fact, the patch is fairly small, so you
can review it yourself:

    http://candle.pha.pa.us/cgi-bin/pgpatches

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

Re: Reduce heap tuple header size

From
Bruce Momjian
Date:
Jan Wieck wrote:
> > I don't think enough people use pg_upgrade to make it a reason to keep
> > an extra four bytes of tuple overhead.  I realize 8-byte aligned systems
> > don't benefit, but most of our platforms are 4-byte aligned.  I don't
> > consider redundency a valid reason either.  We just don't have many
> > table corruption complaints, and the odds that having an extra 4 bytes
> > is going to make detection or correction better is unlikely.
>
> The non-overwriting storage management (which is one reason why whe need
> all these header fields) causes over 30 bytes of row overhead anyway. I
> am with Tom here, 4 bytes per row isn't worth making the tuple header
> variable length size.
>
> > The author addressed the slowness complaint and seemed to refute the
> > idea it would be slower.
>
> Do we have any hard numbers on that? Is it just access to the header
> fields, or do we loose the offset cacheability of all fixed size fields
> at the beginning of a row? In the latter case count me into the
> slowness-believer camp.

Here is a summary of the concepts used in the patch:


http://groups.google.com/groups?hl=en&lr=&ie=UTF-8&threadm=ejf4du853mblm44f0u78f02g166r69lng7%404ax.com&rnum=28&prev=/groups%3Fq%3Dmanfred%2Bkoizar%2Bgroup:comp.databases.postgresql.*%26start%3D20%26hl%3Den%26lr%3D%26ie%3DUTF-8%26scoring%3Dd%26selm%3Dejf4du853mblm44f0u78f02g166r69lng7%25404ax.com%26rnum%3D28

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

Re: Reduce heap tuple header size

From
Tom Lane
Date:
Bruce Momjian <pgman@candle.pha.pa.us> writes:
> Tom, I have reviewed your objections:

Thanks.

> Is there something I am missing?

My principal objection to this is that I do not like piecemeal breakage
of pg_upgrade, disk page examination tools, etc.  There are other things
that have been proposed that would require changes of page header or
tuple header format, and I would prefer to see them bundled up and done
as one big change (in some future revision; it's probably too late for
7.3).  "Disk format of the week" is not an idea that appeals to me.

I know as well as you do that pg_upgrade compatibility is only
interesting if there *is* a pg_upgrade, which very possibly won't
happen for 7.3 anyway --- but I don't like foreclosing the possibility
for a marginal win.  And this is definitely a marginal win.  Let's
try to batch up enough changes to make it worth the pain.

In case you are wondering what I am talking about, here is an
off-the-cuff list of things that I'd prefer not to do one at a time:

* Version identifier words in page headers
* CRCs in page headers
* Replication and/or point-in-time recovery might need additional
  header fields similar to those used by WAL
* Restructuring of line pointers
* Making OIDs optional (no wasted space if no OID)
* Adding a tuple version identifier to tuples (for DROP COLUMN etc)
* Restructuring index tuple headers (remove length field)
* Fixing array storage to support NULLs inside arrays
* Restoring time travel on some optional basis

Some of these may never happen (I'm not even in favor of all of them)
but it's certain that we will want to do some of them.  I don't want to
take the same hit over and over when some intelligent project management
would let us take it just once or twice.

            regards, tom lane

Re: Reduce heap tuple header size

From
Tom Lane
Date:
Jan Wieck <JanWieck@Yahoo.com> writes:
> Do we have any hard numbers on that? Is it just access to the header
> fields, or do we loose the offset cacheability of all fixed size fields
> at the beginning of a row? In the latter case count me into the
> slowness-believer camp.

I don't believe the patch would have made the header variable size,
only changed what the fixed size is.  The slowdown I was worried about
was just a matter of a couple extra tests and branches in the tqual.c
routines; which would be negligible if they weren't such hotspots.

            regards, tom lane

Re: Reduce heap tuple header size

From
Jan Wieck
Date:
Bruce Momjian wrote:
>
> Jan Wieck wrote:
> > > I don't think enough people use pg_upgrade to make it a reason to keep
> > > an extra four bytes of tuple overhead.  I realize 8-byte aligned systems
> > > don't benefit, but most of our platforms are 4-byte aligned.  I don't
> > > consider redundency a valid reason either.  We just don't have many
> > > table corruption complaints, and the odds that having an extra 4 bytes
> > > is going to make detection or correction better is unlikely.
> >
> > The non-overwriting storage management (which is one reason why whe need
> > all these header fields) causes over 30 bytes of row overhead anyway. I
> > am with Tom here, 4 bytes per row isn't worth making the tuple header
> > variable length size.
>
> Woh, I didn't see anything about making the header variable size.  The
> issue was that on 8-byte machines, structure alignment will not allow
> any savings.  However, on 4-byte machines, it will be a savings of ~11%
> in the tuple header.

You're right. Dunno where I got that idea from.

Looking at the patch I find it quite confusing using Xmin as Xmax,
sometimes. If we use 3 physical variables for 5 virtual ones in that
way, I would rather use generic names.


Jan

--

#======================================================================#
# It's easier to get forgiveness for being wrong than for being right. #
# Let's break this rule - forgive me.                                  #
#================================================== JanWieck@Yahoo.com #

Re: Reduce heap tuple header size

From
Jan Wieck
Date:
Tom Lane wrote:
>
> Jan Wieck <JanWieck@Yahoo.com> writes:
> > Do we have any hard numbers on that? Is it just access to the header
> > fields, or do we loose the offset cacheability of all fixed size fields
> > at the beginning of a row? In the latter case count me into the
> > slowness-believer camp.
>
> I don't believe the patch would have made the header variable size,
> only changed what the fixed size is.  The slowdown I was worried about
> was just a matter of a couple extra tests and branches in the tqual.c
> routines; which would be negligible if they weren't such hotspots.

Did someone run at least pgbench with/without that patch applied?


Jan

--

#======================================================================#
# It's easier to get forgiveness for being wrong than for being right. #
# Let's break this rule - forgive me.                                  #
#================================================== JanWieck@Yahoo.com #

Re: Reduce heap tuple header size

From
Bruce Momjian
Date:
Tom Lane wrote:
> Bruce Momjian <pgman@candle.pha.pa.us> writes:
> > Tom, I have reviewed your objections:
>
> Thanks.
>
> > Is there something I am missing?
>
> My principal objection to this is that I do not like piecemeal breakage
> of pg_upgrade, disk page examination tools, etc.  There are other things

What do we have, pg_upgrade and Red Hat's disk dump tool, right?  (I
forgot the name.)

> that have been proposed that would require changes of page header or
> tuple header format, and I would prefer to see them bundled up and done
> as one big change (in some future revision; it's probably too late for
> 7.3).  "Disk format of the week" is not an idea that appeals to me.

This is part of the "do it perfect or don't do it" logic that I usually
disagree with.  We all agree we have a tuple header that is too large.
Here we have a chance to reduce it by 11% on most platforms, and as a
downside, we will not have a working pg_upgrade.  What do you think the
average user will choose?

> I know as well as you do that pg_upgrade compatibility is only
> interesting if there *is* a pg_upgrade, which very possibly won't
> happen for 7.3 anyway --- but I don't like foreclosing the possibility

I am not sure what needs to be done for pg_upgrade for 7.3 (does anyone
else?), but if you suspect it will not work anyway, why are you trying
to save it for 7.3?  (I think the problem with pg_upgrade in 7.2 was the
inability to create pg_clog files to match the new transaction counter.)

> for a marginal win.  And this is definitely a marginal win.  Let's
> try to batch up enough changes to make it worth the pain.

Marginal?  Seems like a pretty concrete win to me in disk space.

Also, he has offered to write a conversion tool.  If he does that, maybe
he can improve pg_upgrade if needed.

> In case you are wondering what I am talking about, here is an
> off-the-cuff list of things that I'd prefer not to do one at a time:
>
> * Version identifier words in page headers

Yes, we need that, and in fact should do that in 7.3 if we accept this
patch.  This may make future upgrades easier.  In fact, I wonder if we
could place the page version number in such a way that old pre-7.3 pages
could be easily identified, i.e. place version number 0xAE in an offset
that used to hold a values that were always less than 0x80.

> * CRCs in page headers
> * Replication and/or point-in-time recovery might need additional
>   header fields similar to those used by WAL
> * Restructuring of line pointers
> * Making OIDs optional (no wasted space if no OID)
> * Adding a tuple version identifier to tuples (for DROP COLUMN etc)
> * Restructuring index tuple headers (remove length field)
> * Fixing array storage to support NULLs inside arrays
> * Restoring time travel on some optional basis
>
> Some of these may never happen (I'm not even in favor of all of them)
> but it's certain that we will want to do some of them.  I don't want to
> take the same hit over and over when some intelligent project management
> would let us take it just once or twice.

Yes, there are some good ones there, but the idea that somehow they are
all going to hit in the same release seems unlikely.  I say let's do
some now, some later, and move ahead.

Ultimately, I think we need to add smarts to the backend to read old
formats.  I know it is a pain, but I see no other options.  pg_upgrade
always will have trouble with certain changes.

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

Re: Reduce heap tuple header size

From
Bruce Momjian
Date:
Jan Wieck wrote:
> Tom Lane wrote:
> >
> > Jan Wieck <JanWieck@Yahoo.com> writes:
> > > Do we have any hard numbers on that? Is it just access to the header
> > > fields, or do we loose the offset cacheability of all fixed size fields
> > > at the beginning of a row? In the latter case count me into the
> > > slowness-believer camp.
> >
> > I don't believe the patch would have made the header variable size,
> > only changed what the fixed size is.  The slowdown I was worried about
> > was just a matter of a couple extra tests and branches in the tqual.c
> > routines; which would be negligible if they weren't such hotspots.
>
> Did someone run at least pgbench with/without that patch applied?

No, but he did perform this analysis:

> thus reducing the additional cost to one t_infomask compare,
> because the Satisfies functions only access Cmin and Cmax,
> when HEAP_MOVED is known to be not set.
>
> OTOH experimenting with a moderatly sized "out of production"
> database I got the following results:
>                          | pages | pages |
> relkind | count | tuples | before| after | savings
> --------+-------+--------+-------+-------+--------
> i       |    31 | 808146 |  8330 |  8330 |   0.00%
> r       |    32 | 612968 | 13572 | 13184 |   2.86%
> all     |    63 |        | 21902 | 21514 |   1.77%
>
> 2.86% fewer heap pages mean 2.86% less disk IO caused by heap pages.
> Considering that index pages tend to benefit more from caching
> we conclude that heap pages contribute more to the overall
> IO load, so the total savings in the number of disk IOs should
> be better than the 1.77% shown in the table above.  I think
> this outweighs a few CPU cycles now and then.

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

Re: Reduce heap tuple header size

From
Tom Lane
Date:
Bruce Momjian <pgman@candle.pha.pa.us> writes:
> Tom Lane wrote:
>> Some of these may never happen (I'm not even in favor of all of them)
>> but it's certain that we will want to do some of them.  I don't want to
>> take the same hit over and over when some intelligent project management
>> would let us take it just once or twice.

> Yes, there are some good ones there, but the idea that somehow they are
> all going to hit in the same release seems unlikely.  I say let's do
> some now, some later, and move ahead.

I think it's very likely that P-I-T recovery and replication will hit
in the 7.4 release cycle.  I would prefer to see us plan ahead to do
these other changes for 7.4 as well, and get as many of them done in
that cycle as we can.

Basically my point is that there are costs and benefits to this sort
of change, and many of the costs are quantized --- it doesn't cost
more to make three incompatible disk-format changes than one, *as long
as they're in the same release*.  So we should try to do some actual
management of such changes, not accept them willy-nilly whenever someone
feels like doing one.

This patch is unlikely to suffer significant bit-rot if we hold it for
7.4, and that's what I think we should do with it.

            regards, tom lane

Re: Reduce heap tuple header size

From
Bruce Momjian
Date:
Tom Lane wrote:
> Bruce Momjian <pgman@candle.pha.pa.us> writes:
> > Tom Lane wrote:
> >> Some of these may never happen (I'm not even in favor of all of them)
> >> but it's certain that we will want to do some of them.  I don't want to
> >> take the same hit over and over when some intelligent project management
> >> would let us take it just once or twice.
>
> > Yes, there are some good ones there, but the idea that somehow they are
> > all going to hit in the same release seems unlikely.  I say let's do
> > some now, some later, and move ahead.
>
> I think it's very likely that P-I-T recovery and replication will hit
> in the 7.4 release cycle.  I would prefer to see us plan ahead to do
> these other changes for 7.4 as well, and get as many of them done in
> that cycle as we can.
>
> Basically my point is that there are costs and benefits to this sort
> of change, and many of the costs are quantized --- it doesn't cost
> more to make three incompatible disk-format changes than one, *as long
> as they're in the same release*.  So we should try to do some actual
> management of such changes, not accept them willy-nilly whenever someone
> feels like doing one.
>
> This patch is unlikely to suffer significant bit-rot if we hold it for
> 7.4, and that's what I think we should do with it.

Well, that is a different argument than initially.  So, it is a valid
patch, but we have to decide when to apply it.

We can easily hold it until we near release of 7.3.  If pg_upgrade is in
good shape _and_ no other format changes are required, we can  hold it
for 7.4.  What happens if 7.4 doesn't have any format changes?

However, if we have other changes or pg_upgrade isn't going to work, we
can apply it in August.

(Manfred, you can be sure I will not lose this patch.)

However, we have been very bad at predicting what features/changes are
coming in future releases.  Also, I don't think replication will require
tuple format changes.  I will check with the group but I haven't heard
anything about that.

So, we have to decide if we apply it now or delay it for later in 7.3,
or for >=7.4.

My personal vote is that we apply it now, and perhaps try some of the
other format changes we were going to make.  Tom's vote is to hold it, I
assume. Let's see what others say.


--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

Re: Reduce heap tuple header size

From
Tom Lane
Date:
Jan Wieck <JanWieck@Yahoo.com> writes:
> Here are some numbers:

> Current CVS tip:     tps  34.1, 38.7, 36.6
>                  avg(tps) 36.4

> With patch:          tps  37.0, 41.1, 41.1
>                  avg(tps) 39.7

> So it saves less than 3% disk space at the cost of about 9% performance
> loss.

Uh ... isn't more TPS better?

            regards, tom lane

Re: Reduce heap tuple header size

From
Jan Wieck
Date:
Bruce Momjian wrote:
>
> Jan Wieck wrote:
> >
> > Did someone run at least pgbench with/without that patch applied?
>
> No, but he did perform this analysis:
>
> > thus reducing the additional cost to one t_infomask compare,
> > because the Satisfies functions only access Cmin and Cmax,
> > when HEAP_MOVED is known to be not set.
> >
> > OTOH experimenting with a moderatly sized "out of production"
> > database I got the following results:
> >                          | pages | pages |
> > relkind | count | tuples | before| after | savings
> > --------+-------+--------+-------+-------+--------
> > i       |    31 | 808146 |  8330 |  8330 |   0.00%
> > r       |    32 | 612968 | 13572 | 13184 |   2.86%
> > all     |    63 |        | 21902 | 21514 |   1.77%
> >
> > 2.86% fewer heap pages mean 2.86% less disk IO caused by heap pages.
> > Considering that index pages tend to benefit more from caching
> > we conclude that heap pages contribute more to the overall
> > IO load, so the total savings in the number of disk IOs should
> > be better than the 1.77% shown in the table above.  I think
> > this outweighs a few CPU cycles now and then.

This anawhat? This is a proof that this patch is able to save not even
3% of disk space in a production environment plus an assumption that the
saved IO outweighs the extra effort in the tuple visibility checks.

Here are some numbers:

P3 850MHz 256MB RAM IDE
postmaster -N256 -B8192
pgbench -i -s 10 db
pgbench -c 20 -t 500 db


Current CVS tip:     tps  34.1, 38.7, 36.6
                 avg(tps) 36.4

With patch:          tps  37.0, 41.1, 41.1
                 avg(tps) 39.7

So it saves less than 3% disk space at the cost of about 9% performance
loss. If we can do the same the other way around I'd go for wasting some
more disk space.


Jan

--

#======================================================================#
# It's easier to get forgiveness for being wrong than for being right. #
# Let's break this rule - forgive me.                                  #
#================================================== JanWieck@Yahoo.com #

Re: Reduce heap tuple header size

From
Alvaro Herrera
Date:
Tom Lane dijo:

> Jan Wieck <JanWieck@Yahoo.com> writes:
> > Here are some numbers:
>
> > Current CVS tip:     tps  34.1, 38.7, 36.6
> >                  avg(tps) 36.4
>
> > With patch:          tps  37.0, 41.1, 41.1
> >                  avg(tps) 39.7
>
> > So it saves less than 3% disk space at the cost of about 9% performance
> > loss.
>
> Uh ... isn't more TPS better?

Also, is that 3% in disk space savings the actual number, or just copied
from the "anawhat"?

--
Alvaro Herrera (<alvherre[a]atentus.com>)
Y dijo Dios: "Que haya Satanás, para que la gente no me culpe de todo a mí."
"Y que hayan abogados, para que la gente no culpe de todo a Satanás"


Re: Reduce heap tuple header size

From
Bruce Momjian
Date:
Alvaro Herrera wrote:
> Tom Lane dijo:
>
> > Jan Wieck <JanWieck@Yahoo.com> writes:
> > > Here are some numbers:
> >
> > > Current CVS tip:     tps  34.1, 38.7, 36.6
> > >                  avg(tps) 36.4
> >
> > > With patch:          tps  37.0, 41.1, 41.1
> > >                  avg(tps) 39.7
> >
> > > So it saves less than 3% disk space at the cost of about 9% performance
> > > loss.
> >
> > Uh ... isn't more TPS better?

9%, that is a dramatic difference.  Is it caused by the reduced disk
space (Jan's numbers are correct) or by the extra overhead in the merged
fields (Jan's numbers are backwards)?  Jan will tell us soon.

> Also, is that 3% in disk space savings the actual number, or just copied
> from the "anawhat"?

The 3% is savings from a sample database.  Header size is 11% reduced.

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

Re: Reduce heap tuple header size

From
Tom Lane
Date:
Bruce Momjian <pgman@candle.pha.pa.us> writes:
> So it saves less than 3% disk space at the cost of about 9% performance
> loss.
>
> Uh ... isn't more TPS better?

> 9%, that is a dramatic difference.

Yup.  I'm suspicious of it --- if the database is 3% smaller then I'd
believe a 3% performance gain from reduction of I/O, but I don't see
where 9% comes from.

            regards, tom lane

Re: Reduce heap tuple header size

From
Bruce Momjian
Date:
Jan, any update on this?  Are the numbers correct?

---------------------------------------------------------------------------

Jan Wieck wrote:
> Bruce Momjian wrote:
> >
> > Jan Wieck wrote:
> > >
> > > Did someone run at least pgbench with/without that patch applied?
> >
> > No, but he did perform this analysis:
> >
> > > thus reducing the additional cost to one t_infomask compare,
> > > because the Satisfies functions only access Cmin and Cmax,
> > > when HEAP_MOVED is known to be not set.
> > >
> > > OTOH experimenting with a moderatly sized "out of production"
> > > database I got the following results:
> > >                          | pages | pages |
> > > relkind | count | tuples | before| after | savings
> > > --------+-------+--------+-------+-------+--------
> > > i       |    31 | 808146 |  8330 |  8330 |   0.00%
> > > r       |    32 | 612968 | 13572 | 13184 |   2.86%
> > > all     |    63 |        | 21902 | 21514 |   1.77%
> > >
> > > 2.86% fewer heap pages mean 2.86% less disk IO caused by heap pages.
> > > Considering that index pages tend to benefit more from caching
> > > we conclude that heap pages contribute more to the overall
> > > IO load, so the total savings in the number of disk IOs should
> > > be better than the 1.77% shown in the table above.  I think
> > > this outweighs a few CPU cycles now and then.
>
> This anawhat? This is a proof that this patch is able to save not even
> 3% of disk space in a production environment plus an assumption that the
> saved IO outweights the extra effort in the tuple visibility checks.
>
> Here are some numbers:
>
> P3 850MHz 256MB RAM IDE
> postmaster -N256 -B8192
> pgbench -i -s 10 db
> pgbench -c 20 -t 500 db
>
>
> Current CVS tip:     tps  34.1, 38.7, 36.6
>                  avg(tps) 36.4
>
> With patch:          tps  37.0, 41.1, 41.1
>                  avg(tps) 39.7
>
> So it saves less than 3% disk space at the cost of about 9% performance
> loss. If we can do the same the other way around I'd go for wasting some
> more disk space.
>
>
> Jan
>
> --
>
> #======================================================================#
> # It's easier to get forgiveness for being wrong than for being right. #
> # Let's break this rule - forgive me.                                  #
> #================================================== JanWieck@Yahoo.com #
>
> ---------------------------(end of broadcast)---------------------------
> TIP 1: subscribe and unsubscribe commands go to majordomo@postgresql.org
>

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026



Re: Reduce heap tuple header size

From
Jan Wieck
Date:
Bruce Momjian wrote:
>
> Jan, any update on this?  Are the numbers correct?

Sorry, but took some time.

Well, it turned out that pgbench does a terrible job with runtimes below
30 minutes. Seems that one checkpoint more or less can have a
significant impact on the numbers reported by such run.

Also starting off with a populated cache (rampup) seems to be a very
good idea. So my advice for running pgbench is to do an initdb before
running (to wipe out logfile creation/reuse issues). To populate a
fresh database with a reasonable scaling. Run pgbench with a high enough
-c and -t so it runs for at least 5 minutes. Then do the actual
measurement with a pgbench run with settings keeping the system busy for
30 minutes or more. Needless to say, keep your fingers (and everyone
else's too) off the system during that time. Shut down unneeded
services, and especially cron!

Using the above, the discussed change to the tuple header shows less
than 1% difference.


Sorry for all the confusion.
Jan

>
> ---------------------------------------------------------------------------
>
> Jan Wieck wrote:
> > Bruce Momjian wrote:
> > >
> > > Jan Wieck wrote:
> > > >
> > > > Did someone run at least pgbench with/without that patch applied?
> > >
> > > No, but he did perform this analysis:
> > >
> > > > thus reducing the additional cost to one t_infomask compare,
> > > > because the Satisfies functions only access Cmin and Cmax,
> > > > when HEAP_MOVED is known to be not set.
> > > >
> > > > OTOH experimenting with a moderatly sized "out of production"
> > > > database I got the following results:
> > > >                          | pages | pages |
> > > > relkind | count | tuples | before| after | savings
> > > > --------+-------+--------+-------+-------+--------
> > > > i       |    31 | 808146 |  8330 |  8330 |   0.00%
> > > > r       |    32 | 612968 | 13572 | 13184 |   2.86%
> > > > all     |    63 |        | 21902 | 21514 |   1.77%
> > > >
> > > > 2.86% fewer heap pages mean 2.86% less disk IO caused by heap pages.
> > > > Considering that index pages tend to benefit more from caching
> > > > we conclude that heap pages contribute more to the overall
> > > > IO load, so the total savings in the number of disk IOs should
> > > > be better than the 1.77% shown in the table above.  I think
> > > > this outweighs a few CPU cycles now and then.
> >
> > This anawhat? This is a proof that this patch is able to save not even
> > 3% of disk space in a production environment plus an assumption that the
> > saved IO outweights the extra effort in the tuple visibility checks.
> >
> > Here are some numbers:
> >
> > P3 850MHz 256MB RAM IDE
> > postmaster -N256 -B8192
> > pgbench -i -s 10 db
> > pgbench -c 20 -t 500 db
> >
> >
> > Current CVS tip:     tps  34.1, 38.7, 36.6
> >                  avg(tps) 36.4
> >
> > With patch:          tps  37.0, 41.1, 41.1
> >                  avg(tps) 39.7
> >
> > So it saves less than 3% disk space at the cost of about 9% performance
> > loss. If we can do the same the other way around I'd go for wasting some
> > more disk space.
> >
> >
> > Jan
> >
> > --
> >
> > #======================================================================#
> > # It's easier to get forgiveness for being wrong than for being right. #
> > # Let's break this rule - forgive me.                                  #
> > #================================================== JanWieck@Yahoo.com #
> >
> > ---------------------------(end of broadcast)---------------------------
> > TIP 1: subscribe and unsubscribe commands go to majordomo@postgresql.org
> >
>
> --
>   Bruce Momjian                        |  http://candle.pha.pa.us
>   pgman@candle.pha.pa.us               |  (610) 853-3000
>   +  If your life is a hard drive,     |  830 Blythe Avenue
>   +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026
>
> ---------------------------(end of broadcast)---------------------------
> TIP 4: Don't 'kill -9' the postmaster

--

#======================================================================#
# It's easier to get forgiveness for being wrong than for being right. #
# Let's break this rule - forgive me.                                  #
#================================================== JanWieck@Yahoo.com #



Re: Reduce heap tuple header size

From
Tom Lane
Date:
Jan Wieck <JanWieck@Yahoo.com> writes:
> Well, it turned out that pgbench does a terrible job with runtimes below
> 30 minutes. Seems that one checkpoint more or less can have a
> significant impact on the numbers reported by such run.

Yeah, it is *very* painful to get reproducible numbers out of pgbench.

> Using the above, the discussed change to the tuple header shows less
> than 1% difference.

So the bottom line is that there is probably no measurable performance
difference, but a 3% space savings, at least for average row lengths
comparable to those used in pgbench.  (Obviously the space savings is
going to depend on average row length...)

            regards, tom lane



Re: Reduce heap tuple header size

From
Jan Wieck
Date:
Tom Lane wrote:
>
> Jan Wieck <JanWieck@Yahoo.com> writes:
> > Well, it turned out that pgbench does a terrible job with runtimes below
> > 30 minutes. Seems that one checkpoint more or less can have a
> > significant impact on the numbers reported by such run.
>
> Yeah, it is *very* painful to get reproducible numbers out of pgbench.

And since it seems to be extremely checkpoint dependent, imagine what
someone would have to do to measure the impact of changing config
options about checkpoint segments and intervals. But I guess that this
would be an issue for every benchmark.

The TPC must know about it as the specs for the TPC-C require at least
one checkpoint occurring during the measurement time and 90% of all
transactions fitting the timing constraints. Currently PostgreSQL
creates an IO storm on a checkpoint, that simply brings the system down
to responsetimes 100x the average, slowly recovering from there since
all simulated users are active at once then (they all woke up from their
thinking or keying times and pressed SEND).


Jan

--

#======================================================================#
# It's easier to get forgiveness for being wrong than for being right. #
# Let's break this rule - forgive me.                                  #
#================================================== JanWieck@Yahoo.com #



Re: Reduce heap tuple header size II

From
Manfred Koizar
Date:
On Fri, 21 Jun 2002 12:45:48 -0400 (EDT), Bruce Momjian <pgman@candle.pha.pa.us> wrote:
>> > Yes, there are some good ones there, but the idea that somehow they are
>> > all going to hit in the same release seems unlikely.  I say let's do
>> > some now, some later, and move ahead.

I (strongly :-)) agree.

>Well, that is a different argument than initially.  So, it is a valid
>patch, but we have to decide when to apply it.
>
>We can easily hold it until we near release of 7.3.  If pg_upgrade is in
>good shape _and_ no other format changes are required, we can  hold it
>for 7.4.  What happens if 7.4 doesn't have any format changes?
>
>However, if we have other changes or pg_upgrade isn't going to work, we
>can apply it in August.

But what, if the patch causes or reveals a well hidden bug, possibly
somewhere else in the code?  I'd vote for applying it now, so that in
August it already has undergone some testing.  You can always patch -R
before going beta...

>(Manfred, you can be sure I will not lose this patch.)

Thanks.  Anyway, lose it!  Here is a new version.

>So, we have to decide if we apply it now or delay it for later in 7.3,
>or for >=7.4.

Why not let the users decide?  With this new version of the patch they
can
    configure --enable-pg72format
if this patch is the only thing that stops pg_upgrade from working and
if they want to use pg_upgrade.

>My personal vote is that we apply it now, and perhaps try some of the
>other format changes we were going to make.

And with #ifdef PG72FORMAT we can introduce them without very much
risk.  One thing that's still missing is we'd need two entries in
catversion.h.

Servus
 Manfred

diff -u ../base/INSTALL ./INSTALL
--- ../base/INSTALL    2002-01-31 01:46:26.000000000 +0100
+++ ./INSTALL    2002-06-25 15:59:03.000000000 +0200
@@ -283,6 +283,13 @@
           Enables single-byte character set recode support. See the
           Administrator's Guide about this feature.

+     --enable-pg72format
+
+          Enables version 7.2 page format, giving you a better chance of
+          converting existing databases by pg_upgrade. Do not use this
+          feature, if you are just starting to use PostgreSQL or if you
+          plan to upgrade via pg_dump/restore.
+
      --enable-multibyte

           Allows the use of multibyte character encodings (including
Common subdirectories: ../base/config and ./config
diff -u ../base/configure ./configure
--- ../base/configure    2002-05-29 14:42:20.000000000 +0200
+++ ./configure    2002-06-25 15:51:18.000000000 +0200
@@ -1652,6 +1652,44 @@


 #
+# Version 7.2 page format (--enable-pg72format)
+#
+echo "$as_me:$LINENO: checking whether to build with version 7.2 page format" >&5
+echo $ECHO_N "checking whether to build with version 7.2 page format... $ECHO_C" >&6
+
+
+# Check whether --enable-pg72format or --disable-pg72format was given.
+if test "${enable_pg72format+set}" = set; then
+  enableval="$enable_pg72format"
+
+  case $enableval in
+    yes)
+
+cat >>confdefs.h <<\_ACEOF
+#define PG72FORMAT 1
+_ACEOF
+
+      ;;
+    no)
+      :
+      ;;
+    *)
+      { { echo "$as_me:$LINENO: error: no argument expected for --enable-pg72format option" >&5
+echo "$as_me: error: no argument expected for --enable-pg72format option" >&2;}
+   { (exit 1); exit 1; }; }
+      ;;
+  esac
+
+else
+  enable_pg72format=no
+
+fi;
+
+echo "$as_me:$LINENO: result: $enable_pg72format" >&5
+echo "${ECHO_T}$enable_pg72format" >&6
+
+
+#
 # Multibyte support
 #
 MULTIBYTE=SQL_ASCII
diff -u ../base/configure.in ./configure.in
--- ../base/configure.in    2002-05-29 14:42:20.000000000 +0200
+++ ./configure.in    2002-06-25 15:51:15.000000000 +0200
@@ -162,6 +162,16 @@


 #
+# Version 7.2 page format (--enable-pg72format)
+#
+AC_MSG_CHECKING([whether to build with version 7.2 page format])
+PGAC_ARG_BOOL(enable, pg72format, no, [  --enable-pg72format     enable version 7.2 page format],
+              [AC_DEFINE([PG72FORMAT], 1,
+                         [Set to 1 if you want version 7.2 page format (--enable-pg72format)])])
+AC_MSG_RESULT([$enable_pg72format])
+
+
+#
 # Multibyte support
 #
 MULTIBYTE=SQL_ASCII
diff -ru ../base/src/backend/access/heap/heapam.c src/backend/access/heap/heapam.c
--- ../base/src/backend/access/heap/heapam.c    2002-06-17 10:11:31.000000000 +0200
+++ src/backend/access/heap/heapam.c    2002-06-17 22:35:32.000000000 +0200
@@ -2204,7 +2204,7 @@
         htup->t_infomask = HEAP_XMAX_INVALID | xlhdr.mask;
         HeapTupleHeaderSetXmin(htup, record->xl_xid);
         HeapTupleHeaderSetCmin(htup, FirstCommandId);
-        HeapTupleHeaderSetXmax(htup, InvalidTransactionId);
+        HeapTupleHeaderSetXmaxInvalid(htup);
         HeapTupleHeaderSetCmax(htup, FirstCommandId);

         offnum = PageAddItem(page, (Item) htup, newlen, offnum,
diff -ru ../base/src/include/access/htup.h src/include/access/htup.h
--- ../base/src/include/access/htup.h    2002-06-17 10:11:32.000000000 +0200
+++ src/include/access/htup.h    2002-06-25 16:14:37.000000000 +0200
@@ -57,15 +57,33 @@
  * Also note that we omit the nulls bitmap if t_infomask shows that there
  * are no nulls in the tuple.
  */
+/*
+** We store five "virtual" fields Xmin, Cmin, Xmax, Cmax, and Xvac
+** in three physical fields t_xmin, t_cid, t_xmax:
+** CommandId     Cmin;        insert CID stamp
+** CommandId     Cmax;        delete CommandId stamp
+** TransactionId Xmin;        insert XID stamp
+** TransactionId Xmax;        delete XID stamp
+** TransactionId Xvac;        used by VACUUM
+**
+** This assumes that a CommandId can be stored in a TransactionId.
+*/
 typedef struct HeapTupleHeaderData
 {
     Oid            t_oid;            /* OID of this tuple -- 4 bytes */

+#ifdef PG72FORMAT
+    /* v7.2: Xvac is stored in t_cmin */
     CommandId    t_cmin;            /* insert CID stamp -- 4 bytes each */
     CommandId    t_cmax;            /* delete CommandId stamp */

     TransactionId t_xmin;        /* insert XID stamp -- 4 bytes each */
     TransactionId t_xmax;        /* delete XID stamp */
+#else
+    TransactionId t_xmin;        /* Xmin -- 4 bytes each */
+    TransactionId t_cid;        /* Cmin, Cmax, Xvac */
+    TransactionId t_xmax;        /* Xmax, Cmax */
+#endif

     ItemPointerData t_ctid;        /* current TID of this or newer tuple */

@@ -75,7 +93,7 @@

     uint8        t_hoff;            /* sizeof header incl. bitmap, padding */

-    /* ^ - 31 bytes - ^ */
+    /* ^ - 27 (v7.3) or 31 (v7.2) bytes - ^ */

     bits8        t_bits[1];        /* bitmap of NULLs -- VARIABLE LENGTH */

@@ -96,6 +114,8 @@
                                          * attribute(s) */
 #define HEAP_HASEXTENDED        0x000C    /* the two above combined */

+#define HEAP_XMIN_IS_XMAX        0x0040    /* created and deleted in the */
+                                        /* same transaction */
 #define HEAP_XMAX_UNLOGGED        0x0080    /* to lock tuple for update */
                                         /* without logging */
 #define HEAP_XMIN_COMMITTED        0x0100    /* t_xmin committed */
@@ -108,6 +128,7 @@
                                          * vacuum */
 #define HEAP_MOVED_IN            0x8000    /* moved from another place by
                                          * vacuum */
+#define HEAP_MOVED (HEAP_MOVED_OFF | HEAP_MOVED_IN)

 #define HEAP_XACT_MASK            0xFFF0    /* visibility-related bits */

@@ -116,8 +137,11 @@
 /* HeapTupleHeader accessor macros */

 #define HeapTupleHeaderGetXmin(tup) \
-    ((tup)->t_xmin)
+( \
+    (tup)->t_xmin \
+)

+#ifdef PG72FORMAT
 #define HeapTupleHeaderGetXmax(tup) \
     ((tup)->t_xmax)

@@ -163,6 +187,98 @@
     AssertMacro((tup)->t_infomask & (HEAP_MOVED_IN | HEAP_MOVED_OFF)), \
     TransactionIdStore((xid), (TransactionId *) &((tup)->t_cmin)) \
 )
+#else
+#define HeapTupleHeaderGetXmax(tup) \
+( \
+    ((tup)->t_infomask & HEAP_XMIN_IS_XMAX) ? \
+        (tup)->t_xmin \
+    : \
+        (tup)->t_xmax \
+)
+
+/* no AssertMacro, because this is read as a system-defined attribute */
+#define HeapTupleHeaderGetCmin(tup) \
+( \
+    ((tup)->t_infomask & HEAP_MOVED) ? \
+        FirstCommandId \
+    : \
+    ( \
+        ((tup)->t_infomask & (HEAP_XMIN_IS_XMAX | HEAP_XMAX_INVALID)) ? \
+            (CommandId) (tup)->t_cid \
+        : \
+            FirstCommandId \
+    ) \
+)
+
+#define HeapTupleHeaderGetCmax(tup) \
+( \
+    ((tup)->t_infomask & HEAP_MOVED) ? \
+        FirstCommandId \
+    : \
+    ( \
+        ((tup)->t_infomask & (HEAP_XMIN_IS_XMAX | HEAP_XMAX_INVALID)) ? \
+            (CommandId) (tup)->t_xmax \
+        : \
+            (CommandId) (tup)->t_cid \
+    ) \
+)
+
+#define HeapTupleHeaderGetXvac(tup) \
+( \
+    AssertMacro((tup)->t_infomask & HEAP_MOVED), \
+    (tup)->t_cid \
+)
+
+
+#define HeapTupleHeaderSetXmin(tup, xid) \
+( \
+    TransactionIdStore((xid), &(tup)->t_xmin) \
+)
+
+#define HeapTupleHeaderSetXminInvalid(tup) \
+do { \
+    (tup)->t_infomask &= ~HEAP_XMIN_IS_XMAX; \
+    StoreInvalidTransactionId(&(tup)->t_xmin); \
+} while (0)
+
+#define HeapTupleHeaderSetXmax(tup, xid) \
+do { \
+    if (TransactionIdEquals((tup)->t_xmin, (xid))) \
+        (tup)->t_infomask |= HEAP_XMIN_IS_XMAX; \
+    else \
+    { \
+        (tup)->t_infomask &= ~HEAP_XMIN_IS_XMAX; \
+        TransactionIdStore((xid), &(tup)->t_xmax); \
+    } \
+} while (0)
+
+#define HeapTupleHeaderSetXmaxInvalid(tup) \
+do { \
+    (tup)->t_infomask &= ~HEAP_XMIN_IS_XMAX; \
+    StoreInvalidTransactionId(&(tup)->t_xmax); \
+} while (0)
+
+#define HeapTupleHeaderSetCmin(tup, cid) \
+do { \
+    Assert(!((tup)->t_infomask & HEAP_MOVED)); \
+    TransactionIdStore((TransactionId) (cid), &(tup)->t_cid); \
+} while (0)
+
+#define HeapTupleHeaderSetCmax(tup, cid) \
+do { \
+    Assert(!((tup)->t_infomask & HEAP_MOVED)); \
+    if ((tup)->t_infomask & HEAP_XMIN_IS_XMAX) \
+        TransactionIdStore((TransactionId) (cid), &(tup)->t_xmax); \
+    else \
+        TransactionIdStore((TransactionId) (cid), &(tup)->t_cid); \
+} while (0)
+
+#define HeapTupleHeaderSetXvac(tup, xid) \
+do { \
+    Assert((tup)->t_infomask & HEAP_MOVED); \
+    TransactionIdStore((xid), &(tup)->t_cid); \
+} while (0)
+#endif


 /*
diff -ru ../base/src/include/pg_config.h.in src/include/pg_config.h.in
--- ../base/src/include/pg_config.h.in    2002-06-17 21:04:03.000000000 +0200
+++ src/include/pg_config.h.in    2002-06-25 16:02:40.000000000 +0200
@@ -39,6 +39,9 @@
 /* Set to 1 if you want cyrillic recode (--enable-recode) */
 #undef CYR_RECODE

+/* Set to 1 if you want version 7.2 page format (--enable-pg72format) */
+#undef PG72FORMAT
+
 /* Set to 1 if you want to use multibyte characters (--enable-multibyte) */
 #undef MULTIBYTE





Re: Reduce heap tuple header size II

From
Tom Lane
Date:
Manfred Koizar <mkoi-pg@aon.at> writes:
> Why not let the users decide?  With this new version of the patch they
> can
>     configure --enable-pg72format

I think that is a really, really bad idea.  If we're gonna do it we
should just do it.  We don't need multiple incompatible file formats
running around all calling themselves PG 7.3.

Configure options are generally not a solution to anything anyway,
since more and more people use RPM distributions and don't have the
option to make their own configure choices.

            regards, tom lane



Re: Reduce heap tuple header size

From
Bruce Momjian
Date:
Patch applied.  Thanks.

Catalog version updated.  initdb everyone.

---------------------------------------------------------------------------



Manfred Koizar wrote:
> This patch, which is built upon the "HeapTupleHeader accessor macros"
> patch from 2002-06-10, is supposed to reduce the heap tuple header size
> by four bytes on most architectures.  Of course it changes the on-disk
> tuple format and therefore requires initdb.  As I have (once more)
> opened my mouth too wide, I'll have to provide a heap file conversion
> utility, if this patch gets accepted...  More on this later.
>
> ======================
>  All 81 tests passed.
> ======================
>
> It's late now, I'll do more tests tomorrow.
>
> Good night
>  Manfred
>
> diff -ru ../orig/src/backend/access/heap/heapam.c src/backend/access/heap/heapam.c
> --- ../orig/src/backend/access/heap/heapam.c    2002-06-13 19:34:48.000000000 +0200
> +++ src/backend/access/heap/heapam.c    2002-06-13 22:31:42.000000000 +0200
> @@ -2204,7 +2204,7 @@
>          htup->t_infomask = HEAP_XMAX_INVALID | xlhdr.mask;
>          HeapTupleHeaderSetXmin(htup, record->xl_xid);
>          HeapTupleHeaderSetCmin(htup, FirstCommandId);
> -        HeapTupleHeaderSetXmax(htup, InvalidTransactionId);
> +        HeapTupleHeaderSetXmaxInvalid(htup);
>          HeapTupleHeaderSetCmax(htup, FirstCommandId);
>
>          offnum = PageAddItem(page, (Item) htup, newlen, offnum,
> diff -ru ../orig/src/include/access/htup.h src/include/access/htup.h
> --- ../orig/src/include/access/htup.h    2002-06-13 19:34:49.000000000 +0200
> +++ src/include/access/htup.h    2002-06-14 01:12:47.000000000 +0200
> @@ -57,15 +57,24 @@
>   * Also note that we omit the nulls bitmap if t_infomask shows that there
>   * are no nulls in the tuple.
>   */
> +/*
> +** We store five "virtual" fields Xmin, Cmin, Xmax, Cmax, and Xvac
> +** in three physical fields t_xmin, t_cid, t_xmax:
> +** CommandId     Cmin;        insert CID stamp
> +** CommandId     Cmax;        delete CommandId stamp
> +** TransactionId Xmin;        insert XID stamp
> +** TransactionId Xmax;        delete XID stamp
> +** TransactionId Xvac;        used by VACUUM
> +**
> +** This assumes that a CommandId can be stored in a TransactionId.
> +*/
>  typedef struct HeapTupleHeaderData
>  {
>      Oid            t_oid;            /* OID of this tuple -- 4 bytes */
>
> -    CommandId    t_cmin;            /* insert CID stamp -- 4 bytes each */
> -    CommandId    t_cmax;            /* delete CommandId stamp */
> -
> -    TransactionId t_xmin;        /* insert XID stamp -- 4 bytes each */
> -    TransactionId t_xmax;        /* delete XID stamp */
> +    TransactionId t_xmin;        /* Xmin -- 4 bytes each */
> +    TransactionId t_cid;        /* Cmin, Cmax, Xvac */
> +    TransactionId t_xmax;        /* Xmax, Cmax */
>
>      ItemPointerData t_ctid;        /* current TID of this or newer tuple */
>
> @@ -75,7 +84,7 @@
>
>      uint8        t_hoff;            /* sizeof header incl. bitmap, padding */
>
> -    /* ^ - 31 bytes - ^ */
> +    /* ^ - 27 bytes - ^ */
>
>      bits8        t_bits[1];        /* bitmap of NULLs -- VARIABLE LENGTH */
>
> @@ -96,6 +105,8 @@
>                                           * attribute(s) */
>  #define HEAP_HASEXTENDED        0x000C    /* the two above combined */
>
> +#define HEAP_XMIN_IS_XMAX        0x0040    /* created and deleted in the */
> +                                        /* same transaction
> */
>  #define HEAP_XMAX_UNLOGGED        0x0080    /* to lock tuple for update */
>                                          /* without logging
> */
>  #define HEAP_XMIN_COMMITTED        0x0100    /* t_xmin committed */
> @@ -108,6 +119,7 @@
>                                           * vacuum */
>  #define HEAP_MOVED_IN            0x8000    /* moved from another place by
>                                           * vacuum */
> +#define HEAP_MOVED (HEAP_MOVED_OFF | HEAP_MOVED_IN)
>
>  #define HEAP_XACT_MASK            0xFFF0    /* visibility-related bits */
>
> @@ -116,53 +128,100 @@
>  /* HeapTupleHeader accessor macros */
>
>  #define HeapTupleHeaderGetXmin(tup) \
> -    ((tup)->t_xmin)
> +( \
> +    (tup)->t_xmin \
> +)
>
>  #define HeapTupleHeaderGetXmax(tup) \
> -    ((tup)->t_xmax)
> +( \
> +    ((tup)->t_infomask & HEAP_XMIN_IS_XMAX) ? \
> +        (tup)->t_xmin \
> +    : \
> +        (tup)->t_xmax \
> +)
>
> -/* no AssertMacro, because this is read as a system-defined attribute also */
> +/* no AssertMacro, because this is read as a system-defined attribute */
>  #define HeapTupleHeaderGetCmin(tup) \
>  ( \
> -    (tup)->t_cmin \
> +    ((tup)->t_infomask & HEAP_MOVED) ? \
> +        FirstCommandId \
> +    : \
> +    ( \
> +        ((tup)->t_infomask & (HEAP_XMIN_IS_XMAX | HEAP_XMAX_INVALID)) ? \
> +            (CommandId) (tup)->t_cid \
> +        : \
> +            FirstCommandId \
> +    ) \
>  )
>
>  #define HeapTupleHeaderGetCmax(tup) \
> -    ((tup)->t_cmax)
> +( \
> +    ((tup)->t_infomask & HEAP_MOVED) ? \
> +        FirstCommandId \
> +    : \
> +    ( \
> +        ((tup)->t_infomask & (HEAP_XMIN_IS_XMAX | HEAP_XMAX_INVALID)) ? \
> +            (CommandId) (tup)->t_xmax \
> +        : \
> +            (CommandId) (tup)->t_cid \
> +    ) \
> +)
>
>  #define HeapTupleHeaderGetXvac(tup) \
>  ( \
> -    AssertMacro((tup)->t_infomask & (HEAP_MOVED_IN | HEAP_MOVED_OFF)), \
> -    (TransactionId) (tup)->t_cmin \
> +    AssertMacro((tup)->t_infomask & HEAP_MOVED), \
> +    (tup)->t_cid \
>  )
>
>
>  #define HeapTupleHeaderSetXmin(tup, xid) \
> -    (TransactionIdStore((xid), &(tup)->t_xmin))
> +( \
> +    TransactionIdStore((xid), &(tup)->t_xmin) \
> +)
>
>  #define HeapTupleHeaderSetXminInvalid(tup) \
> -    (StoreInvalidTransactionId(&(tup)->t_xmin))
> +do { \
> +    (tup)->t_infomask &= ~HEAP_XMIN_IS_XMAX; \
> +    StoreInvalidTransactionId(&(tup)->t_xmin); \
> +} while (0)
>
>  #define HeapTupleHeaderSetXmax(tup, xid) \
> -    (TransactionIdStore((xid), &(tup)->t_xmax))
> +do { \
> +    if (TransactionIdEquals((tup)->t_xmin, (xid))) \
> +        (tup)->t_infomask |= HEAP_XMIN_IS_XMAX; \
> +    else \
> +    { \
> +        (tup)->t_infomask &= ~HEAP_XMIN_IS_XMAX; \
> +        TransactionIdStore((xid), &(tup)->t_xmax); \
> +    } \
> +} while (0)
>
>  #define HeapTupleHeaderSetXmaxInvalid(tup) \
> -    (StoreInvalidTransactionId(&(tup)->t_xmax))
> +do { \
> +    (tup)->t_infomask &= ~HEAP_XMIN_IS_XMAX; \
> +    StoreInvalidTransactionId(&(tup)->t_xmax); \
> +} while (0)
>
>  #define HeapTupleHeaderSetCmin(tup, cid) \
> -( \
> -    AssertMacro(!((tup)->t_infomask & (HEAP_MOVED_IN | HEAP_MOVED_OFF))), \
> -    (tup)->t_cmin = (cid) \
> -)
> +do { \
> +    Assert(!((tup)->t_infomask & HEAP_MOVED)); \
> +    TransactionIdStore((TransactionId) (cid), &(tup)->t_cid); \
> +} while (0)
>
>  #define HeapTupleHeaderSetCmax(tup, cid) \
> -    ((tup)->t_cmax = (cid))
> +do { \
> +    Assert(!((tup)->t_infomask & HEAP_MOVED)); \
> +    if ((tup)->t_infomask & HEAP_XMIN_IS_XMAX) \
> +        TransactionIdStore((TransactionId) (cid), &(tup)->t_xmax); \
> +    else \
> +        TransactionIdStore((TransactionId) (cid), &(tup)->t_cid); \
> +} while (0)
>
>  #define HeapTupleHeaderSetXvac(tup, xid) \
> -( \
> -    AssertMacro((tup)->t_infomask & (HEAP_MOVED_IN | HEAP_MOVED_OFF)), \
> -    TransactionIdStore((xid), (TransactionId *) &((tup)->t_cmin)) \
> -)
> +do { \
> +    Assert((tup)->t_infomask & HEAP_MOVED); \
> +    TransactionIdStore((xid), &(tup)->t_cid); \
> +} while (0)
>
>
>  /*
>
>
> ---------------------------(end of broadcast)---------------------------
> TIP 3: if posting/reading through Usenet, please send an appropriate
> subscribe-nomail command to majordomo@postgresql.org so that your
> message can get through to the mailing list cleanly
>

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026