Re: Unexpected interval comparison - Mailing list pgsql-general

From Tom Lane
Subject Re: Unexpected interval comparison
Date
Msg-id 5084.1491343598@sss.pgh.pa.us
Whole thread Raw
In response to Re: Unexpected interval comparison  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: Unexpected interval comparison
List pgsql-general
Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp> writes:
> The first attached is the revised patch and the second is
> temporary sanity check code for non-128bit environment code. (but
> works only on 128 bit environment)

This seemed to me to be probably even less correct, so I extracted
the addition and multiplication logic into a standalone test program
(attached), which compares the result of a multiplication to that
of native int128 arithmetic.  I changed the order of the LinearInterval
fields to be LS-first so that I could overlay them onto an int128
result (on a little-endian machine); this is just for testing purposes
not something we must do in the finished code.  I soon found cases
where it indeed fails, eg

$ ./a.out 0x7ffffffff 0x7ffffffff
7FFFFFFFF * 7FFFFFFFF
result = 62 18446744004990074881
result = 3E FFFFFFF000000001
MISMATCH!
result = 63 18446744004990074881
result = 3F FFFFFFF000000001

After fooling with it for awhile, I decided that the cause of the
problems was basically not thinking carefully about the lower half
of the value being unsigned: that affects when to do carries in
the addition macro, and we also have to be careful about whether
or not to sign-extend the partial product terms.  The second
attached file is a version that I can't break anymore, though I'm
not quite sure it's bug-free.

            regards, tom lane

#include "postgres.h"


/*
 * LinearInterval's alternative defeinition for the environments without
 * int128 arithmetics. See interval_cmp_value for datails.
 */
typedef struct
{
    uint64    lo; /* holds the lower 64 bits without sign */
    int64    hi;    /* holds significant 64 bits including a sign bit */
} LinearInterval;

typedef union LI
{
    int128 i128;
    LinearInterval li;
} LI;


/*
 * arithmetic 32 bit extraction from int64
 *
 * INT64_AU32 extracts significant 32 bit of int64 as a int64, and INT64_AL32
 * extracts non-siginificant 32 bit as a int64. Both macros extends sign bits
 * according to the given value. The values of these macros and the parameter
 * value are in the following relationship.
 *
 * i64 = (int64)INT64_AU32(i64) * (2^32) + (int64)INT64_AL32(i64)
 */
#define INT64_AU32(i64) ((i64) / (1LL<<32))
#define INT64_AL32(i64) (((i64) & 0xffffffff) | ((i64) < 0 ? 0xffffffff00000000 : 0))

/*
 * Adds signed 65 bit integer into LinearInterval variable. If s is not zero,
 * its sign is used as v's sign.
 */
#define LINEARINTERVAL_ADD_INT65(li, v, s) \
{ \
    uint64 t = (uint64)(v); \
    uint64 p = (li).lo;    \
    (li).lo += t; \
    if (s < 0 || (s == 0 && v < 0))    \
        (li).hi --; \
    if ((li).lo < p) \
        (li).hi ++; \
}

static inline LinearInterval
interval_times(int64 x, int64 y)
{
    LinearInterval    span = {0, 0};
    int64     tmp;

    /*
     * perform 128 bit multiplication using 64 bit variables.
     *
     *   x * y = ((x.hi << 32) + x.lo) * (((y.hi << 32) + y.lo)
     *         = (x.hi * y.hi) << 64 +
     *           ((x.hi * y.lo) + (x.lo * y.hi)) << 32 +
     *           x.lo * y.lo
     */

    /* We don't bother calculation results in zero */
    if (x != 0 && y != 0)
    {
        int64 x_u32 = INT64_AU32(x);
        int64 x_l32 = INT64_AL32(x);

        /* the first term */
        span.hi = x_u32 * (y >> 32);

        /* the second term */
        tmp = x_l32 * (y >> 32)
            + x_u32 * (y & 0xffffffff);
        span.hi += INT64_AU32(tmp);

        /* this shift may push out MSB. supply it explicitly */
        LINEARINTERVAL_ADD_INT65(span, INT64_AL32(tmp) << 32, tmp);

        /* the third term */
        tmp = x_l32 * (y & 0xffffffff);
        LINEARINTERVAL_ADD_INT65(span, tmp, 0);
    }

    return span;
}

int
main(int argc, char **argv)
{
    int64 x = strtol(argv[1], NULL, 0);
    int64 y = strtol(argv[2], NULL, 0);
    LI li;
    LI li2;

    printf("%lX * %lX\n", x, y);

    li.li = interval_times(x, y);

    printf("result = %ld %lu\n", li.li.hi, li.li.lo);
    printf("result = %lX %lX\n", li.li.hi, li.li.lo);

    li2.i128 = (int128) x * (int128) y;

    if (li.li.hi != li2.li.hi || li.li.lo != li2.li.lo)
    {
        printf("MISMATCH!\n");
        printf("result = %ld %lu\n", li2.li.hi, li2.li.lo);
        printf("result = %lX %lX\n", li2.li.hi, li2.li.lo);
    }

    return 0;
}
#include "postgres.h"


/*
 * LinearInterval's alternative defeinition for the environments without
 * int128 arithmetics. See interval_cmp_value for datails.
 */
typedef struct
{
    uint64        lo;                /* holds the lower 64 bits without sign */
    int64        hi;                /* holds significant 64 bits including a sign
                                 * bit */
} LinearInterval;

typedef union LI
{
    int128        i128;
    LinearInterval li;
} LI;


/*
 * INT64_AU32 extracts the most significant 32 bits of int64 as int64, while
 * INT64_AL32 extracts the least significant 32 bits as uint64.
 */
#define INT64_AU32(i64) ((i64) >> 32)
#define INT64_AL32(i64) ((i64) & UINT64CONST(0xFFFFFFFF))

/*
 * Add an unsigned int64 value into a LinearInterval variable.
 * First add the value to the .lo part, then check to see if a carry
 * needs to be propagated into the .hi part.  A carry is needed if both
 * inputs have high bits set, or if just one input has high bit set
 * but the new .lo part doesn't.  Remember that .lo part is unsigned;
 * we cast to signed here just as a cheap way to check the high bit.
 */
#define LINEARINTERVAL_ADD_UINT64(li, v) \
do { \
    uint64        t = (uint64) (v); \
    uint64        oldlo = (li).lo; \
    (li).lo += t; \
    if (((int64) t < 0 && (int64) oldlo < 0) || \
        (((int64) t < 0 || (int64) oldlo < 0) && (int64) (li).lo >= 0)) \
        (li).hi++; \
} while (0)

static inline LinearInterval
interval_times(int64 x, int64 y)
{
    LinearInterval span = {0, 0};

    /*----------
     * Form the 128-bit product x * y using 64-bit arithmetic.
     * Considering each 64-bit input as having 32-bit high and low parts,
     * we can compute
     *
     *     x * y = ((x.hi << 32) + x.lo) * (((y.hi << 32) + y.lo)
     *           = (x.hi * y.hi) << 64 +
     *             (x.hi * y.lo) << 32 +
     *             (x.lo * y.hi) << 32 +
     *             x.lo * y.lo
     *
     * Each individual product is of 32-bit terms so it won't overflow when
     * computed in 64-bit arithmetic.  Then we just have to shift it to the
     * correct position while adding into the 128-bit result.  We must also
     * keep in mind that the "lo" parts must be treated as unsigned.
     *----------
     */

    /* INT64_AU32 must use arithmetic right shift */
    StaticAssertStmt(((int64) -1 >> 1) == (int64) -1,
                     "arithmetic right shift is needed");

    /* No need to work hard if product must be zero */
    if (x != 0 && y != 0)
    {
        int64        x_u32 = INT64_AU32(x);
        uint64        x_l32 = INT64_AL32(x);
        int64        y_u32 = INT64_AU32(y);
        uint64        y_l32 = INT64_AL32(y);
        int64        tmp;

        /* the first term */
        span.hi = x_u32 * y_u32;
        printf("first term  = %016lX\n", span.hi);

        /* the second term: sign-extend it only if x is negative */
        tmp = x_u32 * y_l32;
        printf("second term =         %016lX\n", tmp);
        if (x < 0)
            span.hi += INT64_AU32(tmp);
        else
            span.hi += ((uint64) tmp) >> 32;
        LINEARINTERVAL_ADD_UINT64(span, ((uint64) INT64_AL32(tmp)) << 32);
        printf("partial sum = %016lX%016lX\n", span.hi, span.lo);

        /* the third term: sign-extend it only if y is negative */
        tmp = x_l32 * y_u32;
        printf("third term  =         %016lX\n", tmp);
        if (y < 0)
            span.hi += INT64_AU32(tmp);
        else
            span.hi += ((uint64) tmp) >> 32;
        LINEARINTERVAL_ADD_UINT64(span, ((uint64) INT64_AL32(tmp)) << 32);
        printf("partial sum = %016lX%016lX\n", span.hi, span.lo);

        /* the fourth term: always unsigned */
        printf("fourth term =                 %016lX\n", x_l32 * y_l32);
        LINEARINTERVAL_ADD_UINT64(span, x_l32 * y_l32);
    }

    return span;
}

int
main(int argc, char **argv)
{
    int64        x = strtol(argv[1], NULL, 0);
    int64        y = strtol(argv[2], NULL, 0);
    LI            li;
    LI            li2;

    printf("%lX * %lX\n", x, y);

    li.li = interval_times(x, y);

    printf("result      = %016lX%016lX\n", li.li.hi, li.li.lo);
    printf("result = %ld %lu\n", li.li.hi, li.li.lo);

    li2.i128 = (int128) x * (int128) y;

    if (li.li.hi != li2.li.hi || li.li.lo != li2.li.lo)
    {
        printf("MISMATCH!\n");
        printf("result      = %016lX%016lX\n", li2.li.hi, li2.li.lo);
        printf("result = %ld %lu\n", li2.li.hi, li2.li.lo);
    }

    return 0;
}

pgsql-general by date:

Previous
From: "David G. Johnston"
Date:
Subject: Re: getting column names
Next
From: "Armand Pirvu (home)"
Date:
Subject: Re: getting column names