Re: Date-Time dangling unit fix - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Date-Time dangling unit fix
Date
Msg-id 703560.1678407991@sss.pgh.pa.us
Whole thread Raw
In response to Re: Date-Time dangling unit fix  (Joseph Koshakow <koshy44@gmail.com>)
Responses Re: Date-Time dangling unit fix  (Alexander Lakhin <exclusion@gmail.com>)
List pgsql-hackers
Joseph Koshakow <koshy44@gmail.com> writes:
> Also I removed some dead code from the previous patch.

This needs a rebase over bcc704b52, so I did that and made a
couple of other adjustments.

I'm inclined to think that you removed too much from DecodeTimeOnly.
That does accept date specs (at least for timetz), and I see no very
good reason for it not to accept a Julian date spec.  I also wonder
why you removed the UNITS: case there.  Seems like we want these
functions to accept the same syntax as much as possible.

I think the code is still a bit schizophrenic about placement of
ptype specs.  In the UNITS: case, we don't insist that a unit
apply to exactly the very next field; instead it applies to the next
one where it disambiguates.  So for instead this is accepted:

regression=# select 'J PM 1234567 1:23'::timestamp;
       timestamp        
------------------------
 1333-01-11 13:23:00 BC

That's a little weird, or maybe even a lot weird, but it's not
inherently nonsensical so I'm hesitant to stop accepting it.
However, if UNITS acts that way, then why is ISOTIME different?
So I'm inclined to remove ISOTIME's lookahead check

                        if (i >= nf - 1 ||
                            (ftype[i + 1] != DTK_NUMBER &&
                             ftype[i + 1] != DTK_TIME &&
                             ftype[i + 1] != DTK_DATE))
                            return DTERR_BAD_FORMAT;

and rely on the ptype-still-set error at the bottom of the loop
to complain about nonsensical cases.

Also, if we do keep the lookahead checks, the one in DecodeTimeOnly
could be simplified --- it's accepting some cases that actually
aren't supported there.

            regards, tom lane

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index a7558d39a0..13856b06d1 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -983,7 +983,7 @@ DecodeDateTime(char **field, int *ftype, int nf,
     int            fmask = 0,
                 tmask,
                 type;
-    int            ptype = 0;        /* "prefix type" for ISO y2001m02d04 format */
+    int            ptype = 0;        /* "prefix type" for ISO and Julian formats */
     int            i;
     int            val;
     int            dterr;
@@ -1071,6 +1071,9 @@ DecodeDateTime(char **field, int *ftype, int nf,
                     {
                         char       *cp;

+                        /*
+                         * Allow a preceding "t" field, but no other units.
+                         */
                         if (ptype != 0)
                         {
                             /* Sanity check; should not fail this test */
@@ -1175,8 +1178,7 @@ DecodeDateTime(char **field, int *ftype, int nf,
             case DTK_NUMBER:

                 /*
-                 * Was this an "ISO date" with embedded field labels? An
-                 * example is "y2001m02d04" - thomas 2001-02-04
+                 * Deal with cases where previous field labeled this one
                  */
                 if (ptype != 0)
                 {
@@ -1187,85 +1189,11 @@ DecodeDateTime(char **field, int *ftype, int nf,
                     value = strtoint(field[i], &cp, 10);
                     if (errno == ERANGE)
                         return DTERR_FIELD_OVERFLOW;
-
-                    /*
-                     * only a few kinds are allowed to have an embedded
-                     * decimal
-                     */
-                    if (*cp == '.')
-                        switch (ptype)
-                        {
-                            case DTK_JULIAN:
-                            case DTK_TIME:
-                            case DTK_SECOND:
-                                break;
-                            default:
-                                return DTERR_BAD_FORMAT;
-                                break;
-                        }
-                    else if (*cp != '\0')
+                    if (*cp != '.' && *cp != '\0')
                         return DTERR_BAD_FORMAT;

                     switch (ptype)
                     {
-                        case DTK_YEAR:
-                            tm->tm_year = value;
-                            tmask = DTK_M(YEAR);
-                            break;
-
-                        case DTK_MONTH:
-
-                            /*
-                             * already have a month and hour? then assume
-                             * minutes
-                             */
-                            if ((fmask & DTK_M(MONTH)) != 0 &&
-                                (fmask & DTK_M(HOUR)) != 0)
-                            {
-                                tm->tm_min = value;
-                                tmask = DTK_M(MINUTE);
-                            }
-                            else
-                            {
-                                tm->tm_mon = value;
-                                tmask = DTK_M(MONTH);
-                            }
-                            break;
-
-                        case DTK_DAY:
-                            tm->tm_mday = value;
-                            tmask = DTK_M(DAY);
-                            break;
-
-                        case DTK_HOUR:
-                            tm->tm_hour = value;
-                            tmask = DTK_M(HOUR);
-                            break;
-
-                        case DTK_MINUTE:
-                            tm->tm_min = value;
-                            tmask = DTK_M(MINUTE);
-                            break;
-
-                        case DTK_SECOND:
-                            tm->tm_sec = value;
-                            tmask = DTK_M(SECOND);
-                            if (*cp == '.')
-                            {
-                                dterr = ParseFractionalSecond(cp, fsec);
-                                if (dterr)
-                                    return dterr;
-                                tmask = DTK_ALL_SECS_M;
-                            }
-                            break;
-
-                        case DTK_TZ:
-                            tmask = DTK_M(TZ);
-                            dterr = DecodeTimezone(field[i], tzp);
-                            if (dterr)
-                                return dterr;
-                            break;
-
                         case DTK_JULIAN:
                             /* previous field was a label for "julian date" */
                             if (value < 0)
@@ -1519,6 +1447,9 @@ DecodeDateTime(char **field, int *ftype, int nf,

                     case UNITS:
                         tmask = 0;
+                        /* reject consecutive unhandled units */
+                        if (ptype != 0)
+                            return DTERR_BAD_FORMAT;
                         ptype = val;
                         break;

@@ -1576,6 +1507,10 @@ DecodeDateTime(char **field, int *ftype, int nf,
         fmask |= tmask;
     }                            /* end loop over fields */

+    /* reject if prefix type appeared and was never handled */
+    if (ptype != 0)
+        return DTERR_BAD_FORMAT;
+
     /* do additional checking for normal date specs (but not "infinity" etc) */
     if (*dtype == DTK_DATE)
     {
@@ -1943,7 +1878,7 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
     int            fmask = 0,
                 tmask,
                 type;
-    int            ptype = 0;        /* "prefix type" for ISO h04mm05s06 format */
+    int            ptype = 0;        /* "prefix type" for ISO format */
     int            i;
     int            val;
     int            dterr;
@@ -2070,133 +2005,12 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
             case DTK_NUMBER:

                 /*
-                 * Was this an "ISO time" with embedded field labels? An
-                 * example is "h04mm05s06" - thomas 2001-02-04
+                 * Was this an "ISO time", for example "T040506.789"?
                  */
                 if (ptype != 0)
                 {
-                    char       *cp;
-                    int            value;
-
-                    /* Only accept a date under limited circumstances */
                     switch (ptype)
                     {
-                        case DTK_JULIAN:
-                        case DTK_YEAR:
-                        case DTK_MONTH:
-                        case DTK_DAY:
-                            if (tzp == NULL)
-                                return DTERR_BAD_FORMAT;
-                        default:
-                            break;
-                    }
-
-                    errno = 0;
-                    value = strtoint(field[i], &cp, 10);
-                    if (errno == ERANGE)
-                        return DTERR_FIELD_OVERFLOW;
-
-                    /*
-                     * only a few kinds are allowed to have an embedded
-                     * decimal
-                     */
-                    if (*cp == '.')
-                        switch (ptype)
-                        {
-                            case DTK_JULIAN:
-                            case DTK_TIME:
-                            case DTK_SECOND:
-                                break;
-                            default:
-                                return DTERR_BAD_FORMAT;
-                                break;
-                        }
-                    else if (*cp != '\0')
-                        return DTERR_BAD_FORMAT;
-
-                    switch (ptype)
-                    {
-                        case DTK_YEAR:
-                            tm->tm_year = value;
-                            tmask = DTK_M(YEAR);
-                            break;
-
-                        case DTK_MONTH:
-
-                            /*
-                             * already have a month and hour? then assume
-                             * minutes
-                             */
-                            if ((fmask & DTK_M(MONTH)) != 0 &&
-                                (fmask & DTK_M(HOUR)) != 0)
-                            {
-                                tm->tm_min = value;
-                                tmask = DTK_M(MINUTE);
-                            }
-                            else
-                            {
-                                tm->tm_mon = value;
-                                tmask = DTK_M(MONTH);
-                            }
-                            break;
-
-                        case DTK_DAY:
-                            tm->tm_mday = value;
-                            tmask = DTK_M(DAY);
-                            break;
-
-                        case DTK_HOUR:
-                            tm->tm_hour = value;
-                            tmask = DTK_M(HOUR);
-                            break;
-
-                        case DTK_MINUTE:
-                            tm->tm_min = value;
-                            tmask = DTK_M(MINUTE);
-                            break;
-
-                        case DTK_SECOND:
-                            tm->tm_sec = value;
-                            tmask = DTK_M(SECOND);
-                            if (*cp == '.')
-                            {
-                                dterr = ParseFractionalSecond(cp, fsec);
-                                if (dterr)
-                                    return dterr;
-                                tmask = DTK_ALL_SECS_M;
-                            }
-                            break;
-
-                        case DTK_TZ:
-                            tmask = DTK_M(TZ);
-                            dterr = DecodeTimezone(field[i], tzp);
-                            if (dterr)
-                                return dterr;
-                            break;
-
-                        case DTK_JULIAN:
-                            /* previous field was a label for "julian date" */
-                            if (value < 0)
-                                return DTERR_FIELD_OVERFLOW;
-                            tmask = DTK_DATE_M;
-                            j2date(value, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
-                            isjulian = true;
-
-                            if (*cp == '.')
-                            {
-                                double        time;
-
-                                dterr = ParseFraction(cp, &time);
-                                if (dterr)
-                                    return dterr;
-                                time *= USECS_PER_DAY;
-                                dt2time(time,
-                                        &tm->tm_hour, &tm->tm_min,
-                                        &tm->tm_sec, fsec);
-                                tmask |= DTK_TIME_M;
-                            }
-                            break;
-
                         case DTK_TIME:
                             /* previous field was "t" for ISO time */
                             dterr = DecodeNumberField(strlen(field[i]), field[i],
@@ -2376,11 +2190,6 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
                         bc = (val == BC);
                         break;

-                    case UNITS:
-                        tmask = 0;
-                        ptype = val;
-                        break;
-
                     case ISOTIME:
                         tmask = 0;

@@ -2426,6 +2235,10 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
         fmask |= tmask;
     }                            /* end loop over fields */

+    /* reject if prefix type appeared and was never handled */
+    if (ptype != 0)
+        return DTERR_BAD_FORMAT;
+
     /* do final checking/adjustment of Y/M/D fields */
     dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm);
     if (dterr)
diff --git a/src/test/regress/expected/horology.out b/src/test/regress/expected/horology.out
index 4f01131077..d87d0daad8 100644
--- a/src/test/regress/expected/horology.out
+++ b/src/test/regress/expected/horology.out
@@ -97,30 +97,6 @@ SELECT timestamp with time zone '27/12/2001 04:05:06.789-08';
 (1 row)

 reset datestyle;
-SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08';
-           timestamptz
-----------------------------------
- Wed Dec 26 12:05:06.789 2001 PST
-(1 row)
-
-SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789-08';
-           timestamptz
-----------------------------------
- Thu Dec 27 04:05:06.789 2001 PST
-(1 row)
-
-SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789+08';
-           timestamptz
-----------------------------------
- Wed Dec 26 12:05:06.789 2001 PST
-(1 row)
-
-SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08';
-           timestamptz
-----------------------------------
- Thu Dec 27 04:05:06.789 2001 PST
-(1 row)
-
 SELECT timestamp with time zone 'J2452271+08';
          timestamptz
 ------------------------------
@@ -283,6 +259,25 @@ SELECT date 'J0' AS "Julian Epoch";
  11-24-4714 BC
 (1 row)

+-- test error on dangling Julian units
+SELECT date '1995-08-06  J J J';
+ERROR:  invalid input syntax for type date: "1995-08-06  J J J"
+LINE 1: SELECT date '1995-08-06  J J J';
+                    ^
+SELECT date 'J J 1520447';
+ERROR:  invalid input syntax for type date: "J J 1520447"
+LINE 1: SELECT date 'J J 1520447';
+                    ^
+-- We used to accept this input style, but it was based on a misreading
+-- of ISO8601, and it was never documented anyway
+SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08';
+ERROR:  invalid input syntax for type timestamp with time zone: "Y2001M12D27H04M05S06.789+08"
+LINE 1: SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08...
+                                        ^
+SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08';
+ERROR:  invalid input syntax for type timestamp with time zone: "Y2001M12D27H04MM05S06.789-08"
+LINE 1: SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-0...
+                                        ^
 -- conflicting fields should throw errors
 SELECT date '1995-08-06 epoch';
 ERROR:  invalid input syntax for type date: "1995-08-06 epoch"
diff --git a/src/test/regress/sql/horology.sql b/src/test/regress/sql/horology.sql
index 0676cac5d1..474f92dcd9 100644
--- a/src/test/regress/sql/horology.sql
+++ b/src/test/regress/sql/horology.sql
@@ -25,10 +25,6 @@ SELECT timestamp with time zone '27/12/2001 04:05:06.789-08';
 set datestyle to dmy;
 SELECT timestamp with time zone '27/12/2001 04:05:06.789-08';
 reset datestyle;
-SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08';
-SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789-08';
-SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789+08';
-SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08';
 SELECT timestamp with time zone 'J2452271+08';
 SELECT timestamp with time zone 'J2452271-08';
 SELECT timestamp with time zone 'J2452271.5+08';
@@ -62,6 +58,15 @@ SET DateStyle = 'Postgres, MDY';
 SELECT date 'J1520447' AS "Confucius' Birthday";
 SELECT date 'J0' AS "Julian Epoch";

+-- test error on dangling Julian units
+SELECT date '1995-08-06  J J J';
+SELECT date 'J J 1520447';
+
+-- We used to accept this input style, but it was based on a misreading
+-- of ISO8601, and it was never documented anyway
+SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08';
+SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08';
+
 -- conflicting fields should throw errors
 SELECT date '1995-08-06 epoch';
 SELECT date '1995-08-06 infinity';

pgsql-hackers by date:

Previous
From: Thomas Munro
Date:
Subject: Re: Sub-millisecond [autovacuum_]vacuum_cost_delay broken
Next
From: Tom Lane
Date:
Subject: Re: Sub-millisecond [autovacuum_]vacuum_cost_delay broken