diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 7412df0..2e2aa38 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -6387,8 +6387,17 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); TM does not include trailing blanks. + + + + + to_timestamp and to_date ignore - the TM modifier. + case when receiving names as input. For example, + to_timestamp('2000-JUN', 'YYYY-MON'), + to_timestamp('2000-Jun', 'YYYY-MON') and + to_timestamp('2000-JUN', 'YYYY-mon') all work and return + the same output. diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index b3115e4..90c707b 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -174,6 +174,13 @@ typedef struct #define CLOCK_24_HOUR 0 #define CLOCK_12_HOUR 1 +/* ---------- + * Dimension for seq_search. + * ---------- + */ +#define MONTHS_DIM 12 +#define DAYS_DIM 7 +#define AD_AM_DIM 4 /* ---------- * Full months @@ -181,11 +188,11 @@ typedef struct */ static const char *const months_full[] = { "January", "February", "March", "April", "May", "June", "July", - "August", "September", "October", "November", "December", NULL + "August", "September", "October", "November", "December" }; static const char *const days_short[] = { - "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" }; /* ---------- @@ -217,8 +224,8 @@ static const char *const days_short[] = { * matches for BC have an odd index. So the boolean value for BC is given by * taking the array index of the match, modulo 2. 
*/ -static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL}; -static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL}; +static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR}; +static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR}; /* ---------- * AM / PM @@ -244,8 +251,8 @@ static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_S * matches for PM have an odd index. So the boolean value for PM is given by * taking the array index of the match, modulo 2. */ -static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL}; -static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL}; +static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR}; +static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR}; /* ---------- * Months in roman-numeral @@ -254,10 +261,10 @@ static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_S * ---------- */ static const char *const rm_months_upper[] = -{"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL}; +{"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I"}; static const char *const rm_months_lower[] = -{"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL}; +{"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i"}; /* ---------- * Roman numbers @@ -975,7 +982,7 @@ static void parse_format(FormatNode *node, const char *str, const KeyWord *kw, static void DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid); -static void DCH_from_char(FormatNode *node, char *in, TmFromChar *out); +static void DCH_from_char(FormatNode *node, char *in, TmFromChar *out, Oid collid); #ifdef DEBUG_TO_FROM_CHAR static void dump_index(const KeyWord *k, const int *index); @@ -990,10 +997,11 @@ 
static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode); static void from_char_set_int(int *dest, const int value, const FormatNode *node); static int from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node); static int from_char_parse_int(int *dest, char **src, FormatNode *node); -static int seq_search(char *name, const char *const *array, int type, int max, int *len); -static int from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, FormatNode *node); +static int seq_search(char *name, const char *const *array, int type, int max, int dim, int *len, Oid collid); +static int from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, int dim, + FormatNode *node, Oid collid); static void do_to_timestamp(text *date_txt, text *fmt, - struct pg_tm *tm, fsec_t *fsec); + struct pg_tm *tm, fsec_t *fsec, Oid collid); static char *fill_str(char *str, int c, int max); static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree); static char *int_to_roman(int number); @@ -2326,13 +2334,17 @@ from_char_parse_int(int *dest, char **src, FormatNode *node) * ---------- */ static int -seq_search(char *name, const char *const *array, int type, int max, int *len) +seq_search(char *name, const char *const *array, int type, int max, int dim, int *len, Oid collid) { const char *p; const char *const *a; char *n; int last, i; + int index; + int mb_max; + char *initcap_name; + const char *localized_name; *len = 0; @@ -2345,25 +2357,37 @@ seq_search(char *name, const char *const *array, int type, int max, int *len) else if (type == ALL_LOWER) *name = pg_tolower((unsigned char) *name); - for (last = 0, a = array; *a != NULL; a++) + for (last = 0, a = array, index = 0; index < dim; a++, index++) { + /* Do not make assumptions about localized names case and length */ + if (lc_ctype_is_c(collid)) + { + localized_name = *a; + mb_max = max; + } + else + { + 
initcap_name = str_initcap_z(*a, collid); + localized_name = initcap_name; + mb_max = max * MAX_MULTIBYTE_CHAR_LEN; + } /* compare first chars */ - if (*name != **a) + if (*name != *localized_name) continue; - for (i = 1, p = *a + 1, n = name + 1;; n++, p++, i++) + for (i = 1, p = localized_name + 1, n = name + 1;; n++, p++, i++) { /* search fragment (max) only */ - if (max && i == max) + if (mb_max && i == mb_max) { *len = i; - return a - array; + return index; } /* full size */ if (*p == '\0') { *len = i; - return a - array; + return index; } /* Not found in array 'a' */ if (*n == '\0') @@ -2383,7 +2407,7 @@ seq_search(char *name, const char *const *array, int type, int max, int *len) #ifdef DEBUG_TO_FROM_CHAR elog(DEBUG_elog_output, "N: %c, P: %c, A: %s (%s)", - *n, *p, *a, name); + *n, *p, localized_name, name); #endif if (*n != *p) break; @@ -2404,12 +2428,12 @@ seq_search(char *name, const char *const *array, int type, int max, int *len) * If the string doesn't match, throw an error. 
*/ static int -from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, - FormatNode *node) +from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, int dim, + FormatNode *node, Oid collid) { int len; - *dest = seq_search(*src, array, type, max, &len); + *dest = seq_search(*src, array, type, max, dim, &len, collid); if (len <= 0) { char copy[DCH_MAX_ITEM_SIZ + 1]; @@ -3014,7 +3038,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col * ---------- */ static void -DCH_from_char(FormatNode *node, char *in, TmFromChar *out) +DCH_from_char(FormatNode *node, char *in, TmFromChar *out, Oid collid) { FormatNode *n; char *s; @@ -3024,6 +3048,8 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) /* number of extra skipped characters (more than given in format string) */ int extra_skip = 0; + /* cache localized days and months */ + cache_locale_time(); for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++) { @@ -3109,7 +3135,7 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) case DCH_a_m: case DCH_p_m: from_char_seq_search(&value, &s, ampm_strings_long, - ALL_UPPER, n->key->len, n); + ALL_UPPER, n->key->len, AD_AM_DIM, n, C_COLLATION_OID); from_char_set_int(&out->pm, value % 2, n); out->clock = CLOCK_12_HOUR; break; @@ -3118,7 +3144,7 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) case DCH_am: case DCH_pm: from_char_seq_search(&value, &s, ampm_strings, - ALL_UPPER, n->key->len, n); + ALL_UPPER, n->key->len, AD_AM_DIM, n, C_COLLATION_OID); from_char_set_int(&out->pm, value % 2, n); out->clock = CLOCK_12_HOUR; break; @@ -3209,7 +3235,7 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) case DCH_a_d: case DCH_b_c: from_char_seq_search(&value, &s, adbc_strings_long, - ALL_UPPER, n->key->len, n); + ALL_UPPER, n->key->len, AD_AM_DIM, n, C_COLLATION_OID); from_char_set_int(&out->bc, value % 2, n); break; case DCH_AD: @@ 
-3217,21 +3243,29 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) case DCH_ad: case DCH_bc: from_char_seq_search(&value, &s, adbc_strings, - ALL_UPPER, n->key->len, n); + ALL_UPPER, n->key->len, AD_AM_DIM, n, C_COLLATION_OID); from_char_set_int(&out->bc, value % 2, n); break; case DCH_MONTH: case DCH_Month: case DCH_month: - from_char_seq_search(&value, &s, months_full, ONE_UPPER, - MAX_MONTH_LEN, n); + if (S_TM(n->suffix)) + from_char_seq_search(&value, &s, (const char * const*)localized_full_months, ONE_UPPER, + MAX_MONTH_LEN, MONTHS_DIM, n, collid); + else + from_char_seq_search(&value, &s, months_full, ONE_UPPER, + MAX_MONTH_LEN, MONTHS_DIM, n, C_COLLATION_OID); from_char_set_int(&out->mm, value + 1, n); break; case DCH_MON: case DCH_Mon: case DCH_mon: - from_char_seq_search(&value, &s, months, ONE_UPPER, - MAX_MON_LEN, n); + if (S_TM(n->suffix)) + from_char_seq_search(&value, &s, (const char * const*)localized_abbrev_months, ONE_UPPER, + MAX_MON_LEN, MONTHS_DIM, n, collid); + else + from_char_seq_search(&value, &s, months, ONE_UPPER, + MAX_MON_LEN, MONTHS_DIM, n, C_COLLATION_OID); from_char_set_int(&out->mm, value + 1, n); break; case DCH_MM: @@ -3241,16 +3275,24 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) case DCH_DAY: case DCH_Day: case DCH_day: - from_char_seq_search(&value, &s, days, ONE_UPPER, - MAX_DAY_LEN, n); + if (S_TM(n->suffix)) + from_char_seq_search(&value, &s, (const char * const*)localized_full_days, ONE_UPPER, + MAX_DAY_LEN, DAYS_DIM, n, collid); + else + from_char_seq_search(&value, &s, days, ONE_UPPER, + MAX_DAY_LEN, DAYS_DIM, n, C_COLLATION_OID); from_char_set_int(&out->d, value, n); out->d++; break; case DCH_DY: case DCH_Dy: case DCH_dy: - from_char_seq_search(&value, &s, days, ONE_UPPER, - MAX_DY_LEN, n); + if (S_TM(n->suffix)) + from_char_seq_search(&value, &s, (const char * const*)localized_abbrev_days, ONE_UPPER, + MAX_DY_LEN, DAYS_DIM, n, collid); + else + from_char_seq_search(&value, &s, days_short, 
ONE_UPPER, + MAX_DY_LEN, DAYS_DIM, n, C_COLLATION_OID); from_char_set_int(&out->d, value, n); out->d++; break; @@ -3349,12 +3391,12 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) break; case DCH_RM: from_char_seq_search(&value, &s, rm_months_upper, - ALL_UPPER, MAX_RM_LEN, n); + ALL_UPPER, MAX_RM_LEN, MONTHS_DIM, n, C_COLLATION_OID); from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n); break; case DCH_rm: from_char_seq_search(&value, &s, rm_months_lower, - ALL_LOWER, MAX_RM_LEN, n); + ALL_LOWER, MAX_RM_LEN, MONTHS_DIM, n, C_COLLATION_OID); from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n); break; case DCH_W: @@ -3687,7 +3729,7 @@ to_timestamp(PG_FUNCTION_ARGS) struct pg_tm tm; fsec_t fsec; - do_to_timestamp(date_txt, fmt, &tm, &fsec); + do_to_timestamp(date_txt, fmt, &tm, &fsec, PG_GET_COLLATION()); /* Use the specified time zone, if any. */ if (tm.tm_zone) @@ -3722,7 +3764,7 @@ to_date(PG_FUNCTION_ARGS) struct pg_tm tm; fsec_t fsec; - do_to_timestamp(date_txt, fmt, &tm, &fsec); + do_to_timestamp(date_txt, fmt, &tm, &fsec, PG_GET_COLLATION()); /* Prevent overflow in Julian-day routines */ if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday)) @@ -3758,7 +3800,7 @@ to_date(PG_FUNCTION_ARGS) */ static void do_to_timestamp(text *date_txt, text *fmt, - struct pg_tm *tm, fsec_t *fsec) + struct pg_tm *tm, fsec_t *fsec, Oid collid) { FormatNode *format; TmFromChar tmfc; @@ -3807,11 +3849,11 @@ do_to_timestamp(text *date_txt, text *fmt, } #ifdef DEBUG_TO_FROM_CHAR - /* dump_node(format, fmt_len); */ - /* dump_index(DCH_keywords, DCH_index); */ + dump_node(format, fmt_len); + dump_index(DCH_keywords, DCH_index); #endif - DCH_from_char(format, date_str, &tmfc); + DCH_from_char(format, date_str, &tmfc, collid); pfree(fmt_str); if (!incache) diff --git a/src/test/regress/expected/collate.linux.utf8.out b/src/test/regress/expected/collate.linux.utf8.out index ad56ff9..1e0161b 100644 --- a/src/test/regress/expected/collate.linux.utf8.out +++ 
b/src/test/regress/expected/collate.linux.utf8.out @@ -461,6 +461,18 @@ SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR"); 01 NİS 2010 (1 row) +-- to_timestamp +SET DateStyle='ISO, YMD'; +SET TimeZone='UTC'; +SELECT to_timestamp('01 ŞUB 2010', 'DD TMMON YYYY'); + to_timestamp +------------------------ + 2010-02-01 00:00:00+00 +(1 row) + +SELECT to_timestamp('1234567890ab 2010', 'TMMONTH YYYY'); -- fail +ERROR: invalid value "123456789" for "MONTH" +DETAIL: The given value did not match any of the allowed values for this field. -- backwards parsing CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc'; CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C"; diff --git a/src/test/regress/sql/collate.linux.utf8.sql b/src/test/regress/sql/collate.linux.utf8.sql index eac2f90..1c734d2 100644 --- a/src/test/regress/sql/collate.linux.utf8.sql +++ b/src/test/regress/sql/collate.linux.utf8.sql @@ -182,6 +182,13 @@ SELECT to_char(date '2010-02-01', 'DD TMMON YYYY' COLLATE "tr_TR"); SELECT to_char(date '2010-04-01', 'DD TMMON YYYY'); SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR"); +-- to_timestamp + +SET DateStyle='ISO, YMD'; +SET TimeZone='UTC'; +SELECT to_timestamp('01 ŞUB 2010', 'DD TMMON YYYY'); +SELECT to_timestamp('1234567890ab 2010', 'TMMONTH YYYY'); -- fail + -- backwards parsing