diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 7412df0..2e2aa38 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -6387,8 +6387,17 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); TM does not include trailing blanks. + + + + + to_timestamp and to_date ignore - the TM modifier. + letter case when receiving names as input. For example, + to_timestamp('2000-JUN', 'YYYY-MON'), + to_timestamp('2000-Jun', 'YYYY-MON'), and + to_timestamp('2000-JUN', 'YYYY-mon') all work and return + the same result. diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index b3115e4..9d6ab1a 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -174,6 +174,13 @@ typedef struct #define CLOCK_24_HOUR 0 #define CLOCK_12_HOUR 1 +/* ---------- + * Dimension for seq_search. + * ---------- + */ +#define MONTHS_DIM 12 +#define DAYS_DIM 7 +#define AD_AM_DIM 4 /* ---------- * Full months @@ -181,11 +188,11 @@ typedef struct */ static const char *const months_full[] = { "January", "February", "March", "April", "May", "June", "July", - "August", "September", "October", "November", "December", NULL + "August", "September", "October", "November", "December" }; static const char *const days_short[] = { - "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" }; /* ---------- @@ -217,8 +224,8 @@ static const char *const days_short[] = { * matches for BC have an odd index. So the boolean value for BC is given by * taking the array index of the match, modulo 2. 
*/ -static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL}; -static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL}; +static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR}; +static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR}; /* ---------- * AM / PM @@ -244,8 +251,8 @@ static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_S * matches for PM have an odd index. So the boolean value for PM is given by * taking the array index of the match, modulo 2. */ -static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL}; -static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL}; +static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR}; +static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR}; /* ---------- * Months in roman-numeral @@ -254,10 +261,10 @@ static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_S * ---------- */ static const char *const rm_months_upper[] = -{"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL}; +{"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I"}; static const char *const rm_months_lower[] = -{"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL}; +{"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i"}; /* ---------- * Roman numbers @@ -975,7 +982,7 @@ static void parse_format(FormatNode *node, const char *str, const KeyWord *kw, static void DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid); -static void DCH_from_char(FormatNode *node, char *in, TmFromChar *out); +static void DCH_from_char(FormatNode *node, char *in, TmFromChar *out, Oid collid); #ifdef DEBUG_TO_FROM_CHAR static void dump_index(const KeyWord *k, const int *index); @@ -990,10 +997,13 @@ 
static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode); static void from_char_set_int(int *dest, const int value, const FormatNode *node); static int from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node); static int from_char_parse_int(int *dest, char **src, FormatNode *node); -static int seq_search(char *name, const char *const *array, int type, int max, int *len); -static int from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, FormatNode *node); +static bool str_compare(char *name, const char *element, int type, int max, int *len, int *last); +static int seq_search_sqlascii(char *name, const char *const *array, int type, int max, int *len, int dim); +static int seq_search_localized(char *name, char **array, int type, int max, int *len, int dim, Oid collid); +static int from_char_seq_search(int *dest, char **src, const char *const *array, char **localized_array, int type, int max, + FormatNode *node, int dim, Oid collid); static void do_to_timestamp(text *date_txt, text *fmt, - struct pg_tm *tm, fsec_t *fsec); + struct pg_tm *tm, fsec_t *fsec, Oid collid); static char *fill_str(char *str, int c, int max); static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree); static char *int_to_roman(int number); @@ -2322,17 +2332,66 @@ from_char_parse_int(int *dest, char **src, FormatNode *node) } /* ---------- - * Sequential search with to upper/lower conversion + * Compare 'name' against a single array entry 'element', converting 'name' + * to upper/lower case as it goes; helper for the seq_search functions * ---------- */ -static int -seq_search(char *name, const char *const *array, int type, int max, int *len) +static bool +str_compare(char *name, const char *element, int type, int max, int *len, int *last) { const char *p; - const char *const *a; char *n; - int last, - i; + int i; + + for (i = 1, p = element + 1, n = name + 1;; n++, p++, i++) + { + /* search fragment (max) only */ + if (max && 
i == max) + { + *len = i; + return true; + } + /* full size */ + if (*p == '\0') + { + *len = i; + return true; + } + /* 'name' ended before 'element' did: no match */ + if (*n == '\0') + return false; + + /* + * Convert (but convert new chars only) + */ + if (i > *last) + { + if (type == ONE_UPPER || type == ALL_LOWER) + *n = pg_tolower((unsigned char) *n); + else if (type == ALL_UPPER) + *n = pg_toupper((unsigned char) *n); + *last = i; + } + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "N: %c, P: %c, A: %s (%s)", + *n, *p, element, name); +#endif + if (*n != *p) + return false; + } +} + +/* ---------- + * Sequential search with to upper/lower conversion for SQL_ASCII array input + * ---------- + */ +static int +seq_search_sqlascii(char *name, const char *const *array, int type, int max, int *len, int dim) +{ + const char *const *a; + int last; + int index; *len = 0; @@ -2345,49 +2404,51 @@ seq_search(char *name, const char *const *array, int type, int max, int *len) else if (type == ALL_LOWER) *name = pg_tolower((unsigned char) *name); - for (last = 0, a = array; *a != NULL; a++) + for (last = 0, a = array, index = 0; index < dim; a++, index++) { /* compare first chars */ if (*name != **a) continue; + if (str_compare(name, *a, type, max, len, &last)) + return index; + } - for (i = 1, p = *a + 1, n = name + 1;; n++, p++, i++) - { - /* search fragment (max) only */ - if (max && i == max) - { - *len = i; - return a - array; - } - /* full size */ - if (*p == '\0') - { - *len = i; - return a - array; - } - /* Not found in array 'a' */ - if (*n == '\0') - break; + return -1; +} - /* - * Convert (but convert new chars only) - */ - if (i > last) - { - if (type == ONE_UPPER || type == ALL_LOWER) - *n = pg_tolower((unsigned char) *n); - else if (type == ALL_UPPER) - *n = pg_toupper((unsigned char) *n); - last = i; - } +/* ---------- + * Sequential search with to upper/lower conversion for localized array input + * ---------- + */ +static int +seq_search_localized(char *name, char 
**array, int type, int max, int *len, int dim, Oid collid) +{ + char **a; + char *initcap; + int last; + int index; + int mb_max; -#ifdef DEBUG_TO_FROM_CHAR - elog(DEBUG_elog_output, "N: %c, P: %c, A: %s (%s)", - *n, *p, *a, name); -#endif - if (*n != *p) - break; - } + *len = 0; + + if (!*name) + return -1; + + /* set first char */ + if (type == ONE_UPPER || type == ALL_UPPER) + *name = pg_toupper((unsigned char) *name); + else if (type == ALL_LOWER) + *name = pg_tolower((unsigned char) *name); + + mb_max = max * pg_database_encoding_max_length(); + for (last = 0, a = array, index = 0; index < dim; a++, index++) + { + /* compare first chars */ + if (*name != **a) + continue; + initcap = str_initcap_z(*a, collid); + if (str_compare(name, initcap, type, mb_max, len, &last)) + return index; } return -1; @@ -2404,16 +2465,20 @@ seq_search(char *name, const char *const *array, int type, int max, int *len) * If the string doesn't match, throw an error. */ static int -from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, - FormatNode *node) +from_char_seq_search(int *dest, char **src, const char *const *array, char **localized_array, int type, int max, + FormatNode *node, int dim, Oid collid) { int len; - *dest = seq_search(*src, array, type, max, &len); + if (localized_array == NULL) + *dest = seq_search_sqlascii(*src, array, type, max, &len, dim); + else + *dest = seq_search_localized(*src, localized_array, type, max, &len, dim, collid); if (len <= 0) { char copy[DCH_MAX_ITEM_SIZ + 1]; + /* We use byte length, localized names encoding is ignored */ Assert(max <= DCH_MAX_ITEM_SIZ); strlcpy(copy, *src, max + 1); @@ -3014,19 +3079,24 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col * ---------- */ static void -DCH_from_char(FormatNode *node, char *in, TmFromChar *out) +DCH_from_char(FormatNode *node, char *in, TmFromChar *out, Oid collid) { FormatNode *n; char *s; int len, value; bool fx_mode = false; + 
char **localized_names; /* number of extra skipped characters (more than given in format string) */ int extra_skip = 0; + /* cache localized days and months */ + cache_locale_time(); for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++) { + localized_names = NULL; + /* * Ignore spaces at the beginning of the string and before fields when * not in FX (fixed width) mode. @@ -3108,8 +3178,8 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) case DCH_P_M: case DCH_a_m: case DCH_p_m: - from_char_seq_search(&value, &s, ampm_strings_long, - ALL_UPPER, n->key->len, n); + from_char_seq_search(&value, &s, ampm_strings_long, localized_names, + ALL_UPPER, n->key->len, n, AD_AM_DIM, collid); from_char_set_int(&out->pm, value % 2, n); out->clock = CLOCK_12_HOUR; break; @@ -3117,8 +3187,8 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) case DCH_PM: case DCH_am: case DCH_pm: - from_char_seq_search(&value, &s, ampm_strings, - ALL_UPPER, n->key->len, n); + from_char_seq_search(&value, &s, ampm_strings, localized_names, + ALL_UPPER, n->key->len, n, AD_AM_DIM, collid); from_char_set_int(&out->pm, value % 2, n); out->clock = CLOCK_12_HOUR; break; @@ -3208,30 +3278,34 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) case DCH_B_C: case DCH_a_d: case DCH_b_c: - from_char_seq_search(&value, &s, adbc_strings_long, - ALL_UPPER, n->key->len, n); + from_char_seq_search(&value, &s, adbc_strings_long, localized_names, + ALL_UPPER, n->key->len, n, AD_AM_DIM, collid); from_char_set_int(&out->bc, value % 2, n); break; case DCH_AD: case DCH_BC: case DCH_ad: case DCH_bc: - from_char_seq_search(&value, &s, adbc_strings, - ALL_UPPER, n->key->len, n); + from_char_seq_search(&value, &s, adbc_strings, localized_names, + ALL_UPPER, n->key->len, n, AD_AM_DIM, collid); from_char_set_int(&out->bc, value % 2, n); break; case DCH_MONTH: case DCH_Month: case DCH_month: - from_char_seq_search(&value, &s, months_full, ONE_UPPER, - MAX_MONTH_LEN, n); + if 
(S_TM(n->suffix)) + localized_names = localized_full_months; + from_char_seq_search(&value, &s, months_full, localized_names, + ONE_UPPER, MAX_MONTH_LEN, n, MONTHS_DIM, collid); from_char_set_int(&out->mm, value + 1, n); break; case DCH_MON: case DCH_Mon: case DCH_mon: - from_char_seq_search(&value, &s, months, ONE_UPPER, - MAX_MON_LEN, n); + if (S_TM(n->suffix)) + localized_names = localized_abbrev_months; + from_char_seq_search(&value, &s, months, localized_names, + ONE_UPPER, MAX_MON_LEN, n, MONTHS_DIM, collid); from_char_set_int(&out->mm, value + 1, n); break; case DCH_MM: @@ -3241,16 +3315,20 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) case DCH_DAY: case DCH_Day: case DCH_day: - from_char_seq_search(&value, &s, days, ONE_UPPER, - MAX_DAY_LEN, n); + if (S_TM(n->suffix)) + localized_names = localized_full_days; + from_char_seq_search(&value, &s, days, localized_names, + ONE_UPPER, MAX_DAY_LEN, n, DAYS_DIM, collid); from_char_set_int(&out->d, value, n); out->d++; break; case DCH_DY: case DCH_Dy: case DCH_dy: - from_char_seq_search(&value, &s, days, ONE_UPPER, - MAX_DY_LEN, n); + if (S_TM(n->suffix)) + localized_names = localized_abbrev_days; + from_char_seq_search(&value, &s, days_short, localized_names, + ONE_UPPER, MAX_DY_LEN, n, DAYS_DIM, collid); from_char_set_int(&out->d, value, n); out->d++; break; @@ -3348,13 +3426,13 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) SKIP_THth(s, n->suffix); break; case DCH_RM: - from_char_seq_search(&value, &s, rm_months_upper, - ALL_UPPER, MAX_RM_LEN, n); + from_char_seq_search(&value, &s, rm_months_upper, localized_names, + ALL_UPPER, MAX_RM_LEN, n, MONTHS_DIM, collid); from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n); break; case DCH_rm: - from_char_seq_search(&value, &s, rm_months_lower, - ALL_LOWER, MAX_RM_LEN, n); + from_char_seq_search(&value, &s, rm_months_lower, localized_names, + ALL_LOWER, MAX_RM_LEN, n, MONTHS_DIM, collid); from_char_set_int(&out->mm, MONTHS_PER_YEAR - 
value, n); break; case DCH_W: @@ -3687,7 +3765,7 @@ to_timestamp(PG_FUNCTION_ARGS) struct pg_tm tm; fsec_t fsec; - do_to_timestamp(date_txt, fmt, &tm, &fsec); + do_to_timestamp(date_txt, fmt, &tm, &fsec, PG_GET_COLLATION()); /* Use the specified time zone, if any. */ if (tm.tm_zone) @@ -3722,7 +3800,7 @@ to_date(PG_FUNCTION_ARGS) struct pg_tm tm; fsec_t fsec; - do_to_timestamp(date_txt, fmt, &tm, &fsec); + do_to_timestamp(date_txt, fmt, &tm, &fsec, PG_GET_COLLATION()); /* Prevent overflow in Julian-day routines */ if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday)) @@ -3758,7 +3836,7 @@ to_date(PG_FUNCTION_ARGS) */ static void do_to_timestamp(text *date_txt, text *fmt, - struct pg_tm *tm, fsec_t *fsec) + struct pg_tm *tm, fsec_t *fsec, Oid collid) { FormatNode *format; TmFromChar tmfc; @@ -3807,11 +3885,11 @@ do_to_timestamp(text *date_txt, text *fmt, } #ifdef DEBUG_TO_FROM_CHAR - /* dump_node(format, fmt_len); */ - /* dump_index(DCH_keywords, DCH_index); */ + dump_node(format, fmt_len); + dump_index(DCH_keywords, DCH_index); #endif - DCH_from_char(format, date_str, &tmfc); + DCH_from_char(format, date_str, &tmfc, collid); pfree(fmt_str); if (!incache) diff --git a/src/test/regress/expected/collate.linux.utf8.out b/src/test/regress/expected/collate.linux.utf8.out index ad56ff9..1e0161b 100644 --- a/src/test/regress/expected/collate.linux.utf8.out +++ b/src/test/regress/expected/collate.linux.utf8.out @@ -461,6 +461,18 @@ SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR"); 01 NİS 2010 (1 row) +-- to_date +SET DateStyle='ISO, YMD'; +SET TimeZone='UTC'; +SELECT to_timestamp('01 ŞUB 2010', 'DD TMMON YYYY'); + to_timestamp +------------------------ + 2010-02-01 00:00:00+00 +(1 row) + +SELECT to_timestamp('1234567890ab 2010', 'TMMONTH YYYY'); -- fail +ERROR: invalid value "123456789" for "MONTH" +DETAIL: The given value did not match any of the allowed values for this field. 
-- backwards parsing CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc'; CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C"; diff --git a/src/test/regress/sql/collate.linux.utf8.sql b/src/test/regress/sql/collate.linux.utf8.sql index eac2f90..1c734d2 100644 --- a/src/test/regress/sql/collate.linux.utf8.sql +++ b/src/test/regress/sql/collate.linux.utf8.sql @@ -182,6 +182,13 @@ SELECT to_char(date '2010-02-01', 'DD TMMON YYYY' COLLATE "tr_TR"); SELECT to_char(date '2010-04-01', 'DD TMMON YYYY'); SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR"); +-- to_date + +SET DateStyle='ISO, YMD'; +SET TimeZone='UTC'; +SELECT to_timestamp('01 ŞUB 2010', 'DD TMMON YYYY'); +SELECT to_timestamp('1234567890ab 2010', 'TMMONTH YYYY'); -- fail + -- backwards parsing