commit 968a44c4f65ddc9bbad4e79156440814758301a4 Author: Alexander Korotkov Date: Mon Aug 26 04:40:24 2019 +0300 Standard datetime parsing mode Reported-by: Bug: Discussion: Author: Reviewed-by: Tested-by: Backpatch-through: diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 9ebcdad3cf9..142d9d2f678 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -99,11 +99,12 @@ #include "utils/pg_locale.h" /* ---------- - * Routines type + * Routines flags * ---------- */ -#define DCH_TYPE 1 /* DATE-TIME version */ -#define NUM_TYPE 2 /* NUMBER version */ +#define DCH_FLAG 0x1 /* DATE-TIME flag */ +#define NUM_FLAG 0x2 /* NUMBER flag */ +#define STD_FLAG 0x4 /* STANDARD flag */ /* ---------- * KeyWord Index (ascii from position 32 (' ') to 126 (~)) @@ -384,6 +385,7 @@ typedef struct { FormatNode format[DCH_CACHE_SIZE + 1]; char str[DCH_CACHE_SIZE + 1]; + bool std; bool valid; int age; } DCHCacheEntry; @@ -1008,11 +1010,12 @@ static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int t static bool is_separator_char(const char *str); static void NUMDesc_prepare(NUMDesc *num, FormatNode *n); static void parse_format(FormatNode *node, const char *str, const KeyWord *kw, - const KeySuffix *suf, const int *index, int ver, NUMDesc *Num); + const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num); static void DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid); -static void DCH_from_char(FormatNode *node, char *in, TmFromChar *out); +static void DCH_from_char(FormatNode *node, char *in, TmFromChar *out, + bool std); #ifdef DEBUG_TO_FROM_CHAR static void dump_index(const KeyWord *k, const int *index); @@ -1030,7 +1033,7 @@ static int from_char_parse_int_len(int *dest, char **src, const int len, FormatN static int from_char_parse_int(int *dest, char **src, FormatNode *node); static int seq_search(char *name, const char *const *array, int type, int max, int *len); static int from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, FormatNode *node); -static void do_to_timestamp(text *date_txt, text *fmt, +static void do_to_timestamp(text *date_txt, text *fmt, bool std, struct pg_tm *tm, fsec_t *fsec, int *fprec); static char *fill_str(char *str, int c, int max); static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree); @@ -1042,9 +1045,9 @@ static void NUM_numpart_to_char(NUMProc *Np, int id); static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number, int input_len, int to_char_out_pre_spaces, int sign, bool is_to_char, Oid collid); -static DCHCacheEntry *DCH_cache_getnew(const char *str); -static DCHCacheEntry *DCH_cache_search(const char *str); -static DCHCacheEntry *DCH_cache_fetch(const char *str); +static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std); +static DCHCacheEntry *DCH_cache_search(const char *str, bool std); +static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std); static NUMCacheEntry *NUM_cache_getnew(const char *str); static NUMCacheEntry *NUM_cache_search(const char *str); static NUMCacheEntry *NUM_cache_fetch(const char *str); @@ -1287,7 +1290,7 @@ NUMDesc_prepare(NUMDesc *num, FormatNode *n) */ static void parse_format(FormatNode *node, const char *str, const KeyWord *kw, - const KeySuffix *suf, const int *index, int ver, NUMDesc *Num) + const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num) { FormatNode *n; @@ -1305,7 +1308,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, /* * Prefix */ - if (ver == DCH_TYPE && + if ((flags & DCH_FLAG) && (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL) { suffix |= s->id; @@ -1326,13 +1329,13 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, /* * NUM version: Prepare global NUMDesc struct */ - if (ver == NUM_TYPE) + if (flags & NUM_FLAG) NUMDesc_prepare(Num, n); /* * Postfix */ - if (ver == DCH_TYPE && *str && + if ((flags & DCH_FLAG) && *str && (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL) { n->suffix |= s->id; @@ -1346,11 +1349,35 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, { int chlen; - /* - * Process double-quoted literal string, if any - */ - if (*str == '"') + if (flags & STD_FLAG) + { + /* + * Standard mode, allow only following separators: + * "-./,':; " + */ + if (strchr("-./,':; ", *str) == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("invalid datetime format separator: \"%s\"", + pnstrdup(str, pg_mblen(str))))); + + if (*str == ' ') + n->type = NODE_TYPE_SPACE; + else + n->type = NODE_TYPE_SEPARATOR; + + n->character[0] = *str; + n->character[1] = '\0'; + n->key = NULL; + n->suffix = 0; + n++; + str++; + } + else if (*str == '"') { + /* + * Process double-quoted literal string, if any + */ str++; while (*str) { @@ -1382,7 +1409,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, str++; chlen = pg_mblen(str); - if (ver == DCH_TYPE && is_separator_char(str)) + if ((flags & DCH_FLAG) && is_separator_char(str)) n->type = NODE_TYPE_SEPARATOR; else if (isspace((unsigned char) *str)) n->type = NODE_TYPE_SPACE; @@ -3080,13 +3107,13 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col * ---------- */ static void -DCH_from_char(FormatNode *node, char *in, TmFromChar *out) +DCH_from_char(FormatNode *node, char *in, TmFromChar *out, bool std) { FormatNode *n; char *s; int len, value; - bool fx_mode = false; + bool fx_mode = std; /* number of extra skipped characters (more than given in format string) */ int extra_skip = 0; @@ -3109,7 +3136,23 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR) { - if (!fx_mode) + if (std) + { + /* + * Standard mode requires strict matching between format string + * separators/spaces and input string. + */ + Assert(n->character[0] && !n->character[1]); + + if (*s == n->character[0]) + s++; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("unmatched format separator \"%c\"", + n->character[0]))); + } + else if (!fx_mode) { /* * In non FX (fixed format) mode one format string space or @@ -3461,6 +3504,27 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) } } } + + /* + * Standard parsing mode doesn't allow unmatched format patterns or + * trailing characters in the input string. + */ + if (std) + { + if (n->type != NODE_TYPE_END) + ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("input string is too short for datetime format"))); + + while (*s != '\0' && isspace((unsigned char) *s)) + s++; + + if (*s != '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("trailing characters remain in input string after " + "datetime format"))); + } } /* @@ -3483,7 +3547,7 @@ DCH_prevent_counter_overflow(void) /* select a DCHCacheEntry to hold the given format picture */ static DCHCacheEntry * -DCH_cache_getnew(const char *str) +DCH_cache_getnew(const char *str, bool std) { DCHCacheEntry *ent; @@ -3533,6 +3597,7 @@ DCH_cache_getnew(const char *str) MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry)); ent->valid = false; StrNCpy(ent->str, str, DCH_CACHE_SIZE + 1); + ent->std = std; ent->age = (++DCHCounter); /* caller is expected to fill format, then set valid */ ++n_DCHCache; @@ -3542,7 +3607,7 @@ DCH_cache_getnew(const char *str) /* look for an existing DCHCacheEntry matching the given format picture */ static DCHCacheEntry * -DCH_cache_search(const char *str) +DCH_cache_search(const char *str, bool std) { /* Ensure we can advance DCHCounter below */ DCH_prevent_counter_overflow(); @@ -3551,7 +3616,7 @@ DCH_cache_search(const char *str) { DCHCacheEntry *ent = DCHCache[i]; - if (ent->valid && strcmp(ent->str, str) == 0) + if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std) { ent->age = (++DCHCounter); return ent; @@ -3563,21 +3628,21 @@ DCH_cache_search(const char *str) /* Find or create a DCHCacheEntry for the given format picture */ static DCHCacheEntry * -DCH_cache_fetch(const char *str) +DCH_cache_fetch(const char *str, bool std) { DCHCacheEntry *ent; - if ((ent = DCH_cache_search(str)) == NULL) + if ((ent = DCH_cache_search(str, std)) == NULL) { /* * Not in the cache, must run parser and save a new format-picture to * the cache. Do not mark the cache entry valid until parsing * succeeds. */ - ent = DCH_cache_getnew(str); + ent = DCH_cache_getnew(str, std); - parse_format(ent->format, str, DCH_keywords, - DCH_suff, DCH_index, DCH_TYPE, NULL); + parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index, + DCH_FLAG | (std ? STD_FLAG : 0), NULL); ent->valid = true; } @@ -3622,14 +3687,14 @@ datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid) format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); parse_format(format, fmt_str, DCH_keywords, - DCH_suff, DCH_index, DCH_TYPE, NULL); + DCH_suff, DCH_index, DCH_FLAG, NULL); } else { /* * Use cache buffers */ - DCHCacheEntry *ent = DCH_cache_fetch(fmt_str); + DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false); incache = true; format = ent->format; @@ -3771,7 +3836,7 @@ to_timestamp(PG_FUNCTION_ARGS) fsec_t fsec; int fprec; - do_to_timestamp(date_txt, fmt, &tm, &fsec, &fprec); + do_to_timestamp(date_txt, fmt, false, &tm, &fsec, &fprec); /* Use the specified time zone, if any. */ if (tm.tm_zone) @@ -3810,7 +3875,7 @@ to_date(PG_FUNCTION_ARGS) struct pg_tm tm; fsec_t fsec; - do_to_timestamp(date_txt, fmt, &tm, &fsec, NULL); + do_to_timestamp(date_txt, fmt, false, &tm, &fsec, NULL); /* Prevent overflow in Julian-day routines */ if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday)) @@ -3845,7 +3910,7 @@ to_date(PG_FUNCTION_ARGS) * struct 'tm' and 'fsec'. */ static void -do_to_timestamp(text *date_txt, text *fmt, +do_to_timestamp(text *date_txt, text *fmt, bool std, struct pg_tm *tm, fsec_t *fsec, int *fprec) { FormatNode *format; @@ -3880,15 +3945,15 @@ do_to_timestamp(text *date_txt, text *fmt, format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); - parse_format(format, fmt_str, DCH_keywords, - DCH_suff, DCH_index, DCH_TYPE, NULL); + parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index, + DCH_FLAG | (std ? STD_FLAG : 0), NULL); } else { /* * Use cache buffers */ - DCHCacheEntry *ent = DCH_cache_fetch(fmt_str); + DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std); incache = true; format = ent->format; @@ -3899,7 +3964,7 @@ do_to_timestamp(text *date_txt, text *fmt, /* dump_index(DCH_keywords, DCH_index); */ #endif - DCH_from_char(format, date_str, &tmfc); + DCH_from_char(format, date_str, &tmfc, std); pfree(fmt_str); @@ -4268,7 +4333,7 @@ NUM_cache_fetch(const char *str) zeroize_NUM(&ent->Num); parse_format(ent->format, str, NUM_keywords, - NULL, NUM_index, NUM_TYPE, &ent->Num); + NULL, NUM_index, NUM_FLAG, &ent->Num); ent->valid = true; } @@ -4300,7 +4365,7 @@ NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree) zeroize_NUM(Num); parse_format(format, str, NUM_keywords, - NULL, NUM_index, NUM_TYPE, Num); + NULL, NUM_index, NUM_FLAG, Num); } else {