diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 471fbb7ee6..ce259efed9 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -122,9 +122,83 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	const char *firstdigit;
 	uint16		tmp = 0;
 	bool		neg = false;
+	unsigned char digit;
+
+	/*
+	 * The majority of cases are likely to be base-10 digits without any
+	 * underscore separator characters. We'll first try to parse the string with
+	 * the assumption that's the case and only fall back on a slower
+	 * implementation which handles hex, octal and binary strings and
+	 * underscores if the fastpath version cannot parse the string.
+	 */
+
+	/* leave it up to the slow path to look for leading spaces */
+
+	if (*ptr == '-')
+	{
+		ptr++;
+		neg = true;
+	}
+
+	/* a leading '+' is uncommon so leave that for the slow path */
+
+	/* process the first digit */
+	digit = (*ptr - '0');
+
+	/*
+	 * Exploit unsigned arithmetic to save having to check both the upper and
+	 * lower bounds of the digit.
+	 */
+	if (likely(digit < 10))
+	{
+		ptr++;
+		tmp = digit;
+	}
+
+	/* process remaining digits */
+	for (;;)
+	{
+		digit = (*ptr - '0');
+
+		if (digit >= 10)
+			break;
+
+		ptr++;
+
+		if (unlikely(tmp > -(PG_INT16_MIN / 10)))
+			goto out_of_range;
+
+		tmp = tmp * 10 + digit;
+	}
+
+	/* we need at least one digit; a lone '-' must not be accepted as zero */
+	if (unlikely(ptr == (neg ? s + 1 : s)))
+		goto slow;
+
+	/* when the string does not end in a digit, let the slow path handle it */
+	if (unlikely(*ptr != '\0'))
+		goto slow;
+
+	if (neg)
+	{
+		/* check the negative equivalent will fit without overflowing */
+		if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1))
+			goto out_of_range;
+		return -((int16) tmp);
+	}
+
+	if (unlikely(tmp > PG_INT16_MAX))
+		goto out_of_range;
+
+	return (int16) tmp;
+
+slow:
+	tmp = 0;
+	ptr = s;
+	/* no need to reset neg */
 
 	/* skip leading spaces */
-	while (likely(*ptr) && isspace((unsigned char) *ptr))
+	while (isspace((unsigned char) *ptr))
 		ptr++;
 
 	/* handle sign */
@@ -141,7 +215,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (isxdigit((unsigned char) *ptr))
 			{
@@ -165,7 +239,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (*ptr >= '0' && *ptr <= '7')
 			{
@@ -189,7 +263,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (*ptr >= '0' && *ptr <= '1')
 			{
@@ -213,9 +287,9 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr;
 
-		while (*ptr)
+		for (;;)
 		{
-			if (isdigit((unsigned char) *ptr))
+			if (*ptr >= '0' && *ptr <= '9')
 			{
 				if (unlikely(tmp > -(PG_INT16_MIN / 10)))
 					goto out_of_range;
@@ -242,7 +316,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 		goto invalid_syntax;
 
 	/* allow trailing whitespace, but not other trailing chars */
-	while (*ptr != '\0' && isspace((unsigned char) *ptr))
+	while (isspace((unsigned char) *ptr))
 		ptr++;
 
 	if (unlikely(*ptr != '\0'))
@@ -300,9 +374,83 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 	const char *firstdigit;
 	uint32		tmp = 0;
 	bool		neg = false;
+	unsigned char digit;
+
+	/*
+	 * The majority of cases are likely to be base-10 digits without any
+	 * underscore separator characters. We'll first try to parse the string with
+	 * the assumption that's the case and only fall back on a slower
+	 * implementation which handles hex, octal and binary strings and
+	 * underscores if the fastpath version cannot parse the string.
+	 */
+
+	/* leave it up to the slow path to look for leading spaces */
+
+	if (*ptr == '-')
+	{
+		ptr++;
+		neg = true;
+	}
+
+	/* a leading '+' is uncommon so leave that for the slow path */
+
+	/* process the first digit */
+	digit = (*ptr - '0');
+
+	/*
+	 * Exploit unsigned arithmetic to save having to check both the upper and
+	 * lower bounds of the digit.
+	 */
+	if (likely(digit < 10))
+	{
+		ptr++;
+		tmp = digit;
+	}
+
+	/* process remaining digits */
+	for (;;)
+	{
+		digit = (*ptr - '0');
+
+		if (digit >= 10)
+			break;
+
+		ptr++;
+
+		if (unlikely(tmp > -(PG_INT32_MIN / 10)))
+			goto out_of_range;
+
+		tmp = tmp * 10 + digit;
+	}
+
+	/* we need at least one digit; a lone '-' must not be accepted as zero */
+	if (unlikely(ptr == (neg ? s + 1 : s)))
+		goto slow;
+
+	/* when the string does not end in a digit, let the slow path handle it */
+	if (unlikely(*ptr != '\0'))
+		goto slow;
+
+	if (neg)
+	{
+		/* check the negative equivalent will fit without overflowing */
+		if (unlikely(tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1))
+			goto out_of_range;
+		return -((int32) tmp);
+	}
+
+	if (unlikely(tmp > PG_INT32_MAX))
+		goto out_of_range;
+
+	return (int32) tmp;
+
+slow:
+	tmp = 0;
+	ptr = s;
+	/* no need to reset neg */
 
 	/* skip leading spaces */
-	while (likely(*ptr) && isspace((unsigned char) *ptr))
+	while (isspace((unsigned char) *ptr))
 		ptr++;
 
 	/* handle sign */
@@ -319,7 +467,7 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (isxdigit((unsigned char) *ptr))
 			{
@@ -343,7 +491,7 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (*ptr >= '0' && *ptr <= '7')
 			{
@@ -367,7 +515,7 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (*ptr >= '0' && *ptr <= '1')
 			{
@@ -391,9 +539,9 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr;
 
-		while (*ptr)
+		for (;;)
 		{
-			if (isdigit((unsigned char) *ptr))
+			if (*ptr >= '0' && *ptr <= '9')
 			{
 				if (unlikely(tmp > -(PG_INT32_MIN / 10)))
 					goto out_of_range;
@@ -420,7 +568,7 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 		goto invalid_syntax;
 
 	/* allow trailing whitespace, but not other trailing chars */
-	while (*ptr != '\0' && isspace((unsigned char) *ptr))
+	while (isspace((unsigned char) *ptr))
 		ptr++;
 
 	if (unlikely(*ptr != '\0'))
@@ -478,9 +626,83 @@ pg_strtoint64_safe(const char *s, Node *escontext)
 	const char *firstdigit;
 	uint64		tmp = 0;
 	bool		neg = false;
+	unsigned char digit;
+
+	/*
+	 * The majority of cases are likely to be base-10 digits without any
+	 * underscore separator characters. We'll first try to parse the string with
+	 * the assumption that's the case and only fall back on a slower
+	 * implementation which handles hex, octal and binary strings and
+	 * underscores if the fastpath version cannot parse the string.
+	 */
+
+	/* leave it up to the slow path to look for leading spaces */
+
+	if (*ptr == '-')
+	{
+		ptr++;
+		neg = true;
+	}
+
+	/* a leading '+' is uncommon so leave that for the slow path */
+
+	/* process the first digit */
+	digit = (*ptr - '0');
+
+	/*
+	 * Exploit unsigned arithmetic to save having to check both the upper and
+	 * lower bounds of the digit.
+	 */
+	if (likely(digit < 10))
+	{
+		ptr++;
+		tmp = digit;
+	}
+
+	/* process remaining digits */
+	for (;;)
+	{
+		digit = (*ptr - '0');
+
+		if (digit >= 10)
+			break;
+
+		ptr++;
+
+		if (unlikely(tmp > -(PG_INT64_MIN / 10)))
+			goto out_of_range;
+
+		tmp = tmp * 10 + digit;
+	}
+
+	/* we need at least one digit; a lone '-' must not be accepted as zero */
+	if (unlikely(ptr == (neg ? s + 1 : s)))
+		goto slow;
+
+	/* when the string does not end in a digit, let the slow path handle it */
+	if (unlikely(*ptr != '\0'))
+		goto slow;
+
+	if (neg)
+	{
+		/* check the negative equivalent will fit without overflowing */
+		if (unlikely(tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1))
+			goto out_of_range;
+		return -((int64) tmp);
+	}
+
+	if (unlikely(tmp > PG_INT64_MAX))
+		goto out_of_range;
+
+	return (int64) tmp;
+
+slow:
+	tmp = 0;
+	ptr = s;
+	/* no need to reset neg */
 
 	/* skip leading spaces */
-	while (*ptr && isspace((unsigned char) *ptr))
+	while (isspace((unsigned char) *ptr))
 		ptr++;
 
 	/* handle sign */
@@ -497,7 +719,7 @@ pg_strtoint64_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (isxdigit((unsigned char) *ptr))
 			{
@@ -521,7 +743,7 @@ pg_strtoint64_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (*ptr >= '0' && *ptr <= '7')
 			{
@@ -545,7 +767,7 @@ pg_strtoint64_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (*ptr >= '0' && *ptr <= '1')
 			{
@@ -569,9 +791,9 @@ pg_strtoint64_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr;
 
-		while (*ptr)
+		for (;;)
 		{
-			if (isdigit((unsigned char) *ptr))
+			if (*ptr >= '0' && *ptr <= '9')
 			{
 				if (unlikely(tmp > -(PG_INT64_MIN / 10)))
 					goto out_of_range;
@@ -598,7 +820,7 @@ pg_strtoint64_safe(const char *s, Node *escontext)
 		goto invalid_syntax;
 
 	/* allow trailing whitespace, but not other trailing chars */
-	while (*ptr != '\0' && isspace((unsigned char) *ptr))
+	while (isspace((unsigned char) *ptr))
 		ptr++;
 
 	if (unlikely(*ptr != '\0'))