Thread: Re: [HACKERS] bytea, index and like operator again and detailed report

Re: [HACKERS] bytea, index and like operator again and detailed report

From
Joe Conway
Date:
Alvar Freude wrote:
> -- Joe Conway <mail@joeconway.com> wrote:
>>Please try the attached patch and let me know how it works for you. It is
>>against cvs HEAD, but should apply OK to 7.4.
>
> so, I checked it with my database.
> It looks good, all checks I made are OK.

The attached fixes the bytea-like bug found by Alvar -- as well as some
others I found while working on it. I would like to apply this evening
so that it can be in the 7.4.1 release. Any objections?

Thanks,

Joe
Index: src/backend/utils/adt/selfuncs.c
===================================================================
RCS file: /opt/src/cvs/pgsql-server/src/backend/utils/adt/selfuncs.c,v
retrieving revision 1.149
diff -c -r1.149 selfuncs.c
*** src/backend/utils/adt/selfuncs.c    29 Nov 2003 19:51:59 -0000    1.149
--- src/backend/utils/adt/selfuncs.c    5 Dec 2003 19:42:39 -0000
***************
*** 184,189 ****
--- 184,190 ----
  static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
  static Datum string_to_datum(const char *str, Oid datatype);
  static Const *string_to_const(const char *str, Oid datatype);
+ static Const *string_to_bytea_const(const char *str, size_t str_len);


  /*
***************
*** 3135,3154 ****
      }
      else
      {
!         patt = DatumGetCString(DirectFunctionCall1(byteaout, patt_const->constvalue));
!         pattlen = toast_raw_datum_size(patt_const->constvalue) - VARHDRSZ;
      }

      match = palloc(pattlen + 1);
      match_pos = 0;
-
      for (pos = 0; pos < pattlen; pos++)
      {
          /* % and _ are wildcard characters in LIKE */
          if (patt[pos] == '%' ||
              patt[pos] == '_')
              break;
!         /* Backslash quotes the next character */
          if (patt[pos] == '\\')
          {
              pos++;
--- 3136,3166 ----
      }
      else
      {
!         bytea   *bstr = DatumGetByteaP(patt_const->constvalue);
!
!         pattlen = VARSIZE(bstr) - VARHDRSZ;
!         if (pattlen > 0)
!         {
!             patt = (char *) palloc(pattlen);
!             memcpy(patt, VARDATA(bstr), pattlen);
!         }
!         else
!             patt = NULL;
!
!         if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
!             pfree(bstr);
      }

      match = palloc(pattlen + 1);
      match_pos = 0;
      for (pos = 0; pos < pattlen; pos++)
      {
          /* % and _ are wildcard characters in LIKE */
          if (patt[pos] == '%' ||
              patt[pos] == '_')
              break;
!
!         /* Backslash escapes the next character */
          if (patt[pos] == '\\')
          {
              pos++;
***************
*** 3174,3183 ****
      match[match_pos] = '\0';
      rest = &patt[pos];

!     *prefix_const = string_to_const(match, typeid);
!     *rest_const = string_to_const(rest, typeid);

!     pfree(patt);
      pfree(match);

      /* in LIKE, an empty pattern is an exact match! */
--- 3186,3204 ----
      match[match_pos] = '\0';
      rest = &patt[pos];

!     if (typeid != BYTEAOID)
!     {
!         *prefix_const = string_to_const(match, typeid);
!         *rest_const = string_to_const(rest, typeid);
!     }
!     else
!     {
!         *prefix_const = string_to_bytea_const(match, match_pos);
!         *rest_const = string_to_bytea_const(rest, pattlen - match_pos);
!     }

!     if (patt != NULL)
!         pfree(patt);
      pfree(match);

      /* in LIKE, an empty pattern is an exact match! */
***************
*** 3500,3508 ****
      }
      else
      {
!         patt = DatumGetCString(DirectFunctionCall1(byteaout, patt_const->constvalue));
!         pattlen = toast_raw_datum_size(patt_const->constvalue) - VARHDRSZ;
      }

      /* Skip any leading %; it's already factored into initial sel */
      start = (*patt == '%') ? 1 : 0;
--- 3521,3542 ----
      }
      else
      {
!         bytea   *bstr = DatumGetByteaP(patt_const->constvalue);
!
!         pattlen = VARSIZE(bstr) - VARHDRSZ;
!         if (pattlen > 0)
!         {
!             patt = (char *) palloc(pattlen);
!             memcpy(patt, VARDATA(bstr), pattlen);
!         }
!         else
!             patt = NULL;
!
!         if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
!             pfree(bstr);
      }
+     /* patt should never be NULL in practice */
+     Assert(patt != NULL);

      /* Skip any leading %; it's already factored into initial sel */
      start = (*patt == '%') ? 1 : 0;
***************
*** 3693,3700 ****

  /*
   * Try to generate a string greater than the given string or any
!  * string it is a prefix of.  If successful, return a palloc'd string;
!  * else return NULL.
   *
   * The key requirement here is that given a prefix string, say "foo",
   * we must be able to generate another string "fop" that is greater
--- 3727,3734 ----

  /*
   * Try to generate a string greater than the given string or any
!  * string it is a prefix of.  If successful, return a palloc'd string
!  * in the form of a Const pointer; else return NULL.
   *
   * The key requirement here is that given a prefix string, say "foo",
   * we must be able to generate another string "fop" that is greater
***************
*** 3712,3738 ****
  make_greater_string(const Const *str_const)
  {
      Oid            datatype = str_const->consttype;
-     char       *str;
      char       *workstr;
      int            len;

      /* Get the string and a modifiable copy */
      if (datatype == NAMEOID)
      {
!         str = DatumGetCString(DirectFunctionCall1(nameout, str_const->constvalue));
!         len = strlen(str);
      }
      else if (datatype == BYTEAOID)
      {
!         str = DatumGetCString(DirectFunctionCall1(byteaout, str_const->constvalue));
!         len = toast_raw_datum_size(str_const->constvalue) - VARHDRSZ;
      }
      else
      {
!         str = DatumGetCString(DirectFunctionCall1(textout, str_const->constvalue));
!         len = strlen(str);
      }
-     workstr = pstrdup(str);

      while (len > 0)
      {
--- 3746,3783 ----
  make_greater_string(const Const *str_const)
  {
      Oid            datatype = str_const->consttype;
      char       *workstr;
      int            len;

      /* Get the string and a modifiable copy */
      if (datatype == NAMEOID)
      {
!         workstr = DatumGetCString(DirectFunctionCall1(nameout,
!                                                       str_const->constvalue));
!         len = strlen(workstr);
      }
      else if (datatype == BYTEAOID)
      {
!         bytea   *bstr = DatumGetByteaP(str_const->constvalue);
!
!         len = VARSIZE(bstr) - VARHDRSZ;
!         if (len > 0)
!         {
!             workstr = (char *) palloc(len);
!             memcpy(workstr, VARDATA(bstr), len);
!         }
!         else
!             workstr = NULL;
!
!         if ((Pointer) bstr != DatumGetPointer(str_const->constvalue))
!             pfree(bstr);
      }
      else
      {
!         workstr = DatumGetCString(DirectFunctionCall1(textout,
!                                                       str_const->constvalue));
!         len = strlen(workstr);
      }

      while (len > 0)
      {
***************
*** 3747,3755 ****
              Const       *workstr_const;

              (*lastchar)++;
!             workstr_const = string_to_const(workstr, datatype);

-             pfree(str);
              pfree(workstr);
              return workstr_const;
          }
--- 3792,3802 ----
              Const       *workstr_const;

              (*lastchar)++;
!             if (datatype != BYTEAOID)
!                 workstr_const = string_to_const(workstr, datatype);
!             else
!                 workstr_const = string_to_bytea_const(workstr, len);

              pfree(workstr);
              return workstr_const;
          }
***************
*** 3771,3778 ****
      }

      /* Failed... */
!     pfree(str);
!     pfree(workstr);

      return (Const *) NULL;
  }
--- 3818,3825 ----
      }

      /* Failed... */
!     if (workstr != NULL)
!         pfree(workstr);

      return (Const *) NULL;
  }
***************
*** 3809,3814 ****
--- 3856,3877 ----

      return makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1),
                       conval, false, false);
+ }
+
+ /*
+  * Generate a Const node of bytea type from a binary C string and a length.
+  */
+ static Const *
+ string_to_bytea_const(const char *str, size_t str_len)
+ {
+     bytea  *bstr = palloc(VARHDRSZ + str_len);
+     Datum    conval;
+
+     memcpy(VARDATA(bstr), str, str_len);
+     VARATT_SIZEP(bstr) = VARHDRSZ + str_len;
+     conval = PointerGetDatum(bstr);
+
+     return makeConst(BYTEAOID, -1, conval, false, false);
  }

  /*-------------------------------------------------------------------------

Re: [HACKERS] bytea, index and like operator again and detailed report

From
Tom Lane
Date:
Joe Conway <mail@joeconway.com> writes:
> !         *prefix_const = string_to_bytea_const(match, match_pos);
> !         *rest_const = string_to_bytea_const(rest, pattlen - match_pos);

I think that should be pattlen - pos not pattlen - match_pos, no?

Otherwise it looks reasonable ...

            regards, tom lane

Re: [HACKERS] bytea, index and like operator again and

From
Joe Conway
Date:
Tom Lane wrote:
> Joe Conway <mail@joeconway.com> writes:
>>!         *prefix_const = string_to_bytea_const(match, match_pos);
>>!         *rest_const = string_to_bytea_const(rest, pattlen - match_pos);
>
> I think that should be pattlen - pos not pattlen - match_pos, no?
>

Yup -- you're right. Thanks for the review! Fixed and committed.

Joe