Re: [HACKERS] invalidly encoded strings - Mailing list pgsql-patches

From Andrew Dunstan
Subject Re: [HACKERS] invalidly encoded strings
Date
Msg-id 46EC8F83.1030503@dunslane.net
Whole thread Raw
In response to Re: [HACKERS] invalidly encoded strings  (Andrew Dunstan <andrew@dunslane.net>)
List pgsql-patches


and this time the patch is attached


Andrew Dunstan wrote:
>
>
> Tom Lane wrote:
>> What I think we'd need to have a complete solution is
>>
>> convert(text, name) returns bytea
>>     -- convert from DB encoding to arbitrary encoding
>>
>> convert(bytea, name, name) returns bytea
>>     -- convert between any two encodings
>>
>> convert(bytea, name) returns text
>>     -- convert from arbitrary encoding to DB encoding
>>
>> The second and third would need to do a verify step before
>> converting, of course.
>>
>>
>>
>
> Here's a patch that implements the above. It actually does the verify
> step for all three cases - if that bothers people I can remove it at
> the cost of a little code complexity.
>
> It also fixes the "convert ... using ..." case in a similar way (makes
> it return a bytea).
>
> On reflection I think we also need to provide length(bytea, name) as
> has been suggested, so we can check the length in the foreign encoding
> of a bytea we have converted this way. That shouldn't be too difficult
> to add.
>
> cheers
>
> andrew
>
Index: src/backend/catalog/pg_conversion.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/catalog/pg_conversion.c,v
retrieving revision 1.36
diff -c -r1.36 pg_conversion.c
*** src/backend/catalog/pg_conversion.c    27 Feb 2007 23:48:07 -0000    1.36
--- src/backend/catalog/pg_conversion.c    16 Sep 2007 01:43:24 -0000
***************
*** 282,288 ****
   * CONVERT <left paren> <character value expression>
   * USING <form-of-use conversion name> <right paren>
   *
!  * TEXT convert_using(TEXT string, TEXT conversion_name)
   */
  Datum
  pg_convert_using(PG_FUNCTION_ARGS)
--- 282,291 ----
   * CONVERT <left paren> <character value expression>
   * USING <form-of-use conversion name> <right paren>
   *
!  * BYTEA convert_using(TEXT string, TEXT conversion_name)
!  *
!  * bytea is returned so we don't give a value that is
!  * not valid in the database encoding.
   */
  Datum
  pg_convert_using(PG_FUNCTION_ARGS)
***************
*** 344,348 ****
      pfree(result);
      pfree(str);

!     PG_RETURN_TEXT_P(retval);
  }
--- 347,351 ----
      pfree(result);
      pfree(str);

!     PG_RETURN_BYTEA_P(retval);
  }
Index: src/backend/utils/mb/mbutils.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/mb/mbutils.c,v
retrieving revision 1.63
diff -c -r1.63 mbutils.c
*** src/backend/utils/mb/mbutils.c    28 May 2007 16:43:24 -0000    1.63
--- src/backend/utils/mb/mbutils.c    16 Sep 2007 01:43:25 -0000
***************
*** 292,303 ****
  }

  /*
!  * Convert string using encoding_nanme. We assume that string's
!  * encoding is same as DB encoding.
   *
!  * TEXT convert(TEXT string, NAME encoding_name) */
  Datum
! pg_convert(PG_FUNCTION_ARGS)
  {
      Datum        string = PG_GETARG_DATUM(0);
      Datum        dest_encoding_name = PG_GETARG_DATUM(1);
--- 292,303 ----
  }

  /*
!  * Convert string using encoding_name. The source
!  * encoding is the DB encoding.
   *
!  * BYTEA convert(TEXT string, NAME encoding_name) */
  Datum
! pg_convert_from_db(PG_FUNCTION_ARGS)
  {
      Datum        string = PG_GETARG_DATUM(0);
      Datum        dest_encoding_name = PG_GETARG_DATUM(1);
***************
*** 306,312 ****
      Datum        result;

      result = DirectFunctionCall3(
!                  pg_convert2, string, src_encoding_name, dest_encoding_name);

      /* free memory allocated by namein */
      pfree((void *) src_encoding_name);
--- 306,335 ----
      Datum        result;

      result = DirectFunctionCall3(
!                  pg_convert, string, src_encoding_name, dest_encoding_name);
!
!     /* free memory allocated by namein */
!     pfree((void *) src_encoding_name);
!
!     PG_RETURN_BYTEA_P(result);
! }
!
! /*
!  * Convert string using encoding_name. The destination
!  * encoding is the DB encoding.
!  *
!  * TEXT convert(BYTEA string, NAME encoding_name) */
! Datum
! pg_convert_to_db(PG_FUNCTION_ARGS)
! {
!     Datum        string = PG_GETARG_DATUM(0);
!     Datum        src_encoding_name = PG_GETARG_DATUM(1);
!     Datum        dest_encoding_name = DirectFunctionCall1(
!                             namein, CStringGetDatum(DatabaseEncoding->name));
!     Datum        result;
!
!     result = DirectFunctionCall3(
!                  pg_convert, string, src_encoding_name, dest_encoding_name);

      /* free memory allocated by namein */
      pfree((void *) src_encoding_name);
***************
*** 315,334 ****
  }

  /*
!  * Convert string using encoding_name.
   *
!  * TEXT convert2(TEXT string, NAME src_encoding_name, NAME dest_encoding_name)
   */
  Datum
! pg_convert2(PG_FUNCTION_ARGS)
  {
!     text       *string = PG_GETARG_TEXT_P(0);
      char       *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
      int            src_encoding = pg_char_to_encoding(src_encoding_name);
      char       *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
      int            dest_encoding = pg_char_to_encoding(dest_encoding_name);
      unsigned char *result;
!     text       *retval;
      unsigned char *str;
      int            len;

--- 338,357 ----
  }

  /*
!  * Convert string using encoding_names.
   *
!  * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
   */
  Datum
! pg_convert(PG_FUNCTION_ARGS)
  {
!     bytea       *string = PG_GETARG_TEXT_P(0);
      char       *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
      int            src_encoding = pg_char_to_encoding(src_encoding_name);
      char       *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
      int            dest_encoding = pg_char_to_encoding(dest_encoding_name);
      unsigned char *result;
!     bytea       *retval;
      unsigned char *str;
      int            len;

***************
*** 343,350 ****
                   errmsg("invalid destination encoding name \"%s\"",
                          dest_encoding_name)));

!     /* make sure that source string is null terminated */
      len = VARSIZE(string) - VARHDRSZ;
      str = palloc(len + 1);
      memcpy(str, VARDATA(string), len);
      *(str + len) = '\0';
--- 366,374 ----
                   errmsg("invalid destination encoding name \"%s\"",
                          dest_encoding_name)));

!     /* make sure that source string is valid and null terminated */
      len = VARSIZE(string) - VARHDRSZ;
+     pg_verify_mbstr(src_encoding,VARDATA(string),len,false);
      str = palloc(len + 1);
      memcpy(str, VARDATA(string), len);
      *(str + len) = '\0';
***************
*** 354,361 ****
          elog(ERROR, "encoding conversion failed");

      /*
!      * build text data type structure. we cannot use textin() here, since
!      * textin assumes that input string encoding is same as database encoding.
       */
      len = strlen((char *) result) + VARHDRSZ;
      retval = palloc(len);
--- 378,384 ----
          elog(ERROR, "encoding conversion failed");

      /*
!      * build bytea data type structure.
       */
      len = strlen((char *) result) + VARHDRSZ;
      retval = palloc(len);
***************
*** 369,375 ****
      /* free memory if allocated by the toaster */
      PG_FREE_IF_COPY(string, 0);

!     PG_RETURN_TEXT_P(retval);
  }

  /*
--- 392,398 ----
      /* free memory if allocated by the toaster */
      PG_FREE_IF_COPY(string, 0);

!     PG_RETURN_BYTEA_P(retval);
  }

  /*
Index: src/include/catalog/catversion.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/catversion.h,v
retrieving revision 1.423
diff -c -r1.423 catversion.h
*** src/include/catalog/catversion.h    5 Sep 2007 18:10:48 -0000    1.423
--- src/include/catalog/catversion.h    16 Sep 2007 01:43:25 -0000
***************
*** 53,58 ****
   */

  /*                            yyyymmddN */
! #define CATALOG_VERSION_NO    200709042

  #endif
--- 53,58 ----
   */

  /*                            yyyymmddN */
! #define CATALOG_VERSION_NO    200709151

  #endif
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.468
diff -c -r1.468 pg_proc.h
*** src/include/catalog/pg_proc.h    4 Sep 2007 16:41:42 -0000    1.468
--- src/include/catalog/pg_proc.h    16 Sep 2007 01:43:25 -0000
***************
*** 2232,2244 ****
  DATA(insert OID = 810 (  pg_client_encoding    PGNSP PGUID 12 1 0 f f t f s 0 19 "" _null_ _null_ _null_ pg_client_encoding - _null_ _null_ ));
  DESCR("encoding name of current database");

! DATA(insert OID = 1717 (  convert           PGNSP PGUID 12 1 0 f f t f s 2 25 "25 19" _null_ _null_ _null_ pg_convert - _null_ _null_ ));
  DESCR("convert string with specified destination encoding name");

! DATA(insert OID = 1813 (  convert           PGNSP PGUID 12 1 0 f f t f s 3 25 "25 19 19" _null_ _null_ _null_ pg_convert2 - _null_ _null_ ));
  DESCR("convert string with specified encoding names");

! DATA(insert OID = 1619 (  convert_using    PGNSP PGUID 12 1 0 f f t f s 2 25 "25 25" _null_ _null_ _null_ pg_convert_using - _null_ _null_ ));
  DESCR("convert string with specified conversion name");

  DATA(insert OID = 1264 (  pg_char_to_encoding       PGNSP PGUID 12 1 0 f f t f s 1 23 "19" _null_ _null_ _null_ PG_char_to_encoding - _null_ _null_ ));
--- 2232,2247 ----
  DATA(insert OID = 810 (  pg_client_encoding    PGNSP PGUID 12 1 0 f f t f s 0 19 "" _null_ _null_ _null_ pg_client_encoding - _null_ _null_ ));
  DESCR("encoding name of current database");

! DATA(insert OID = 1717 (  convert           PGNSP PGUID 12 1 0 f f t f s 2 17 "25 19" _null_ _null_ _null_ pg_convert_from_db - _null_ _null_ ));
  DESCR("convert string with specified destination encoding name");

! DATA(insert OID = 1713 (  convert           PGNSP PGUID 12 1 0 f f t f s 2 25 "17 19" _null_ _null_ _null_ pg_convert_to_db - _null_ _null_ ));
! DESCR("convert string with specified source encoding name");
!
! DATA(insert OID = 1813 (  convert           PGNSP PGUID 12 1 0 f f t f s 3 17 "17 19 19" _null_ _null_ _null_ pg_convert - _null_ _null_ ));
  DESCR("convert string with specified encoding names");

! DATA(insert OID = 1619 (  convert_using    PGNSP PGUID 12 1 0 f f t f s 2 17 "25 25" _null_ _null_ _null_ pg_convert_using - _null_ _null_ ));
  DESCR("convert string with specified conversion name");

  DATA(insert OID = 1264 (  pg_char_to_encoding       PGNSP PGUID 12 1 0 f f t f s 1 23 "19" _null_ _null_ _null_ PG_char_to_encoding - _null_ _null_ ));
Index: src/include/utils/builtins.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.302
diff -c -r1.302 builtins.h
*** src/include/utils/builtins.h    4 Sep 2007 16:41:43 -0000    1.302
--- src/include/utils/builtins.h    16 Sep 2007 01:43:26 -0000
***************
*** 902,908 ****
  extern Datum PG_character_set_name(PG_FUNCTION_ARGS);
  extern Datum PG_character_set_id(PG_FUNCTION_ARGS);
  extern Datum pg_convert(PG_FUNCTION_ARGS);
! extern Datum pg_convert2(PG_FUNCTION_ARGS);

  /* format_type.c */
  extern Datum format_type(PG_FUNCTION_ARGS);
--- 902,909 ----
  extern Datum PG_character_set_name(PG_FUNCTION_ARGS);
  extern Datum PG_character_set_id(PG_FUNCTION_ARGS);
  extern Datum pg_convert(PG_FUNCTION_ARGS);
! extern Datum pg_convert_to_db(PG_FUNCTION_ARGS);
! extern Datum pg_convert_from_db(PG_FUNCTION_ARGS);

  /* format_type.c */
  extern Datum format_type(PG_FUNCTION_ARGS);

pgsql-patches by date:

Previous
From: Andrew Dunstan
Date:
Subject: Re: [HACKERS] invalidly encoded strings
Next
From: Stefan Kaltenbrunner
Date:
Subject: Re: PL/TCL Patch to prevent postgres from becoming multithreaded