Re: [HACKERS] invalidly encoded strings - Mailing list pgsql-patches
From | Andrew Dunstan |
---|---|
Subject | Re: [HACKERS] invalidly encoded strings |
Date | |
Msg-id | 46EC8F83.1030503@dunslane.net |
In response to | Re: [HACKERS] invalidly encoded strings (Andrew Dunstan <andrew@dunslane.net>) |
List | pgsql-patches |
and this time the patch is attached Andrew Dunstan wrote: > > > Tom Lane wrote: >> What I think we'd need to have a complete solution is >> >> convert(text, name) returns bytea >> -- convert from DB encoding to arbitrary encoding >> >> convert(bytea, name, name) returns bytea >> -- convert between any two encodings >> >> convert(bytea, name) returns text >> -- convert from arbitrary encoding to DB encoding >> >> The second and third would need to do a verify step before >> converting, of course. >> >> >> > > Here's a patch that implements the above. It actually does the verify > step for all three cases - if that bothers people I can remove it at > the cost of a little code complexity. > > It also fixes the "convert ... using ..." case in a similar way (makes > it return a bytea). > > On reflection I think we also need to provide length(bytea, name) as > has been suggested, so we can check the length in the foreign encoding > of a bytea we have converted this way. That shouldn't be too difficult > to add. > > cheers > > andrew > Index: src/backend/catalog/pg_conversion.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/catalog/pg_conversion.c,v retrieving revision 1.36 diff -c -r1.36 pg_conversion.c *** src/backend/catalog/pg_conversion.c 27 Feb 2007 23:48:07 -0000 1.36 --- src/backend/catalog/pg_conversion.c 16 Sep 2007 01:43:24 -0000 *************** *** 282,288 **** * CONVERT <left paren> <character value expression> * USING <form-of-use conversion name> <right paren> * ! * TEXT convert_using(TEXT string, TEXT conversion_name) */ Datum pg_convert_using(PG_FUNCTION_ARGS) --- 282,291 ---- * CONVERT <left paren> <character value expression> * USING <form-of-use conversion name> <right paren> * ! * BYTEA convert_using(TEXT string, TEXT conversion_name) ! * ! * bytea is returned so we don't give a value that is ! * not valid in the database encoding. 
*/ Datum pg_convert_using(PG_FUNCTION_ARGS) *************** *** 344,348 **** pfree(result); pfree(str); ! PG_RETURN_TEXT_P(retval); } --- 347,351 ---- pfree(result); pfree(str); ! PG_RETURN_BYTEA_P(retval); } Index: src/backend/utils/mb/mbutils.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/mb/mbutils.c,v retrieving revision 1.63 diff -c -r1.63 mbutils.c *** src/backend/utils/mb/mbutils.c 28 May 2007 16:43:24 -0000 1.63 --- src/backend/utils/mb/mbutils.c 16 Sep 2007 01:43:25 -0000 *************** *** 292,303 **** } /* ! * Convert string using encoding_nanme. We assume that string's ! * encoding is same as DB encoding. * ! * TEXT convert(TEXT string, NAME encoding_name) */ Datum ! pg_convert(PG_FUNCTION_ARGS) { Datum string = PG_GETARG_DATUM(0); Datum dest_encoding_name = PG_GETARG_DATUM(1); --- 292,303 ---- } /* ! * Convert string using encoding_name. The source ! * encoding is the DB encoding. * ! * BYTEA convert(TEXT string, NAME encoding_name) */ Datum ! pg_convert_from_db(PG_FUNCTION_ARGS) { Datum string = PG_GETARG_DATUM(0); Datum dest_encoding_name = PG_GETARG_DATUM(1); *************** *** 306,312 **** Datum result; result = DirectFunctionCall3( ! pg_convert2, string, src_encoding_name, dest_encoding_name); /* free memory allocated by namein */ pfree((void *) src_encoding_name); --- 306,335 ---- Datum result; result = DirectFunctionCall3( ! pg_convert, string, src_encoding_name, dest_encoding_name); ! ! /* free memory allocated by namein */ ! pfree((void *) src_encoding_name); ! ! PG_RETURN_BYTEA_P(result); ! } ! ! /* ! * Convert string using encoding_name. The destination ! * encoding is the DB encoding. ! * ! * TEXT convert(BYTEA string, NAME encoding_name) */ ! Datum ! pg_convert_to_db(PG_FUNCTION_ARGS) ! { ! Datum string = PG_GETARG_DATUM(0); ! Datum src_encoding_name = PG_GETARG_DATUM(1); ! Datum dest_encoding_name = DirectFunctionCall1( ! 
namein, CStringGetDatum(DatabaseEncoding->name)); ! Datum result; ! ! result = DirectFunctionCall3( ! pg_convert, string, src_encoding_name, dest_encoding_name); /* free memory allocated by namein */ pfree((void *) src_encoding_name); *************** *** 315,334 **** } /* ! * Convert string using encoding_name. * ! * TEXT convert2(TEXT string, NAME src_encoding_name, NAME dest_encoding_name) */ Datum ! pg_convert2(PG_FUNCTION_ARGS) { ! text *string = PG_GETARG_TEXT_P(0); char *src_encoding_name = NameStr(*PG_GETARG_NAME(1)); int src_encoding = pg_char_to_encoding(src_encoding_name); char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2)); int dest_encoding = pg_char_to_encoding(dest_encoding_name); unsigned char *result; ! text *retval; unsigned char *str; int len; --- 338,357 ---- } /* ! * Convert string using encoding_names. * ! * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name) */ Datum ! pg_convert(PG_FUNCTION_ARGS) { ! bytea *string = PG_GETARG_TEXT_P(0); char *src_encoding_name = NameStr(*PG_GETARG_NAME(1)); int src_encoding = pg_char_to_encoding(src_encoding_name); char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2)); int dest_encoding = pg_char_to_encoding(dest_encoding_name); unsigned char *result; ! bytea *retval; unsigned char *str; int len; *************** *** 343,350 **** errmsg("invalid destination encoding name \"%s\"", dest_encoding_name))); ! /* make sure that source string is null terminated */ len = VARSIZE(string) - VARHDRSZ; str = palloc(len + 1); memcpy(str, VARDATA(string), len); *(str + len) = '\0'; --- 366,374 ---- errmsg("invalid destination encoding name \"%s\"", dest_encoding_name))); ! /* make sure that source string is valid and null terminated */ len = VARSIZE(string) - VARHDRSZ; + pg_verify_mbstr(src_encoding,VARDATA(string),len,false); str = palloc(len + 1); memcpy(str, VARDATA(string), len); *(str + len) = '\0'; *************** *** 354,361 **** elog(ERROR, "encoding conversion failed"); /* ! 
* build text data type structure. we cannot use textin() here, since ! * textin assumes that input string encoding is same as database encoding. */ len = strlen((char *) result) + VARHDRSZ; retval = palloc(len); --- 378,384 ---- elog(ERROR, "encoding conversion failed"); /* ! * build bytea data type structure. */ len = strlen((char *) result) + VARHDRSZ; retval = palloc(len); *************** *** 369,375 **** /* free memory if allocated by the toaster */ PG_FREE_IF_COPY(string, 0); ! PG_RETURN_TEXT_P(retval); } /* --- 392,398 ---- /* free memory if allocated by the toaster */ PG_FREE_IF_COPY(string, 0); ! PG_RETURN_BYTEA_P(retval); } /* Index: src/include/catalog/catversion.h =================================================================== RCS file: /cvsroot/pgsql/src/include/catalog/catversion.h,v retrieving revision 1.423 diff -c -r1.423 catversion.h *** src/include/catalog/catversion.h 5 Sep 2007 18:10:48 -0000 1.423 --- src/include/catalog/catversion.h 16 Sep 2007 01:43:25 -0000 *************** *** 53,58 **** */ /* yyyymmddN */ ! #define CATALOG_VERSION_NO 200709042 #endif --- 53,58 ---- */ /* yyyymmddN */ ! #define CATALOG_VERSION_NO 200709151 #endif Index: src/include/catalog/pg_proc.h =================================================================== RCS file: /cvsroot/pgsql/src/include/catalog/pg_proc.h,v retrieving revision 1.468 diff -c -r1.468 pg_proc.h *** src/include/catalog/pg_proc.h 4 Sep 2007 16:41:42 -0000 1.468 --- src/include/catalog/pg_proc.h 16 Sep 2007 01:43:25 -0000 *************** *** 2232,2244 **** DATA(insert OID = 810 ( pg_client_encoding PGNSP PGUID 12 1 0 f f t f s 0 19 "" _null_ _null_ _null_ pg_client_encoding- _null_ _null_ )); DESCR("encoding name of current database"); ! DATA(insert OID = 1717 ( convert PGNSP PGUID 12 1 0 f f t f s 2 25 "25 19" _null_ _null_ _null_ pg_convert -_null_ _null_ )); DESCR("convert string with specified destination encoding name"); ! 
DATA(insert OID = 1813 ( convert PGNSP PGUID 12 1 0 f f t f s 3 25 "25 19 19" _null_ _null_ _null_ pg_convert2- _null_ _null_ )); DESCR("convert string with specified encoding names"); ! DATA(insert OID = 1619 ( convert_using PGNSP PGUID 12 1 0 f f t f s 2 25 "25 25" _null_ _null_ _null_ pg_convert_using- _null_ _null_ )); DESCR("convert string with specified conversion name"); DATA(insert OID = 1264 ( pg_char_to_encoding PGNSP PGUID 12 1 0 f f t f s 1 23 "19" _null_ _null_ _null_ PG_char_to_encoding- _null_ _null_ )); --- 2232,2247 ---- DATA(insert OID = 810 ( pg_client_encoding PGNSP PGUID 12 1 0 f f t f s 0 19 "" _null_ _null_ _null_ pg_client_encoding- _null_ _null_ )); DESCR("encoding name of current database"); ! DATA(insert OID = 1717 ( convert PGNSP PGUID 12 1 0 f f t f s 2 17 "25 19" _null_ _null_ _null_ pg_convert_from_db- _null_ _null_ )); DESCR("convert string with specified destination encoding name"); ! DATA(insert OID = 1713 ( convert PGNSP PGUID 12 1 0 f f t f s 2 25 "17 19" _null_ _null_ _null_ pg_convert_to_db- _null_ _null_ )); ! DESCR("convert string with specified source encoding name"); ! ! DATA(insert OID = 1813 ( convert PGNSP PGUID 12 1 0 f f t f s 3 17 "17 19 19" _null_ _null_ _null_ pg_convert- _null_ _null_ )); DESCR("convert string with specified encoding names"); ! 
DATA(insert OID = 1619 ( convert_using PGNSP PGUID 12 1 0 f f t f s 2 17 "25 25" _null_ _null_ _null_ pg_convert_using- _null_ _null_ )); DESCR("convert string with specified conversion name"); DATA(insert OID = 1264 ( pg_char_to_encoding PGNSP PGUID 12 1 0 f f t f s 1 23 "19" _null_ _null_ _null_ PG_char_to_encoding- _null_ _null_ )); Index: src/include/utils/builtins.h =================================================================== RCS file: /cvsroot/pgsql/src/include/utils/builtins.h,v retrieving revision 1.302 diff -c -r1.302 builtins.h *** src/include/utils/builtins.h 4 Sep 2007 16:41:43 -0000 1.302 --- src/include/utils/builtins.h 16 Sep 2007 01:43:26 -0000 *************** *** 902,908 **** extern Datum PG_character_set_name(PG_FUNCTION_ARGS); extern Datum PG_character_set_id(PG_FUNCTION_ARGS); extern Datum pg_convert(PG_FUNCTION_ARGS); ! extern Datum pg_convert2(PG_FUNCTION_ARGS); /* format_type.c */ extern Datum format_type(PG_FUNCTION_ARGS); --- 902,909 ---- extern Datum PG_character_set_name(PG_FUNCTION_ARGS); extern Datum PG_character_set_id(PG_FUNCTION_ARGS); extern Datum pg_convert(PG_FUNCTION_ARGS); ! extern Datum pg_convert_to_db(PG_FUNCTION_ARGS); ! extern Datum pg_convert_from_db(PG_FUNCTION_ARGS); /* format_type.c */ extern Datum format_type(PG_FUNCTION_ARGS);
pgsql-patches by date: