From d9f019e5b84908903c95a6b7a5d5ed38933dc43a Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Mon, 1 Feb 2021 17:28:31 +0200 Subject: [PATCH v3 3/5] Add tests for the new noError variants of built-in conversions. TODO: When this is pushed, it probably makes more sense to add the whole regression as one commit after the code changes. But this is perhaps useful to keep as a separate commit for now. --- src/backend/utils/mb/mbutils.c | 55 ++ src/include/mb/pg_wchar.h | 6 + src/test/regress/expected/conversion.out | 575 +++++++++--------- .../regress/input/create_function_1.source | 4 + .../regress/output/create_function_1.source | 3 + src/test/regress/regress.c | 121 ++++ src/test/regress/sql/conversion.sql | 17 +- 7 files changed, 482 insertions(+), 299 deletions(-) diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 65753860e35..3e106027d75 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -436,6 +436,61 @@ pg_do_encoding_conversion(unsigned char *src, int len, return result; } +/* + * Convert src string to another encoding. + * + * This function has a different API than the other conversion functions. + * The caller should've looked up the conversion function using + * FindDefaultConversionProc(). Unlike the other functions, the converted + * result is not palloc'd. It is written to a caller-supplied buffer instead. + * + * src_encoding - encoding to convert from + * dest_encoding - encoding to convert to + * src, srclen - input buffer and its length in bytes + * dest, destlen - destination buffer and its size in bytes + * + * The output is null-terminated. + * + * If destlen < srclen * MAX_CONVERSION_GROWTH + 1, the converted output + * wouldn't necessarily fit in the output buffer, and the function will not + * convert the whole input. + * + * TODO: It would be nice to also return to the caller the number of bytes + * written, to avoid a call to strlen(). 
+ */ +int +pg_do_encoding_conversion_buf(Oid proc, + int src_encoding, + int dest_encoding, + unsigned char *src, int srclen, + unsigned char *dest, int destlen, + bool noError) +{ + Datum result; + + /* + * If the destination buffer cannot hold the worst-case result, i.e. + * MAX_CONVERSION_GROWTH output bytes per input byte plus the terminating + * null, limit the input size passed to the conversion function. + * + * TODO: It would perhaps be more efficient to pass the destination + * buffer size to the conversion function, so that if the conversion + * expands less than the worst case, it could continue to fill up the + * whole buffer. + */ + if ((Size) srclen >= ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH)) + srclen = ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH); + + result = OidFunctionCall6(proc, + Int32GetDatum(src_encoding), + Int32GetDatum(dest_encoding), + CStringGetDatum(src), + CStringGetDatum(dest), + Int32GetDatum(srclen), + BoolGetDatum(noError)); + return DatumGetInt32(result); +} + /* * Convert string to encoding encoding_name. The source * encoding is the DB encoding. 
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 346a41a1f3d..9a22a6461d6 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -616,6 +616,12 @@ extern int pg_bind_textdomain_codeset(const char *domainname); extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding); +extern int pg_do_encoding_conversion_buf(Oid proc, + int src_encoding, + int dest_encoding, + unsigned char *src, int srclen, + unsigned char *dst, int dstlen, + bool noError); extern char *pg_client_to_server(const char *s, int len); extern char *pg_server_to_client(const char *s, int len); diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out index 38f8cef0f38..571815683e9 100644 --- a/src/test/regress/expected/conversion.out +++ b/src/test/regress/expected/conversion.out @@ -53,24 +53,19 @@ $$ declare validlen int; begin - -- Try to perform the conversion. If it fails, catch the error and return - -- it to the caller. 
begin - select * into result from convert(input, src_encoding, dst_encoding); - validlen = length(input); + select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, false); errorat = NULL; error := NULL; exception when others then - result = NULL; - errorat = NULL; + select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, true); + errorat = substr(input, validlen + 1); error := sqlerrm; end; return; end; $$; --- --- UTF-8 --- +-- Test verification functions CREATE TABLE utf8_inputs (inbytes bytea, description text); insert into utf8_inputs values ('\x666f6f', 'valid, pure ASCII'), @@ -87,123 +82,123 @@ insert into utf8_inputs values ('\x666f6fe8b1', 'incomplete character at end'); -- Test UTF-8 verification select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_inputs; - description | result | errorat | error -------------------------------------------------------+----------------------+---------+----------------------------------------------------------- - valid, pure ASCII | \x666f6f | | - valid, extra latin chars | \xc3a4c3b6 | | - valid, cyrillic | \xd184d0bed0be | | - valid, kanji/Chinese | \x666f6fe8b1a1 | | - valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | | - only first half of combined char in EUC_JIS_2004 | \xe382ab | | - valid, Hangul, Korean | \xecbd94eb81bceba6ac | | - valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | | - invalid byte sequence | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff - invalid, NUL byte | | | invalid byte sequence for encoding "UTF8": 0x00 - invalid, NUL byte | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00 - incomplete character at end | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 + description | result | errorat | error 
+------------------------------------------------------+----------------------+--------------+----------------------------------------------------------- + valid, pure ASCII | \x666f6f | | + valid, extra latin chars | \xc3a4c3b6 | | + valid, cyrillic | \xd184d0bed0be | | + valid, kanji/Chinese | \x666f6fe8b1a1 | | + valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | | + only first half of combined char in EUC_JIS_2004 | \xe382ab | | + valid, Hangul, Korean | \xecbd94eb81bceba6ac | | + valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | | + invalid byte sequence | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff + invalid, NUL byte | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00 + invalid, NUL byte | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00 + incomplete character at end | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 (12 rows) -- Test conversions from UTF-8 select description, inbytes, (test_conv(inbytes, 'utf8', 'euc_jis_2004')).* from utf8_inputs; - description | inbytes | result | errorat | error -------------------------------------------------------+----------------------+----------------+---------+------------------------------------------------------------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid, extra latin chars | \xc3a4c3b6 | \xa9daa9ec | | - valid, cyrillic | \xd184d0bed0be | \xa7e6a7e0a7e0 | | - valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6fbedd | | - valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \xa5f7 | | - only first half of combined char in EUC_JIS_2004 | \xe382ab | \xa5ab | | - valid, Hangul, Korean | \xecbd94eb81bceba6ac | | | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "EUC_JIS_2004" - valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | | 
| character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "EUC_JIS_2004" - invalid byte sequence | \x66e8b1ff6f6f | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff - invalid, NUL byte | \x66006f | | | invalid byte sequence for encoding "UTF8": 0x00 - invalid, NUL byte | \x666f6fe8b100 | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00 - incomplete character at end | \x666f6fe8b1 | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 + description | inbytes | result | errorat | error +------------------------------------------------------+----------------------+----------------+----------------------+------------------------------------------------------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid, extra latin chars | \xc3a4c3b6 | \xa9daa9ec | | + valid, cyrillic | \xd184d0bed0be | \xa7e6a7e0a7e0 | | + valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6fbedd | | + valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \xa5f7 | | + only first half of combined char in EUC_JIS_2004 | \xe382ab | \xa5ab | | + valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "EUC_JIS_2004" + valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f | \xefa8aa | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "EUC_JIS_2004" + invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff + invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00 + invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00 + incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 
0xe8 0xb1 (12 rows) select description, inbytes, (test_conv(inbytes, 'utf8', 'latin1')).* from utf8_inputs; - description | inbytes | result | errorat | error -------------------------------------------------------+----------------------+----------+---------+------------------------------------------------------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid, extra latin chars | \xc3a4c3b6 | \xe4f6 | | - valid, cyrillic | \xd184d0bed0be | | | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN1" - valid, kanji/Chinese | \x666f6fe8b1a1 | | | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN1" - valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | | | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN1" - only first half of combined char in EUC_JIS_2004 | \xe382ab | | | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN1" - valid, Hangul, Korean | \xecbd94eb81bceba6ac | | | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN1" - valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | | | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN1" - invalid byte sequence | \x66e8b1ff6f6f | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff - invalid, NUL byte | \x66006f | | | invalid byte sequence for encoding "UTF8": 0x00 - invalid, NUL byte | \x666f6fe8b100 | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00 - incomplete character at end | \x666f6fe8b1 | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 + description | inbytes | result | errorat | error 
+------------------------------------------------------+----------------------+----------+----------------------+------------------------------------------------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid, extra latin chars | \xc3a4c3b6 | \xe4f6 | | + valid, cyrillic | \xd184d0bed0be | \x | \xd184d0bed0be | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN1" + valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6f | \xe8b1a1 | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN1" + valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \x | \xe382abe3829a | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN1" + only first half of combined char in EUC_JIS_2004 | \xe382ab | \x | \xe382ab | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN1" + valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN1" + valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f | \xefa8aa | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN1" + invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff + invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00 + invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00 + incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 (12 rows) select description, inbytes, (test_conv(inbytes, 'utf8', 'latin2')).* from utf8_inputs; - description | inbytes | result | errorat | 
error -------------------------------------------------------+----------------------+----------+---------+------------------------------------------------------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid, extra latin chars | \xc3a4c3b6 | \xe4f6 | | - valid, cyrillic | \xd184d0bed0be | | | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN2" - valid, kanji/Chinese | \x666f6fe8b1a1 | | | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN2" - valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | | | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN2" - only first half of combined char in EUC_JIS_2004 | \xe382ab | | | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN2" - valid, Hangul, Korean | \xecbd94eb81bceba6ac | | | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN2" - valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | | | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN2" - invalid byte sequence | \x66e8b1ff6f6f | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff - invalid, NUL byte | \x66006f | | | invalid byte sequence for encoding "UTF8": 0x00 - invalid, NUL byte | \x666f6fe8b100 | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00 - incomplete character at end | \x666f6fe8b1 | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 + description | inbytes | result | errorat | error +------------------------------------------------------+----------------------+----------+----------------------+------------------------------------------------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid, 
extra latin chars | \xc3a4c3b6 | \xe4f6 | | + valid, cyrillic | \xd184d0bed0be | \x | \xd184d0bed0be | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN2" + valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6f | \xe8b1a1 | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN2" + valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \x | \xe382abe3829a | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN2" + only first half of combined char in EUC_JIS_2004 | \xe382ab | \x | \xe382ab | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN2" + valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN2" + valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f | \xefa8aa | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN2" + invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff + invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00 + invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00 + incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 (12 rows) select description, inbytes, (test_conv(inbytes, 'utf8', 'latin5')).* from utf8_inputs; - description | inbytes | result | errorat | error -------------------------------------------------------+----------------------+----------+---------+------------------------------------------------------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid, extra 
latin chars | \xc3a4c3b6 | \xe4f6 | | - valid, cyrillic | \xd184d0bed0be | | | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN5" - valid, kanji/Chinese | \x666f6fe8b1a1 | | | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN5" - valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | | | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN5" - only first half of combined char in EUC_JIS_2004 | \xe382ab | | | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN5" - valid, Hangul, Korean | \xecbd94eb81bceba6ac | | | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN5" - valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | | | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN5" - invalid byte sequence | \x66e8b1ff6f6f | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff - invalid, NUL byte | \x66006f | | | invalid byte sequence for encoding "UTF8": 0x00 - invalid, NUL byte | \x666f6fe8b100 | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00 - incomplete character at end | \x666f6fe8b1 | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 + description | inbytes | result | errorat | error +------------------------------------------------------+----------------------+----------+----------------------+------------------------------------------------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid, extra latin chars | \xc3a4c3b6 | \xe4f6 | | + valid, cyrillic | \xd184d0bed0be | \x | \xd184d0bed0be | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN5" + valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6f | \xe8b1a1 | character 
with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN5" + valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \x | \xe382abe3829a | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN5" + only first half of combined char in EUC_JIS_2004 | \xe382ab | \x | \xe382ab | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN5" + valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN5" + valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f | \xefa8aa | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN5" + invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff + invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00 + invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00 + incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 (12 rows) select description, inbytes, (test_conv(inbytes, 'utf8', 'koi8r')).* from utf8_inputs; - description | inbytes | result | errorat | error -------------------------------------------------------+----------------------+----------+---------+------------------------------------------------------------------------------------------------------ - valid, pure ASCII | \x666f6f | \x666f6f | | - valid, extra latin chars | \xc3a4c3b6 | | | character with byte sequence 0xc3 0xa4 in encoding "UTF8" has no equivalent in encoding "KOI8R" - valid, cyrillic | \xd184d0bed0be | \xc6cfcf | | - valid, kanji/Chinese | \x666f6fe8b1a1 | | | character with byte sequence 0xe8 0xb1 0xa1 in encoding 
"UTF8" has no equivalent in encoding "KOI8R" - valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | | | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "KOI8R" - only first half of combined char in EUC_JIS_2004 | \xe382ab | | | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "KOI8R" - valid, Hangul, Korean | \xecbd94eb81bceba6ac | | | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "KOI8R" - valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | | | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "KOI8R" - invalid byte sequence | \x66e8b1ff6f6f | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff - invalid, NUL byte | \x66006f | | | invalid byte sequence for encoding "UTF8": 0x00 - invalid, NUL byte | \x666f6fe8b100 | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00 - incomplete character at end | \x666f6fe8b1 | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 + description | inbytes | result | errorat | error +------------------------------------------------------+----------------------+----------+----------------------+------------------------------------------------------------------------------------------------------ + valid, pure ASCII | \x666f6f | \x666f6f | | + valid, extra latin chars | \xc3a4c3b6 | \x | \xc3a4c3b6 | character with byte sequence 0xc3 0xa4 in encoding "UTF8" has no equivalent in encoding "KOI8R" + valid, cyrillic | \xd184d0bed0be | \xc6cfcf | | + valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6f | \xe8b1a1 | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "KOI8R" + valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \x | \xe382abe3829a | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding 
"KOI8R" + only first half of combined char in EUC_JIS_2004 | \xe382ab | \x | \xe382ab | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "KOI8R" + valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "KOI8R" + valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f | \xefa8aa | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "KOI8R" + invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff + invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00 + invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00 + incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 (12 rows) select description, inbytes, (test_conv(inbytes, 'utf8', 'gb18030')).* from utf8_inputs; - description | inbytes | result | errorat | error -------------------------------------------------------+----------------------+----------------------------+---------+----------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid, extra latin chars | \xc3a4c3b6 | \x81308a3181308b32 | | - valid, cyrillic | \xd184d0bed0be | \xa7e6a7e0a7e0 | | - valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6fcff3 | | - valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \xa5ab8139a732 | | - only first half of combined char in EUC_JIS_2004 | \xe382ab | \xa5ab | | - valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x8334e5398238c4338330b335 | | - valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f84309c38 | | - invalid byte sequence | \x66e8b1ff6f6f | | | invalid byte sequence for encoding 
"UTF8": 0xe8 0xb1 0xff - invalid, NUL byte | \x66006f | | | invalid byte sequence for encoding "UTF8": 0x00 - invalid, NUL byte | \x666f6fe8b100 | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00 - incomplete character at end | \x666f6fe8b1 | | | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 + description | inbytes | result | errorat | error +------------------------------------------------------+----------------------+----------------------------+--------------+----------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid, extra latin chars | \xc3a4c3b6 | \x81308a3181308b32 | | + valid, cyrillic | \xd184d0bed0be | \xa7e6a7e0a7e0 | | + valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6fcff3 | | + valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \xa5ab8139a732 | | + only first half of combined char in EUC_JIS_2004 | \xe382ab | \xa5ab | | + valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x8334e5398238c4338330b335 | | + valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f84309c38 | | + invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff + invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00 + invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00 + incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 (12 rows) -- @@ -221,30 +216,30 @@ insert into euc_jis_2004_inputs values ('\xbe04', 'invalid byte sequence'); -- Test EUC_JIS_2004 verification select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'euc_jis_2004')).* from euc_jis_2004_inputs; - description | inbytes | result | errorat | error 
----------------------------------------+----------------+--------------+---------+-------------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid | \x666f6fbedd | \x666f6fbedd | | - valid, translates to two UTF-8 chars | \xa5f7 | \xa5f7 | | - incomplete char | \xbeddbe | | | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe - invalid, NUL byte | \x666f6f00bedd | | | invalid byte sequence for encoding "EUC_JIS_2004": 0x00 - invalid, NUL byte | \x666f6fbe00dd | | | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x00 - invalid, NUL byte | \x666f6fbedd00 | | | invalid byte sequence for encoding "EUC_JIS_2004": 0x00 - invalid byte sequence | \xbe04 | | | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x04 + description | inbytes | result | errorat | error +---------------------------------------+----------------+--------------+----------+-------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid | \x666f6fbedd | \x666f6fbedd | | + valid, translates to two UTF-8 chars | \xa5f7 | \xa5f7 | | + incomplete char | \xbeddbe | \xbedd | \xbe | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe + invalid, NUL byte | \x666f6f00bedd | \x666f6f | \x00bedd | invalid byte sequence for encoding "EUC_JIS_2004": 0x00 + invalid, NUL byte | \x666f6fbe00dd | \x666f6f | \xbe00dd | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x00 + invalid, NUL byte | \x666f6fbedd00 | \x666f6fbedd | \x00 | invalid byte sequence for encoding "EUC_JIS_2004": 0x00 + invalid byte sequence | \xbe04 | \x | \xbe04 | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x04 (8 rows) -- Test conversions from EUC_JIS_2004 select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'utf8')).* from euc_jis_2004_inputs; - description | inbytes | result | errorat | error 
----------------------------------------+----------------+----------------+---------+-------------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid | \x666f6fbedd | \x666f6fe8b1a1 | | - valid, translates to two UTF-8 chars | \xa5f7 | \xe382abe3829a | | - incomplete char | \xbeddbe | | | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe - invalid, NUL byte | \x666f6f00bedd | | | invalid byte sequence for encoding "EUC_JIS_2004": 0x00 - invalid, NUL byte | \x666f6fbe00dd | | | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x00 - invalid, NUL byte | \x666f6fbedd00 | | | invalid byte sequence for encoding "EUC_JIS_2004": 0x00 - invalid byte sequence | \xbe04 | | | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x04 + description | inbytes | result | errorat | error +---------------------------------------+----------------+----------------+----------+-------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid | \x666f6fbedd | \x666f6fe8b1a1 | | + valid, translates to two UTF-8 chars | \xa5f7 | \xe382abe3829a | | + incomplete char | \xbeddbe | \xe8b1a1 | \xbe | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe + invalid, NUL byte | \x666f6f00bedd | \x666f6f | \x00bedd | invalid byte sequence for encoding "EUC_JIS_2004": 0x00 + invalid, NUL byte | \x666f6fbe00dd | \x666f6f | \xbe00dd | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x00 + invalid, NUL byte | \x666f6fbedd00 | \x666f6fe8b1a1 | \x00 | invalid byte sequence for encoding "EUC_JIS_2004": 0x00 + invalid byte sequence | \xbe04 | \x | \xbe04 | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x04 (8 rows) -- @@ -263,46 +258,46 @@ insert into shiftjis2004_inputs values ('\x666f6f8fdb00', 'invalid, NUL byte'); -- Test SHIFT-JIS-2004 verification select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'shiftjis2004')).* from shiftjis2004_inputs; - 
description | inbytes | result | errorat | error ----------------------------------------+----------------+--------------+---------+---------------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid | \x666f6f8fdb | \x666f6f8fdb | | - valid, no translation to UTF-8 | \x666f6f81c0 | \x666f6f81c0 | | - valid, translates to two UTF-8 chars | \x666f6f82f5 | \x666f6f82f5 | | - incomplete char | \x666f6f8fdb8f | | | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f - incomplete char, followed by newline | \x666f6f820a | | | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a - invalid, NUL byte | \x666f6f008fdb | | | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00 - invalid, NUL byte | \x666f6f8f00db | | | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00 - invalid, NUL byte | \x666f6f8fdb00 | | | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00 + description | inbytes | result | errorat | error +---------------------------------------+----------------+--------------+----------+---------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid | \x666f6f8fdb | \x666f6f8fdb | | + valid, no translation to UTF-8 | \x666f6f81c0 | \x666f6f81c0 | | + valid, translates to two UTF-8 chars | \x666f6f82f5 | \x666f6f82f5 | | + incomplete char | \x666f6f8fdb8f | \x666f6f8fdb | \x8f | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f + incomplete char, followed by newline | \x666f6f820a | \x666f6f | \x820a | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a + invalid, NUL byte | \x666f6f008fdb | \x666f6f | \x008fdb | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00 + invalid, NUL byte | \x666f6f8f00db | \x666f6f | \x8f00db | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00 + invalid, NUL byte | \x666f6f8fdb00 | \x666f6f8fdb | \x00 | invalid byte sequence for encoding "SHIFT_JIS_2004": 
0x00 (9 rows) -- Test conversions from SHIFT-JIS-2004 select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'utf8')).* from shiftjis2004_inputs; - description | inbytes | result | errorat | error ----------------------------------------+----------------+----------------------+---------+---------------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid | \x666f6f8fdb | \x666f6fe8b1a1 | | - valid, no translation to UTF-8 | \x666f6f81c0 | \x666f6fe28a84 | | - valid, translates to two UTF-8 chars | \x666f6f82f5 | \x666f6fe3818be3829a | | - incomplete char | \x666f6f8fdb8f | | | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f - incomplete char, followed by newline | \x666f6f820a | | | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a - invalid, NUL byte | \x666f6f008fdb | | | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00 - invalid, NUL byte | \x666f6f8f00db | | | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00 - invalid, NUL byte | \x666f6f8fdb00 | | | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00 + description | inbytes | result | errorat | error +---------------------------------------+----------------+----------------------+----------+---------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid | \x666f6f8fdb | \x666f6fe8b1a1 | | + valid, no translation to UTF-8 | \x666f6f81c0 | \x666f6fe28a84 | | + valid, translates to two UTF-8 chars | \x666f6f82f5 | \x666f6fe3818be3829a | | + incomplete char | \x666f6f8fdb8f | \x666f6fe8b1a1 | \x8f | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f + incomplete char, followed by newline | \x666f6f820a | \x666f6f | \x820a | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a + invalid, NUL byte | \x666f6f008fdb | \x666f6f | \x008fdb | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00 + invalid, NUL byte | \x666f6f8f00db | 
\x666f6f | \x8f00db | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00 + invalid, NUL byte | \x666f6f8fdb00 | \x666f6fe8b1a1 | \x00 | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00 (9 rows) select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'euc_jis_2004')).* from shiftjis2004_inputs; - description | inbytes | result | errorat | error ----------------------------------------+----------------+--------------+---------+---------------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid | \x666f6f8fdb | \x666f6fbedd | | - valid, no translation to UTF-8 | \x666f6f81c0 | \x666f6fa2c2 | | - valid, translates to two UTF-8 chars | \x666f6f82f5 | \x666f6fa4f7 | | - incomplete char | \x666f6f8fdb8f | | | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f - incomplete char, followed by newline | \x666f6f820a | | | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a - invalid, NUL byte | \x666f6f008fdb | | | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00 - invalid, NUL byte | \x666f6f8f00db | | | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00 - invalid, NUL byte | \x666f6f8fdb00 | | | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00 + description | inbytes | result | errorat | error +---------------------------------------+----------------+--------------+----------+---------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid | \x666f6f8fdb | \x666f6fbedd | | + valid, no translation to UTF-8 | \x666f6f81c0 | \x666f6fa2c2 | | + valid, translates to two UTF-8 chars | \x666f6f82f5 | \x666f6fa4f7 | | + incomplete char | \x666f6f8fdb8f | \x666f6fbedd | \x8f | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f + incomplete char, followed by newline | \x666f6f820a | \x666f6f | \x820a | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a + invalid, NUL byte | 
\x666f6f008fdb | \x666f6f | \x008fdb | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00 + invalid, NUL byte | \x666f6f8f00db | \x666f6f | \x8f00db | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00 + invalid, NUL byte | \x666f6f8fdb00 | \x666f6fbedd | \x00 | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00 (9 rows) -- @@ -320,30 +315,30 @@ insert into gb18030_inputs values ('\x666f6f84309c0038', 'invalid, NUL byte'); -- Test GB18030 verification select description, inbytes, (test_conv(inbytes, 'gb18030', 'gb18030')).* from gb18030_inputs; - description | inbytes | result | errorat | error -------------------------------------------------+--------------------+------------------+---------+------------------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid | \x666f6fcff3 | \x666f6fcff3 | | - valid, no translation to UTF-8 | \x666f6f8431a530 | \x666f6f8431a530 | | - valid, translates to UTF-8 by mapping function | \x666f6f84309c38 | \x666f6f84309c38 | | - incomplete char | \x666f6f84309c | | | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c - incomplete char, followed by newline | \x666f6f84309c0a | | | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x0a - invalid, NUL byte | \x666f6f84309c3800 | | | invalid byte sequence for encoding "GB18030": 0x00 - invalid, NUL byte | \x666f6f84309c0038 | | | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x00 + description | inbytes | result | errorat | error +------------------------------------------------+--------------------+------------------+--------------+------------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid | \x666f6fcff3 | \x666f6fcff3 | | + valid, no translation to UTF-8 | \x666f6f8431a530 | \x666f6f8431a530 | | + valid, translates to UTF-8 by mapping function | \x666f6f84309c38 | \x666f6f84309c38 | | + incomplete char | 
\x666f6f84309c | \x666f6f | \x84309c | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c + incomplete char, followed by newline | \x666f6f84309c0a | \x666f6f | \x84309c0a | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x0a + invalid, NUL byte | \x666f6f84309c3800 | \x666f6f84309c38 | \x00 | invalid byte sequence for encoding "GB18030": 0x00 + invalid, NUL byte | \x666f6f84309c0038 | \x666f6f | \x84309c0038 | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x00 (8 rows) -- Test conversions from GB18030 select description, inbytes, (test_conv(inbytes, 'gb18030', 'utf8')).* from gb18030_inputs; - description | inbytes | result | errorat | error -------------------------------------------------+--------------------+----------------+---------+------------------------------------------------------------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid | \x666f6fcff3 | \x666f6fe8b1a1 | | - valid, no translation to UTF-8 | \x666f6f8431a530 | | | character with byte sequence 0x84 0x31 0xa5 0x30 in encoding "GB18030" has no equivalent in encoding "UTF8" - valid, translates to UTF-8 by mapping function | \x666f6f84309c38 | \x666f6fefa8aa | | - incomplete char | \x666f6f84309c | | | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c - incomplete char, followed by newline | \x666f6f84309c0a | | | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x0a - invalid, NUL byte | \x666f6f84309c3800 | | | invalid byte sequence for encoding "GB18030": 0x00 - invalid, NUL byte | \x666f6f84309c0038 | | | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x00 + description | inbytes | result | errorat | error +------------------------------------------------+--------------------+----------------+--------------+------------------------------------------------------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + 
valid | \x666f6fcff3 | \x666f6fe8b1a1 | | + valid, no translation to UTF-8 | \x666f6f8431a530 | \x666f6f | \x8431a530 | character with byte sequence 0x84 0x31 0xa5 0x30 in encoding "GB18030" has no equivalent in encoding "UTF8" + valid, translates to UTF-8 by mapping function | \x666f6f84309c38 | \x666f6fefa8aa | | + incomplete char | \x666f6f84309c | \x666f6f | \x84309c | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c + incomplete char, followed by newline | \x666f6f84309c0a | \x666f6f | \x84309c0a | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x0a + invalid, NUL byte | \x666f6f84309c3800 | \x666f6fefa8aa | \x00 | invalid byte sequence for encoding "GB18030": 0x00 + invalid, NUL byte | \x666f6f84309c0038 | \x666f6f | \x84309c0038 | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x00 (8 rows) -- @@ -358,44 +353,44 @@ insert into iso8859_5_inputs values ('\xe4dede00', 'invalid, NUL byte'); -- Test ISO-88591 verification select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'iso8859-5')).* from iso8859_5_inputs; - description | inbytes | result | errorat | error --------------------+------------+----------+---------+------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid | \xe4dede | \xe4dede | | - invalid, NUL byte | \x00 | | | invalid byte sequence for encoding "ISO_8859_5": 0x00 - invalid, NUL byte | \xe400dede | | | invalid byte sequence for encoding "ISO_8859_5": 0x00 - invalid, NUL byte | \xe4dede00 | | | invalid byte sequence for encoding "ISO_8859_5": 0x00 + description | inbytes | result | errorat | error +-------------------+------------+----------+----------+------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid | \xe4dede | \xe4dede | | + invalid, NUL byte | \x00 | \x | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00 + invalid, NUL byte | \xe400dede | \xe4 | \x00dede | invalid byte 
sequence for encoding "ISO_8859_5": 0x00 + invalid, NUL byte | \xe4dede00 | \xe4dede | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00 (5 rows) -- Test conversions from ISO-88591 select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'utf8')).* from iso8859_5_inputs; - description | inbytes | result | errorat | error --------------------+------------+----------------+---------+------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid | \xe4dede | \xd184d0bed0be | | - invalid, NUL byte | \x00 | | | invalid byte sequence for encoding "ISO_8859_5": 0x00 - invalid, NUL byte | \xe400dede | | | invalid byte sequence for encoding "ISO_8859_5": 0x00 - invalid, NUL byte | \xe4dede00 | | | invalid byte sequence for encoding "ISO_8859_5": 0x00 + description | inbytes | result | errorat | error +-------------------+------------+----------------+----------+------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid | \xe4dede | \xd184d0bed0be | | + invalid, NUL byte | \x00 | \x | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00 + invalid, NUL byte | \xe400dede | \xd184 | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00 + invalid, NUL byte | \xe4dede00 | \xd184d0bed0be | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00 (5 rows) select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'koi8r')).* from iso8859_5_inputs; - description | inbytes | result | errorat | error --------------------+------------+----------+---------+------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid | \xe4dede | \xc6cfcf | | - invalid, NUL byte | \x00 | | | invalid byte sequence for encoding "ISO_8859_5": 0x00 - invalid, NUL byte | \xe400dede | | | invalid byte sequence for encoding "ISO_8859_5": 0x00 - invalid, NUL byte | \xe4dede00 | | | invalid byte sequence for encoding 
"ISO_8859_5": 0x00 + description | inbytes | result | errorat | error +-------------------+------------+----------+----------+------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid | \xe4dede | \xc6cfcf | | + invalid, NUL byte | \x00 | \x | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00 + invalid, NUL byte | \xe400dede | \xc6 | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00 + invalid, NUL byte | \xe4dede00 | \xc6cfcf | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00 (5 rows) select description, inbytes, (test_conv(inbytes, 'iso8859_5', 'mule_internal')).* from iso8859_5_inputs; - description | inbytes | result | errorat | error --------------------+------------+----------------+---------+------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid | \xe4dede | \x8bc68bcf8bcf | | - invalid, NUL byte | \x00 | | | invalid byte sequence for encoding "ISO_8859_5": 0x00 - invalid, NUL byte | \xe400dede | | | invalid byte sequence for encoding "ISO_8859_5": 0x00 - invalid, NUL byte | \xe4dede00 | | | invalid byte sequence for encoding "ISO_8859_5": 0x00 + description | inbytes | result | errorat | error +-------------------+------------+----------------+----------+------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid | \xe4dede | \x8bc68bcf8bcf | | + invalid, NUL byte | \x00 | \x | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00 + invalid, NUL byte | \xe400dede | \x8bc6 | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00 + invalid, NUL byte | \xe4dede00 | \x8bc68bcf8bcf | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00 (5 rows) -- @@ -410,37 +405,39 @@ insert into big5_inputs values ('\x666f6fb64800', 'invalid, NUL byte'); -- Test Big5 verification select description, inbytes, (test_conv(inbytes, 'big5', 'big5')).* from 
big5_inputs; - description | inbytes | result | errorat | error ---------------------------------+----------------+--------------+---------+------------------------------------------------------ - valid, pure ASCII | \x666f6f | \x666f6f | | - valid | \x666f6fb648 | \x666f6fb648 | | - valid, no translation to UTF-8 | \x666f6fa27f | \x666f6fa27f | | - invalid, NUL byte | \x666f6fb60048 | | | invalid byte sequence for encoding "BIG5": 0xb6 0x00 - invalid, NUL byte | \x666f6fb64800 | | | invalid byte sequence for encoding "BIG5": 0x00 + description | inbytes | result | errorat | error +--------------------------------+----------------+--------------+----------+------------------------------------------------------ + valid, pure ASCII | \x666f6f | \x666f6f | | + valid | \x666f6fb648 | \x666f6fb648 | | + valid, no translation to UTF-8 | \x666f6fa27f | \x666f6fa27f | | + invalid, NUL byte | \x666f6fb60048 | \x666f6f | \xb60048 | invalid byte sequence for encoding "BIG5": 0xb6 0x00 + invalid, NUL byte | \x666f6fb64800 | \x666f6fb648 | \x00 | invalid byte sequence for encoding "BIG5": 0x00 (5 rows) -- Test conversions from Big5 select description, inbytes, (test_conv(inbytes, 'big5', 'utf8')).* from big5_inputs; - description | inbytes | result | errorat | error ---------------------------------+----------------+----------------+---------+------------------------------------------------------------------------------------------------ - valid, pure ASCII | \x666f6f | \x666f6f | | - valid | \x666f6fb648 | \x666f6fe8b1a1 | | - valid, no translation to UTF-8 | \x666f6fa27f | | | character with byte sequence 0xa2 0x7f in encoding "BIG5" has no equivalent in encoding "UTF8" - invalid, NUL byte | \x666f6fb60048 | | | invalid byte sequence for encoding "BIG5": 0xb6 0x00 - invalid, NUL byte | \x666f6fb64800 | | | invalid byte sequence for encoding "BIG5": 0x00 + description | inbytes | result | errorat | error 
+--------------------------------+----------------+----------------+----------+------------------------------------------------------------------------------------------------ + valid, pure ASCII | \x666f6f | \x666f6f | | + valid | \x666f6fb648 | \x666f6fe8b1a1 | | + valid, no translation to UTF-8 | \x666f6fa27f | \x666f6f | \xa27f | character with byte sequence 0xa2 0x7f in encoding "BIG5" has no equivalent in encoding "UTF8" + invalid, NUL byte | \x666f6fb60048 | \x666f6f | \xb60048 | invalid byte sequence for encoding "BIG5": 0xb6 0x00 + invalid, NUL byte | \x666f6fb64800 | \x666f6fe8b1a1 | \x00 | invalid byte sequence for encoding "BIG5": 0x00 (5 rows) select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs; - description | inbytes | result | errorat | error ---------------------------------+----------------+----------------+---------+------------------------------------------------------ - valid, pure ASCII | \x666f6f | \x666f6f | | - valid | \x666f6fb648 | \x666f6f95e2af | | - valid, no translation to UTF-8 | \x666f6fa27f | \x666f6f95a3c1 | | - invalid, NUL byte | \x666f6fb60048 | | | invalid byte sequence for encoding "BIG5": 0xb6 0x00 - invalid, NUL byte | \x666f6fb64800 | | | invalid byte sequence for encoding "BIG5": 0x00 + description | inbytes | result | errorat | error +--------------------------------+----------------+----------------+----------+------------------------------------------------------ + valid, pure ASCII | \x666f6f | \x666f6f | | + valid | \x666f6fb648 | \x666f6f95e2af | | + valid, no translation to UTF-8 | \x666f6fa27f | \x666f6f95a3c1 | | + invalid, NUL byte | \x666f6fb60048 | \x666f6f | \xb60048 | invalid byte sequence for encoding "BIG5": 0xb6 0x00 + invalid, NUL byte | \x666f6fb64800 | \x666f6f95e2af | \x00 | invalid byte sequence for encoding "BIG5": 0x00 (5 rows) +-- -- MULE_INTERNAL +-- CREATE TABLE mic_inputs (inbytes bytea, description text); insert into mic_inputs values ('\x666f6f', 
'valid, pure ASCII'), @@ -455,78 +452,78 @@ insert into mic_inputs values ('\x8b00c68bcf8bcf', 'invalid, NUL byte'); -- Test MULE_INTERNAL verification select description, inbytes, (test_conv(inbytes, 'mule_internal', 'koi8r')).* from mic_inputs; - description | inbytes | result | errorat | error ----------------------------+------------------+----------+---------+--------------------------------------------------------------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid (in KOI8R) | \x8bc68bcf8bcf | \xc6cfcf | | - invalid,incomplete char | \x8bc68bcf8b | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b - valid (in SHIFT_JIS) | \x92bedd | | | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R" - invalid, incomplete char) | \x92be | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe - valid (in Big5) | \x666f6f95a3c1 | | | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R" - invalid, incomplete char | \x666f6f95a3 | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3 - invalid, NUL byte | \x9200bedd | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe - invalid, NUL byte | \x92bedd00 | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x00 - invalid, NUL byte | \x8b00c68bcf8bcf | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00 + description | inbytes | result | errorat | error +---------------------------+------------------+----------+------------------+--------------------------------------------------------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid (in KOI8R) | \x8bc68bcf8bcf | \xc6cfcf | | + invalid,incomplete char | \x8bc68bcf8b | \xc6cf | \x8b | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b + valid (in SHIFT_JIS) | \x92bedd | \x | 
\x92bedd | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R" + invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe + valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R" + invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3 + invalid, NUL byte | \x9200bedd | \x | \x9200bedd | character with byte sequence 0x92 0x00 0xbe in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R" + invalid, NUL byte | \x92bedd00 | \x | \x92bedd00 | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R" + invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | character with byte sequence 0x8b 0x00 in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R" (10 rows) -- Test conversions from MULE_INTERNAL select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs; - description | inbytes | result | errorat | error ----------------------------+------------------+----------+---------+-------------------------------------------------------------------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid (in KOI8R) | \x8bc68bcf8bcf | \xe4dede | | - invalid,incomplete char | \x8bc68bcf8b | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b - valid (in SHIFT_JIS) | \x92bedd | | | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5" - invalid, incomplete char) | \x92be | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe - valid (in Big5) | \x666f6f95a3c1 | | | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no 
equivalent in encoding "ISO_8859_5" - invalid, incomplete char | \x666f6f95a3 | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3 - invalid, NUL byte | \x9200bedd | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe - invalid, NUL byte | \x92bedd00 | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x00 - invalid, NUL byte | \x8b00c68bcf8bcf | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00 + description | inbytes | result | errorat | error +---------------------------+------------------+----------+------------------+-------------------------------------------------------------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid (in KOI8R) | \x8bc68bcf8bcf | \xe4dede | | + invalid,incomplete char | \x8bc68bcf8b | \xe4de | \x8b | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b + valid (in SHIFT_JIS) | \x92bedd | \x | \x92bedd | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5" + invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe + valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5" + invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3 + invalid, NUL byte | \x9200bedd | \x | \x9200bedd | character with byte sequence 0x92 0x00 0xbe in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5" + invalid, NUL byte | \x92bedd00 | \x | \x92bedd00 | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5" + invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | character with byte sequence 0x8b 0x00 in encoding "MULE_INTERNAL" has no equivalent in encoding 
"ISO_8859_5" (10 rows) select description, inbytes, (test_conv(inbytes, 'mule_internal', 'sjis')).* from mic_inputs; - description | inbytes | result | errorat | error ----------------------------+------------------+----------+---------+-------------------------------------------------------------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid (in KOI8R) | \x8bc68bcf8bcf | | | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS" - invalid,incomplete char | \x8bc68bcf8b | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b - valid (in SHIFT_JIS) | \x92bedd | \x8fdb | | - invalid, incomplete char) | \x92be | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe - valid (in Big5) | \x666f6f95a3c1 | | | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS" - invalid, incomplete char | \x666f6f95a3 | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3 - invalid, NUL byte | \x9200bedd | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe - invalid, NUL byte | \x92bedd00 | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x00 - invalid, NUL byte | \x8b00c68bcf8bcf | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00 + description | inbytes | result | errorat | error +---------------------------+------------------+----------+------------------+-------------------------------------------------------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid (in KOI8R) | \x8bc68bcf8bcf | \x | \x8bc68bcf8bcf | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS" + invalid,incomplete char | \x8bc68bcf8b | \x | \x8bc68bcf8b | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS" + valid (in 
SHIFT_JIS) | \x92bedd | \x8fdb | | + invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe + valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS" + invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3 + invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe + invalid, NUL byte | \x92bedd00 | \x8fdb | \x00 | invalid byte sequence for encoding "MULE_INTERNAL": 0x00 + invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00 (10 rows) select description, inbytes, (test_conv(inbytes, 'mule_internal', 'big5')).* from mic_inputs; - description | inbytes | result | errorat | error ----------------------------+------------------+--------------+---------+-------------------------------------------------------------------------------------------------------------- - valid, pure ASCII | \x666f6f | \x666f6f | | - valid (in KOI8R) | \x8bc68bcf8bcf | | | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5" - invalid,incomplete char | \x8bc68bcf8b | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b - valid (in SHIFT_JIS) | \x92bedd | | | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5" - invalid, incomplete char) | \x92be | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe - valid (in Big5) | \x666f6f95a3c1 | \x666f6fa2a1 | | - invalid, incomplete char | \x666f6f95a3 | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3 - invalid, NUL byte | \x9200bedd | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe - invalid, NUL byte | \x92bedd00 | | | 
invalid byte sequence for encoding "MULE_INTERNAL": 0x00 - invalid, NUL byte | \x8b00c68bcf8bcf | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00 + description | inbytes | result | errorat | error +---------------------------+------------------+--------------+------------------+-------------------------------------------------------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid (in KOI8R) | \x8bc68bcf8bcf | \x | \x8bc68bcf8bcf | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5" + invalid,incomplete char | \x8bc68bcf8b | \x | \x8bc68bcf8b | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5" + valid (in SHIFT_JIS) | \x92bedd | \x | \x92bedd | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5" + invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe + valid (in Big5) | \x666f6f95a3c1 | \x666f6fa2a1 | | + invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3 + invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe + invalid, NUL byte | \x92bedd00 | \x | \x92bedd00 | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5" + invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00 (10 rows) select description, inbytes, (test_conv(inbytes, 'mule_internal', 'euc_jp')).* from mic_inputs; - description | inbytes | result | errorat | error ----------------------------+------------------+----------+---------+---------------------------------------------------------------------------------------------------------------- - valid, 
pure ASCII | \x666f6f | \x666f6f | | - valid (in KOI8R) | \x8bc68bcf8bcf | | | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP" - invalid,incomplete char | \x8bc68bcf8b | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b - valid (in SHIFT_JIS) | \x92bedd | \xbedd | | - invalid, incomplete char) | \x92be | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe - valid (in Big5) | \x666f6f95a3c1 | | | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP" - invalid, incomplete char | \x666f6f95a3 | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3 - invalid, NUL byte | \x9200bedd | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe - invalid, NUL byte | \x92bedd00 | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x00 - invalid, NUL byte | \x8b00c68bcf8bcf | | | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00 + description | inbytes | result | errorat | error +---------------------------+------------------+----------+------------------+---------------------------------------------------------------------------------------------------------------- + valid, pure ASCII | \x666f6f | \x666f6f | | + valid (in KOI8R) | \x8bc68bcf8bcf | \x | \x8bc68bcf8bcf | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP" + invalid,incomplete char | \x8bc68bcf8b | \x | \x8bc68bcf8b | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP" + valid (in SHIFT_JIS) | \x92bedd | \xbedd | | + invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe + valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP" + invalid, incomplete char | 
\x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3 + invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe + invalid, NUL byte | \x92bedd00 | \xbedd | \x00 | invalid byte sequence for encoding "MULE_INTERNAL": 0x00 + invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00 (10 rows) diff --git a/src/test/regress/input/create_function_1.source b/src/test/regress/input/create_function_1.source index 412e339fcf2..6ba37fe63b6 100644 --- a/src/test/regress/input/create_function_1.source +++ b/src/test/regress/input/create_function_1.source @@ -78,6 +78,10 @@ CREATE FUNCTION test_opclass_options_func(internal) AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func' LANGUAGE C; +CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea) + AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion' + LANGUAGE C; + -- Things that shouldn't work: CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL diff --git a/src/test/regress/output/create_function_1.source b/src/test/regress/output/create_function_1.source index 4d78fa12289..cb38a039bf4 100644 --- a/src/test/regress/output/create_function_1.source +++ b/src/test/regress/output/create_function_1.source @@ -68,6 +68,9 @@ CREATE FUNCTION test_opclass_options_func(internal) RETURNS void AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func' LANGUAGE C; +CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea) + AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion' + LANGUAGE C; -- Things that shouldn't work: CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL AS 'SELECT ''not an integer'';'; diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index 32ab9ed6b53..6dc43ff7fea 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ 
-23,12 +23,15 @@ #include "access/htup_details.h" #include "access/transam.h" #include "access/xact.h" +#include "catalog/namespace.h" #include "catalog/pg_operator.h" #include "catalog/pg_type.h" #include "commands/sequence.h" #include "commands/trigger.h" #include "executor/executor.h" #include "executor/spi.h" +#include "funcapi.h" +#include "mb/pg_wchar.h" #include "miscadmin.h" #include "nodes/supportnodes.h" #include "optimizer/optimizer.h" @@ -1060,3 +1063,121 @@ test_opclass_options_func(PG_FUNCTION_ARGS) { PG_RETURN_NULL(); } + +/* Regression-test helper for encoding conversions. Arguments: 0 is the input bytes (bytea), 1 the source encoding name, 2 the destination encoding name, 3 the noError flag. Returns a two-column row: values[0] is the int produced by the verification or conversion step, values[1] is the resulting bytea. When both encodings are the same the input is only verified; otherwise the default conversion procedure for the pair is looked up and called through pg_do_encoding_conversion_buf(). NOTE(review): the int presumably counts the leading input bytes that were valid or converted; confirm against the conversion procedures. */ +PG_FUNCTION_INFO_V1(test_enc_conversion); +Datum +test_enc_conversion(PG_FUNCTION_ARGS) +{ + bytea *string = PG_GETARG_BYTEA_PP(0); + char *src_encoding_name = NameStr(*PG_GETARG_NAME(1)); + int src_encoding = pg_char_to_encoding(src_encoding_name); + char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2)); + int dest_encoding = pg_char_to_encoding(dest_encoding_name); + bool noError = PG_GETARG_BOOL(3); + TupleDesc tupdesc; + char *src; + char *dst; + bytea *retval; + Size srclen; + Size dstsize; + Oid proc; + int convertedbytes; + int dstlen; + Datum values[2]; + bool nulls[2]; + HeapTuple tuple; + + if (src_encoding < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid source encoding name \"%s\"", + src_encoding_name))); + if (dest_encoding < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid destination encoding name \"%s\"", + dest_encoding_name))); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + tupdesc = BlessTupleDesc(tupdesc); + + srclen = VARSIZE_ANY_EXHDR(string); /* byte length of the input; used as the source length everywhere below */ + src = VARDATA_ANY(string); + + if (src_encoding == dest_encoding) + { + /* just check that the source string is valid */ + int oklen; + + oklen = pg_encoding_verifymbstr(src_encoding, src, srclen); + + if (oklen == srclen) + { + convertedbytes = oklen; + retval =
string; /* the whole input verified: hand back the input itself, no copy */ + } + else if (!noError) + { + report_invalid_encoding(src_encoding, src + oklen, srclen - oklen); /* NOTE(review): convertedbytes and retval stay unset on this path, so this presumably never returns; confirm */ + } + else + { + /* + * noError case: build bytea data type structure holding only the first oklen bytes of the input. + */ + Assert(oklen < srclen); + convertedbytes = oklen; + retval = (bytea *) palloc(oklen + VARHDRSZ); + SET_VARSIZE(retval, oklen + VARHDRSZ); + memcpy(VARDATA(retval), src, oklen); + } + } + else + { + proc = FindDefaultConversionProc(src_encoding, dest_encoding); /* different encodings: look up the default conversion procedure for this pair */ + if (!OidIsValid(proc)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist", + pg_encoding_to_char(src_encoding), + pg_encoding_to_char(dest_encoding)))); + + if (srclen >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("out of memory"), + errdetail("String of %d bytes is too long for encoding conversion.", + (int) srclen))); /* this check keeps the dstsize computed just below under MaxAllocSize */ + + dstsize = (Size) srclen * MAX_CONVERSION_GROWTH + 1; /* input length times the growth factor, plus one byte; the converted output is null-terminated */ + dst = MemoryContextAlloc(CurrentMemoryContext, dstsize); + + /* perform conversion; the int result is returned to SQL as the first output column */ + convertedbytes = pg_do_encoding_conversion_buf(proc, + src_encoding, + dest_encoding, + (unsigned char *) src, srclen, + (unsigned char *) dst, dstsize, + noError); + dstlen = strlen(dst); /* relies on the output being null-terminated */ + + /* + * build bytea data type structure from the dstlen converted bytes. + */ + retval = (bytea *) palloc(dstlen + VARHDRSZ); + SET_VARSIZE(retval, dstlen + VARHDRSZ); + memcpy(VARDATA(retval), dst, dstlen); + + pfree(dst); /* the converted bytes were copied into retval above */ + } + + MemSet(nulls, 0, sizeof(nulls)); + values[0] = Int32GetDatum(convertedbytes); /* first column of the result row */ + values[1] = PointerGetDatum(retval); /* second column of the result row */ + tuple = heap_form_tuple(tupdesc, values, nulls); + + PG_RETURN_DATUM(HeapTupleGetDatum(tuple)); +} diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql index 644531d3333..41e2686d46a 100644 --- a/src/test/regress/sql/conversion.sql +++ b/src/test/regress/sql/conversion.sql @@ -55,25 +55,21 @@ $$ declare validlen int; begin - -- Try to perform the conversion.
If it fails, catch the error and return - -- it to the caller. begin - select * into result from convert(input, src_encoding, dst_encoding); - validlen = length(input); + select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, false); errorat = NULL; error := NULL; exception when others then - result = NULL; - errorat = NULL; + select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, true); + errorat = substr(input, validlen + 1); error := sqlerrm; end; return; end; $$; --- --- UTF-8 --- +-- Test verification functions + CREATE TABLE utf8_inputs (inbytes bytea, description text); insert into utf8_inputs values ('\x666f6f', 'valid, pure ASCII'), @@ -194,8 +190,9 @@ select description, inbytes, (test_conv(inbytes, 'big5', 'big5')).* from big5_in select description, inbytes, (test_conv(inbytes, 'big5', 'utf8')).* from big5_inputs; select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs; - +-- -- MULE_INTERNAL +-- CREATE TABLE mic_inputs (inbytes bytea, description text); insert into mic_inputs values ('\x666f6f', 'valid, pure ASCII'), -- 2.29.2