--- src/test/encodings/expected/gb18030.out 2025-09-11 14:10:18.511609698 +0700 +++ patch.out 2025-09-11 14:12:42.001008191 +0700 @@ -1,62 +1,3 @@ --- Enumerate all valid GB18030 byte sequences. Not all of these encode an actual character, but --- this enumerates everything that's accepted by the verification function. -create function most_gb18030() returns setof bytea language plpgsql as -$$ -declare - byte1 integer; - byte2 integer; - byte3 integer; - byte4 integer; -begin - -- 4-byte encoded values have: - -- byte1 in range 0x81-0xfe - -- byte2 in range 0x30-0x39 - -- byte3 in range 0x81-0xfe - -- byte2 in range 0x30-0x39 - for byte1 in hex('81') .. hex('fe') loop - - -- Skip some values that are not interesting, to keep the table - -- manageable. - if (byte1 >= hex('85') and byte1 <= hex('8e')) or - (byte1 >= hex('e4') and byte1 <= hex('fd')) then - continue; - end if; - - -- skip more values - if (byte1 >= hex('a1') and byte1 <= hex('d0')) then - continue; - end if; - - for byte2 in hex('30') .. hex('39') loop - for byte3 in hex('81') .. hex('fe') loop - for byte4 in hex('30') .. hex('39') loop - return next b(byte1, byte2, byte3, byte4); - end loop; - -- - end loop; - end loop; - end loop; - - -- 2-byte encoded values have: - -- byte1 in range 0x81-0xfe - -- byte2 in range 0x40-0x7e or 0x80-0xfe - for byte1 in hex('81') .. hex('fe') loop - for byte2 in hex('40') .. hex('7e') loop - return next b(byte1, byte2); - end loop; - - for byte2 in hex('80') .. hex('fe') loop - return next b(byte1, byte2); - end loop; - end loop; - - -- ASCII - for byte1 in hex('01')..hex('7f') loop - return next b(byte1); - end loop; -end; -$$; -select gb18030_char, convert_roundtrip(gb18030_char, 'gb18030', 'utf8') from most_gb18030() as gb18030_char; gb18030_char | convert_roundtrip --------------+------------------------------------------------------------------------------------------------------------- \x81308130 | \xc280 @@ -7516,7 +7457,7 @@ \x8135f434 | \xe1b8bc \x8135f435 | \xe1b8bd \x8135f436 | \xe1b8be - \x8135f437 | \xe1b8bf + \x8135f437 | \xee9f87 \x8135f438 | \xe1b980 \x8135f439 | \xe1b981 \x8135f530 | \xe1b982 @@ -19116,14 +19057,14 @@ \x82359034 | \xe9beb1 \x82359035 | \xe9beb2 \x82359036 | \xe9beb3 - \x82359037 | \xe9beb4 - \x82359038 | \xe9beb5 - \x82359039 | \xe9beb6 - \x82359130 | \xe9beb7 - \x82359131 | \xe9beb8 - \x82359132 | \xe9beb9 - \x82359133 | \xe9beba - \x82359134 | \xe9bebb + \x82359037 | \xe9beb4, roundtripped to \xfe59 + \x82359038 | \xe9beb5, roundtripped to \xfe61 + \x82359039 | \xe9beb6, roundtripped to \xfe66 + \x82359130 | \xe9beb7, roundtripped to \xfe67 + \x82359131 | \xe9beb8, roundtripped to \xfe6d + \x82359132 | \xe9beb9, roundtripped to \xfe7e + \x82359133 | \xe9beba, roundtripped to \xfe90 + \x82359134 | \xe9bebb, roundtripped to \xfea0 \x82359135 | \xe9bebc \x82359136 | \xe9bebd \x82359137 | \xe9bebe @@ -39135,16 +39076,16 @@ \x84318233 | \xefb88d \x84318234 | \xefb88e \x84318235 | \xefb88f - \x84318236 | \xefb890 - \x84318237 | \xefb891 - \x84318238 | \xefb892 - \x84318239 | \xefb893 - \x84318330 | \xefb894 - \x84318331 | \xefb895 - \x84318332 | \xefb896 - \x84318333 | \xefb897 - \x84318334 | \xefb898 - \x84318335 | \xefb899 + \x84318236 | \xefb890, roundtripped to \xa6d9 + \x84318237 | \xefb891, roundtripped to \xa6db + \x84318238 | \xefb892, roundtripped to \xa6da + \x84318239 | \xefb893, roundtripped to \xa6dc + \x84318330 | \xefb894, roundtripped to \xa6dd + \x84318331 | \xefb895, roundtripped to \xa6de + \x84318332 | \xefb896, roundtripped to \xa6df + \x84318333 | \xefb897, roundtripped to \xa6ec + \x84318334 | \xefb898, roundtripped to \xa6ed + \x84318335 | \xefb899, roundtripped to \xa6f3 \x84318336 | \xefb89a \x84318337 | \xefb89b \x84318338 | \xefb89c @@ -535814,7 +535755,7 @@ \xa39d | \xee97a2 \xa39e | \xee97a3 \xa39f | \xee97a4 - \xa3a0 | \xee97a5 + \xa3a0 | character with byte sequence 0xa3 0xa0 in encoding "GB18030" has no equivalent in encoding "UTF8" \xa3a1 | \xefbc81 \xa3a2 | \xefbc82 \xa3a3 | \xefbc83 @@ -536441,13 +536382,13 @@ \xa6d6 | \xcf87 \xa6d7 | \xcf88 \xa6d8 | \xcf89 - \xa6d9 | \xee9e8d - \xa6da | \xee9e8e - \xa6db | \xee9e8f - \xa6dc | \xee9e90 - \xa6dd | \xee9e91 - \xa6de | \xee9e92 - \xa6df | \xee9e93 + \xa6d9 | \xefb890 + \xa6da | \xefb892 + \xa6db | \xefb891 + \xa6dc | \xefb893 + \xa6dd | \xefb894 + \xa6de | \xefb895 + \xa6df | \xefb896 \xa6e0 | \xefb8b5 \xa6e1 | \xefb8b6 \xa6e2 | \xefb8b9 @@ -536460,14 +536401,14 @@ \xa6e9 | \xefb982 \xa6ea | \xefb983 \xa6eb | \xefb984 - \xa6ec | \xee9e94 - \xa6ed | \xee9e95 + \xa6ec | \xefb897 + \xa6ed | \xefb898 \xa6ee | \xefb8bb \xa6ef | \xefb8bc \xa6f0 | \xefb8b7 \xa6f1 | \xefb8b8 \xa6f2 | \xefb8b1 - \xa6f3 | \xee9e96 + \xa6f3 | \xefb899 \xa6f4 | \xefb8b3 \xa6f5 | \xefb8b4 \xa6f6 | \xee9e97 @@ -536792,7 +536733,7 @@ \xa8b9 | \xc3bc \xa8ba | \xc3aa \xa8bb | \xc991 - \xa8bc | \xee9f87 + \xa8bc | \xe1b8bf \xa8bd | \xc584 \xa8be | \xc588 \xa8bf | \xc7b9 @@ -553034,7 +552975,7 @@ \xfe56 | \xe39187 \xfe57 | \xe2ba88 \xfe58 | \xe2ba8b - \xfe59 | \xeea09e + \xfe59 | \xe9beb4 \xfe5a | \xe3969e \xfe5b | \xe3989a \xfe5c | \xe3988e @@ -553042,19 +552983,19 @@ \xfe5e | \xe2ba97 \xfe5f | \xe3a5ae \xfe60 | \xe3a498 - \xfe61 | \xeea0a6 + \xfe61 | \xe9beb5 \xfe62 | \xe3a78f \xfe63 | \xe3a79f \xfe64 | \xe3a9b3 \xfe65 | \xe3a790 - \xfe66 | \xeea0ab - \xfe67 | \xeea0ac + \xfe66 | \xe9beb6 + \xfe67 | \xe9beb7 \xfe68 | \xe3ad8e \xfe69 | \xe3b1ae \xfe6a | \xe3b3a0 \xfe6b | \xe2baa7 \xfe6c | \xeea0b1 - \xfe6d | \xeea0b2 + \xfe6d | \xe9beb8 \xfe6e | \xe2baaa \xfe6f | \xe48196 \xfe70 | \xe4859f @@ -553071,7 +553012,7 @@ \xfe7b | \xe49396 \xfe7c | \xe499a1 \xfe7d | \xe4998c - \xfe7e | \xeea183 + \xfe7e | \xe9beb9 \xfe80 | \xe49ca3 \xfe81 | \xe49ca9 \xfe82 | \xe49dbc @@ -553088,7 +553029,7 @@ \xfe8d | \xe4a69b \xfe8e | \xe4a6b7 \xfe8f | \xe4a6b6 - \xfe90 | \xeea194 + \xfe90 | \xe9beba \xfe91 | \xeea195 \xfe92 | \xe4b2a3 \xfe93 | \xe4b29f @@ -553104,7 +553045,7 @@ \xfe9d | \xe4b498 \xfe9e | \xe4b499 \xfe9f | \xe4b6ae - \xfea0 | \xeea1a4 + \xfea0 | \xe9bebb \xfea1 | \xee91a8 \xfea2 | \xee91a9 \xfea3 | \xee91aa