diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c index 5595baa..8a9d677 100644 --- a/src/pl/plperl/plperl.c +++ b/src/pl/plperl/plperl.c @@ -254,7 +254,31 @@ sv2text_mbverified(SV *sv) * length, whatever uses the "verified" value might get something quite * weird. */ - val = SvPV(sv, len); + + /* + * When we are in a UTF8 encoding we want to make sure we get back a utf8 + * byte sequence instead of whatever perl's internal format happens to be. + * + * Non UTF8 will just treat everything as bytes/latin1, that is: + * SvPVutf8(chr(170)) len == 2 + * SvPVbyte(chr(170)) len == 1 + * SvPV(chr(170)) len == 1 || 2 + */ + if (GetDatabaseEncoding() == PG_UTF8) + val = SvPVutf8(sv, len); + else + { + /* + * See if we can safely represent our string as bytes; if not, bail out, + * otherwise perl dies with "Wide Character" and takes the backend down + * with it. + */ + if (sv_utf8_downgrade(sv, true)) + val = SvPVbyte(sv, len); + else + elog(ERROR, "invalid byte sequence"); + } + pg_verifymbstr(val, len, false); return val; }