From ed46c1d172297c5238d8446c6f51eb33587d2d4a Mon Sep 17 00:00:00 2001 From: Florents Tselai Date: Fri, 11 Jul 2025 11:22:08 +0300 Subject: [PATCH v4] Add base64url --- doc/src/sgml/func.sgml | 18 +++ src/backend/utils/adt/encode.c | 178 ++++++++++++++++++++------ src/test/regress/expected/strings.out | 150 ++++++++++++++++++++++ src/test/regress/sql/strings.sql | 54 ++++++++ 4 files changed, 362 insertions(+), 38 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 810b2b50f0d..8d0bce29d5e 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -4999,6 +4999,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); Encodes binary data into a textual representation; supported format values are: base64, + base64url, escape, hex. @@ -5056,6 +5057,23 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); + + base64url + + base64url format + + + + The base64url format is the URL-safe variant of base64 described in + RFC 4648 + Section 5. Unlike standard base64, it replaces + '+' with '-' and '/' with '_' + to ensure safe usage in URLs and filenames. Additionally, the padding character + '=' is omitted when encoding; decoding accepts input with or without padding.
+ + + + escape diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index 4ccaed815d1..3f2dd448e2a 100644 --- a/src/backend/utils/adt/encode.c +++ b/src/backend/utils/adt/encode.c @@ -273,6 +273,9 @@ hex_dec_len(const char *src, size_t srclen) static const char _base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static const char _base64url[] = +"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + static const int8 b64lookup[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, @@ -285,17 +288,15 @@ static const int8 b64lookup[128] = { }; static uint64 -pg_base64_encode(const char *src, size_t len, char *dst) +pg_base64_encode_internal(const char *src, size_t len, char *dst, bool url) { - char *p, - *lend = dst + 76; - const char *s, - *end = src + len; - int pos = 2; - uint32 buf = 0; - - s = src; - p = dst; + const char *alphabet = url ? _base64url : _base64; + const char *end = src + len; + const char *s = src; + char *p = dst; + int pos = 2; + uint32 buf = 0; + char *lend = dst + 76; while (s < end) { @@ -306,53 +307,84 @@ pg_base64_encode(const char *src, size_t len, char *dst) /* write it out */ if (pos < 0) { - *p++ = _base64[(buf >> 18) & 0x3f]; - *p++ = _base64[(buf >> 12) & 0x3f]; - *p++ = _base64[(buf >> 6) & 0x3f]; - *p++ = _base64[buf & 0x3f]; + *p++ = alphabet[(buf >> 18) & 0x3f]; + *p++ = alphabet[(buf >> 12) & 0x3f]; + *p++ = alphabet[(buf >> 6) & 0x3f]; + *p++ = alphabet[buf & 0x3f]; pos = 2; buf = 0; - } - if (p >= lend) - { - *p++ = '\n'; - lend = p + 76; + + if (!url && p >= lend) + { + *p++ = '\n'; + lend = p + 76; + } } } + + /* handle remainder */ if (pos != 2) { - *p++ = _base64[(buf >> 18) & 0x3f]; - *p++ = _base64[(buf >> 12) & 0x3f]; - *p++ = (pos == 0) ? 
_base64[(buf >> 6) & 0x3f] : '='; - *p++ = '='; + *p++ = alphabet[(buf >> 18) & 0x3f]; + *p++ = alphabet[(buf >> 12) & 0x3f]; + + if (pos == 0) + { + *p++ = alphabet[(buf >> 6) & 0x3f]; + if (!url) + *p++ = '='; + } + else + { + if (!url) + { + *p++ = '='; + *p++ = '='; + } + } } return p - dst; } static uint64 -pg_base64_decode(const char *src, size_t len, char *dst) +pg_base64_encode(const char *src, size_t len, char *dst) +{ + return pg_base64_encode_internal(src, len, dst, false); +} + +static uint64 +pg_base64_decode_internal(const char *src, size_t len, char *dst, bool url) { - const char *srcend = src + len, - *s = src; - char *p = dst; - char c; - int b = 0; - uint32 buf = 0; - int pos = 0, - end = 0; + const char *srcend = src + len; + const char *s = src; + char *p = dst; + char c; + int b = 0; + uint32 buf = 0; + int pos = 0; + int end = 0; while (s < srcend) { c = *s++; + /* skip whitespace */ if (c == ' ' || c == '\t' || c == '\n' || c == '\r') continue; + /* convert Base64URL to Base64 if needed */ + if (url) + { + if (c == '-') + c = '+'; + else if (c == '_') + c = '/'; + } + if (c == '=') { - /* end sequence */ if (!end) { if (pos == 2) @@ -377,30 +409,49 @@ pg_base64_decode(const char *src, size_t len, char *dst) errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence", pg_mblen(s - 1), s - 1))); } - /* add it to buffer */ + buf = (buf << 6) + b; pos++; + if (pos == 4) { - *p++ = (buf >> 16) & 255; + *p++ = (buf >> 16) & 0xFF; if (end == 0 || end > 1) - *p++ = (buf >> 8) & 255; + *p++ = (buf >> 8) & 0xFF; if (end == 0 || end > 2) - *p++ = buf & 255; + *p++ = buf & 0xFF; buf = 0; pos = 0; } } - if (pos != 0) + if (pos == 2) + { + buf <<= 12; /* 2 * 6 = 12 bits, pad remaining to 24 */ + *p++ = (buf >> 16) & 0xFF; + } + else if (pos == 3) + { + buf <<= 6; /* 3 * 6 = 18 bits */ + *p++ = (buf >> 16) & 0xFF; + *p++ = (buf >> 8) & 0xFF; + } + else if (pos != 0) + { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid 
base64 end sequence"), errhint("Input data is missing padding, is truncated, or is otherwise corrupted."))); + } return p - dst; } +static uint64 +pg_base64_decode(const char *src, size_t len, char *dst) +{ + return pg_base64_decode_internal(src, len, dst, false); +} static uint64 pg_base64_enc_len(const char *src, size_t srclen) @@ -415,6 +466,51 @@ pg_base64_dec_len(const char *src, size_t srclen) return ((uint64) srclen * 3) >> 2; } +/* + * Return an upper bound on the length of base64url-encoded output for the given input length + * Base64url encoding: 3 bytes -> 4 chars; the estimate is rounded up to a multiple of 4 even though no '=' padding is emitted + */ +static uint64 +pg_base64url_enc_len(const char *src, size_t srclen) +{ + uint64 result; + + /* + * Base64 encoding converts 3 bytes into 4 characters + * Formula: ceil(srclen / 3) * 4 + * + * Unlike standard base64, base64url doesn't use padding characters + * when the input length is not divisible by 3, so this is an upper bound + */ + result = (srclen + 2) / 3 * 4; /* ceiling division by 3, then multiply by 4 */ + + return result; +} + +static uint64 +pg_base64url_dec_len(const char *src, size_t srclen) +{ + /* For Base64, each 4 characters of input produce at most 3 bytes of output */ + /* For Base64URL without padding, we need to round up to the nearest 4 */ + size_t adjusted_len = srclen; + if (srclen % 4 != 0) + adjusted_len += 4 - (srclen % 4); + + return (adjusted_len * 3) / 4; +} + +static uint64 +pg_base64url_encode(const char *src, size_t len, char *dst) +{ + return pg_base64_encode_internal(src, len, dst, true); +} + +static uint64 +pg_base64url_decode(const char *src, size_t len, char *dst) +{ + return pg_base64_decode_internal(src, len, dst, true); +} + /* * Escape * Minimally escape bytea to text.
@@ -606,6 +702,12 @@ static const struct pg_base64_enc_len, pg_base64_dec_len, pg_base64_encode, pg_base64_decode } }, + { + "base64url", + { + pg_base64url_enc_len, pg_base64url_dec_len, pg_base64url_encode, pg_base64url_decode + } + }, { "escape", { diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 788844abd20..e76f30a63eb 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -2462,6 +2462,156 @@ SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape'); \x1234567890abcdef00 (1 row) +-- +-- Base64URL encoding/decoding +-- +SET bytea_output TO hex; +-- Simple encoding/decoding +SELECT encode('\x69b73eff', 'base64url'); -- abc-_w + encode +-------- + abc-_w +(1 row) + +SELECT decode('abc-_w', 'base64url'); -- \x69b73eff + decode +------------ + \x69b73eff +(1 row) + +-- Round-trip: decode(encode(x)) = x +SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url'); -- \x1234567890abcdef00 + decode +---------------------- + \x1234567890abcdef00 +(1 row) + +-- Empty input +SELECT encode('', 'base64url'); -- '' + encode +-------- + +(1 row) + +SELECT decode('', 'base64url'); -- '' + decode +-------- + \x +(1 row) + +-- 1 byte input +SELECT encode('\x01', 'base64url'); -- AQ + encode +-------- + AQ +(1 row) + +SELECT decode('AQ', 'base64url'); -- \x01 + decode +-------- + \x01 +(1 row) + +-- 2 byte input +SELECT encode('\x0102'::bytea, 'base64url'); -- AQI + encode +-------- + AQI +(1 row) + +SELECT decode('AQI', 'base64url'); -- \x0102 + decode +-------- + \x0102 +(1 row) + +-- 3 byte input (no padding needed) +SELECT encode('\x010203'::bytea, 'base64url'); -- AQID + encode +-------- + AQID +(1 row) + +SELECT decode('AQID', 'base64url'); -- \x010203 + decode +---------- + \x010203 +(1 row) + +-- 4 byte input (results in 6 base64 chars) +SELECT encode('\xdeadbeef'::bytea, 'base64url'); -- 3q2-7w + encode +-------- + 3q2-7w +(1 row) + +SELECT decode('3q2-7w', 
'base64url'); -- \xdeadbeef + decode +------------ + \xdeadbeef +(1 row) + +-- Round-trip test for all lengths from 0–4 +SELECT encode(decode(encode(E'\\x', 'base64url'), 'base64url'), 'base64url'); + encode +-------- + +(1 row) + +SELECT encode(decode(encode(E'\\x00', 'base64url'), 'base64url'), 'base64url'); + encode +-------- + AA +(1 row) + +SELECT encode(decode(encode(E'\\x0001', 'base64url'), 'base64url'), 'base64url'); + encode +-------- + AAE +(1 row) + +SELECT encode(decode(encode(E'\\x000102', 'base64url'), 'base64url'), 'base64url'); + encode +-------- + AAEC +(1 row) + +SELECT encode(decode(encode(E'\\x00010203', 'base64url'), 'base64url'), 'base64url'); + encode +-------- + AAECAw +(1 row) + +-- Invalid inputs (should ERROR) +-- invalid character '@' +SELECT decode('QQ@=', 'base64url'); +ERROR: invalid symbol "@" found while decoding base64 sequence +-- missing characters (incomplete group) +SELECT decode('QQ', 'base64url'); -- ok (1 byte) + decode +-------- + \x41 +(1 row) + +SELECT decode('QQI', 'base64url'); -- ok (2 bytes) + decode +-------- + \x4102 +(1 row) + +SELECT decode('QQIDQ', 'base64url'); -- ERROR: invalid base64 end sequence +ERROR: invalid base64 end sequence +HINT: Input data is missing padding, is truncated, or is otherwise corrupted.
+-- unexpected '=' at start +SELECT decode('=QQQ', 'base64url'); +ERROR: unexpected "=" while decoding base64 sequence +-- valid base64 padding in base64url (optional, but accepted) +SELECT decode('abc-_w==', 'base64url'); -- should decode to \x69b73eff + decode +------------ + \x69b73eff +(1 row) + -- -- get_bit/set_bit etc -- diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 2577a42987d..ac26d892006 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -774,6 +774,60 @@ SELECT decode(encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea, SELECT encode('\x1234567890abcdef00', 'escape'); SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape'); +-- +-- Base64URL encoding/decoding +-- +SET bytea_output TO hex; + +-- Simple encoding/decoding +SELECT encode('\x69b73eff', 'base64url'); -- abc-_w +SELECT decode('abc-_w', 'base64url'); -- \x69b73eff + +-- Round-trip: decode(encode(x)) = x +SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url'); -- \x1234567890abcdef00 + +-- Empty input +SELECT encode('', 'base64url'); -- '' +SELECT decode('', 'base64url'); -- '' + +-- 1 byte input +SELECT encode('\x01', 'base64url'); -- AQ +SELECT decode('AQ', 'base64url'); -- \x01 + +-- 2 byte input +SELECT encode('\x0102'::bytea, 'base64url'); -- AQI +SELECT decode('AQI', 'base64url'); -- \x0102 + +-- 3 byte input (no padding needed) +SELECT encode('\x010203'::bytea, 'base64url'); -- AQID +SELECT decode('AQID', 'base64url'); -- \x010203 + +-- 4 byte input (results in 6 base64 chars) +SELECT encode('\xdeadbeef'::bytea, 'base64url'); -- 3q2-7w +SELECT decode('3q2-7w', 'base64url'); -- \xdeadbeef + +-- Round-trip test for all lengths from 0–4 +SELECT encode(decode(encode(E'\\x', 'base64url'), 'base64url'), 'base64url'); +SELECT encode(decode(encode(E'\\x00', 'base64url'), 'base64url'), 'base64url'); +SELECT encode(decode(encode(E'\\x0001', 'base64url'), 'base64url'), 'base64url');
+SELECT encode(decode(encode(E'\\x000102', 'base64url'), 'base64url'), 'base64url'); +SELECT encode(decode(encode(E'\\x00010203', 'base64url'), 'base64url'), 'base64url'); + +-- Invalid inputs (should ERROR) +-- invalid character '@' +SELECT decode('QQ@=', 'base64url'); + +-- missing characters (incomplete group) +SELECT decode('QQ', 'base64url'); -- ok (1 byte) +SELECT decode('QQI', 'base64url'); -- ok (2 bytes) +SELECT decode('QQIDQ', 'base64url'); -- ERROR: invalid base64 end sequence + +-- unexpected '=' at start +SELECT decode('=QQQ', 'base64url'); + +-- valid base64 padding in base64url (optional, but accepted) +SELECT decode('abc-_w==', 'base64url'); -- should decode to \x69b73eff + -- -- get_bit/set_bit etc -- -- 2.49.0