Thread: md5(bytea)
The appended patch fiddles with md5_text() until it handles any varlena, and adds an entry for md5(bytea) to pg_proc. -- ams *** src/include/catalog/pg_proc.h~ 2005-05-19 07:45:05.191855191 +0530 --- src/include/catalog/pg_proc.h 2005-05-19 07:56:45.785482224 +0530 *************** *** 3267,3273 **** DESCR("I/O"); /* cryptographic */ ! DATA(insert OID = 2311 ( md5 PGNSP PGUID 12 f f t f i 1 25 "25" _null_ _null_ _null_ md5_text - _null_ )); DESCR("calculates md5 hash"); /* crosstype operations for date vs. timestamp and timestamptz */ --- 3267,3275 ---- DESCR("I/O"); /* cryptographic */ ! DATA(insert OID = 2311 ( md5 PGNSP PGUID 12 f f t f i 1 25 "25" _null_ _null_ _null_ md5_varlena - _null_ )); ! DESCR("calculates md5 hash"); ! DATA(insert OID = 2321 ( md5 PGNSP PGUID 12 f f t f i 1 25 "17" _null_ _null_ _null_ md5_varlena - _null_ )); DESCR("calculates md5 hash"); /* crosstype operations for date vs. timestamp and timestamptz */ *** src/include/utils/builtins.h~ 2005-05-19 07:56:05.737477425 +0530 --- src/include/utils/builtins.h 2005-05-19 07:56:16.249641262 +0530 *************** *** 571,577 **** extern Datum array_to_text(PG_FUNCTION_ARGS); extern Datum to_hex32(PG_FUNCTION_ARGS); extern Datum to_hex64(PG_FUNCTION_ARGS); ! extern Datum md5_text(PG_FUNCTION_ARGS); extern Datum unknownin(PG_FUNCTION_ARGS); extern Datum unknownout(PG_FUNCTION_ARGS); --- 571,577 ---- extern Datum array_to_text(PG_FUNCTION_ARGS); extern Datum to_hex32(PG_FUNCTION_ARGS); extern Datum to_hex64(PG_FUNCTION_ARGS); ! extern Datum md5_varlena(PG_FUNCTION_ARGS); extern Datum unknownin(PG_FUNCTION_ARGS); extern Datum unknownout(PG_FUNCTION_ARGS); *** src/backend/utils/adt/varlena.c~ 2005-05-19 07:46:28.895234689 +0530 --- src/backend/utils/adt/varlena.c 2005-05-19 08:20:42.844470560 +0530 *************** *** 2297,2324 **** } /* ! * Create an md5 hash of a text string and return it as hex * * md5 produces a 16 byte (128 bit) hash; double it for hex */ #define MD5_HASH_LEN 32 Datum ! 
md5_text(PG_FUNCTION_ARGS) { ! text *in_text = PG_GETARG_TEXT_P(0); size_t len; ! char *hexsum; text *result_text; /* Calculate the length of the buffer using varlena metadata */ ! len = VARSIZE(in_text) - VARHDRSZ; ! ! /* leave room for the terminating '\0' */ ! hexsum = (char *) palloc(MD5_HASH_LEN + 1); /* get the hash result */ ! if (md5_hash(VARDATA(in_text), len, hexsum) == false) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); --- 2297,2323 ---- } /* ! * Create an md5 hash of a varlena and return it as hex * * md5 produces a 16 byte (128 bit) hash; double it for hex */ #define MD5_HASH_LEN 32 Datum ! md5_varlena(PG_FUNCTION_ARGS) { ! /* It would be nice if we could avoid de-toasting the whole varlena, ! * and feed it to md5_hash in small chunks instead. */ ! struct varlena *in = PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); size_t len; ! char hexsum[MD5_HASH_LEN+1]; text *result_text; /* Calculate the length of the buffer using varlena metadata */ ! len = VARSIZE(in) - VARHDRSZ; /* get the hash result */ ! if (md5_hash(VARDATA(in), len, hexsum) == false) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
Abhijit Menon-Sen <ams@oryx.com> writes: > The appended patch fiddles with md5_text() until it handles any varlena, > and adds an entry for md5(bytea) to pg_proc. Doesn't that change cause the opr_sanity regression test to complain? regards, tom lane
At 2005-05-18 23:31:27 -0400, tgl@sss.pgh.pa.us wrote: > > Doesn't that change cause the opr_sanity regression test to complain? Yes, it does. I'm sorry I didn't notice. As far as I can tell, updating the test as below is the correct thing to do. -- ams *** src/test/regress/expected/opr_sanity.out~ 2005-05-19 10:16:47.821895189 +0530 --- src/test/regress/expected/opr_sanity.out 2005-05-19 10:17:05.336835847 +0530 *************** *** 110,121 **** (p1.proargtypes[0] < p2.proargtypes[0]); proargtypes | proargtypes -------------+------------- 25 | 1042 25 | 1043 1114 | 1184 1560 | 1562 2277 | 2283 ! (5 rows) SELECT DISTINCT p1.proargtypes[1], p2.proargtypes[1] FROM pg_proc AS p1, pg_proc AS p2 --- 110,122 ---- (p1.proargtypes[0] < p2.proargtypes[0]); proargtypes | proargtypes -------------+------------- + 17 | 25 25 | 1042 25 | 1043 1114 | 1184 1560 | 1562 2277 | 2283 ! (6 rows) SELECT DISTINCT p1.proargtypes[1], p2.proargtypes[1] FROM pg_proc AS p1, pg_proc AS p2
Abhijit Menon-Sen <ams@oryx.com> writes: > At 2005-05-18 23:31:27 -0400, tgl@sss.pgh.pa.us wrote: >> Doesn't that change cause the opr_sanity regression test to complain? > As far as I can tell, updating the test as below is the correct thing > to do. No, I don't much care for that, because it gives free license for anyone to define pg_proc entries that allow bytea values to be fed to functions that are expecting text inputs. Many of the latter are not going to cope very well with strings that contain embedded zero bytes, nor with byte sequences that aren't legal multibyte characters in the current encoding. I think you need to make a separate C-level function for this. The underlying md5 code might be the same, but text and bytea are not really binary-equivalent. regards, tom lane
At 2005-05-19 01:28:31 -0400, tgl@sss.pgh.pa.us wrote: > > No, I don't much care for that, because it gives free license for > anyone to define pg_proc entries that allow bytea values to be fed > to functions that are expecting text inputs. Ah. I misunderstood the comment before the failing opr_sanity test. Here's an updated patch, which also adds some regression tests. Thanks. -- ams *** src/include/catalog/pg_proc.h~ 2005-05-19 11:18:07.436175502 +0530 --- src/include/catalog/pg_proc.h 2005-05-19 11:20:13.530150617 +0530 *************** *** 3269,3274 **** --- 3269,3276 ---- /* cryptographic */ DATA(insert OID = 2311 ( md5 PGNSP PGUID 12 f f t f i 1 25 "25" _null_ _null_ _null_ md5_text - _null_ )); DESCR("calculates md5 hash"); + DATA(insert OID = 2321 ( md5 PGNSP PGUID 12 f f t f i 1 25 "17" _null_ _null_ _null_ md5_bytea - _null_ )); + DESCR("calculates md5 hash"); /* crosstype operations for date vs. timestamp and timestamptz */ DATA(insert OID = 2338 ( date_lt_timestamp PGNSP PGUID 12 f f t f i 2 16 "1082 1114" _null_ _null_ _null_ date_lt_timestamp- _null_ )); *** src/include/utils/builtins.h~ 2005-05-19 11:18:33.563611812 +0530 --- src/include/utils/builtins.h 2005-05-19 11:18:45.621505655 +0530 *************** *** 572,577 **** --- 572,578 ---- extern Datum to_hex32(PG_FUNCTION_ARGS); extern Datum to_hex64(PG_FUNCTION_ARGS); extern Datum md5_text(PG_FUNCTION_ARGS); + extern Datum md5_bytea(PG_FUNCTION_ARGS); extern Datum unknownin(PG_FUNCTION_ARGS); extern Datum unknownout(PG_FUNCTION_ARGS); *** src/backend/utils/adt/varlena.c~ 2005-05-19 11:20:23.495409981 +0530 --- src/backend/utils/adt/varlena.c 2005-05-19 11:31:17.562163881 +0530 *************** *** 2327,2329 **** --- 2327,2353 ---- result_text = PG_STR_GET_TEXT(hexsum); PG_RETURN_TEXT_P(result_text); } + + /* This function takes a bytea and returns the text representation (32 + * lowercase hex characters) of its 16-byte MD5 checksum. 
+ */ + + Datum + md5_bytea(PG_FUNCTION_ARGS) + { + /* It would be nice if we could avoid de-toasting the whole bytea, + * and feed it to md5_hash in small chunks instead. */ + struct varlena *in = PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + char hexsum[MD5_HASH_LEN+1]; + text *result_text; + size_t len; + + len = VARSIZE(in) - VARHDRSZ; + if (md5_hash(VARDATA(in), len, hexsum) == false) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + result_text = PG_STR_GET_TEXT(hexsum); + PG_RETURN_TEXT_P(result_text); + } *** src/test/regress/sql/strings.sql~ 2005-05-19 11:36:54.299345864 +0530 --- src/test/regress/sql/strings.sql 2005-05-19 11:39:06.825197521 +0530 *************** *** 331,333 **** --- 331,347 ---- select md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789') = 'd174ab98d277d9f5a5611c2c9f419d9f' AS "TRUE"; select md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890') = '57edf4a22be3c955ac49da2e2107b67a'AS "TRUE"; + + select md5(''::bytea) = 'd41d8cd98f00b204e9800998ecf8427e' AS "TRUE"; + + select md5('a'::bytea) = '0cc175b9c0f1b6a831c399e269772661' AS "TRUE"; + + select md5('abc'::bytea) = '900150983cd24fb0d6963f7d28e17f72' AS "TRUE"; + + select md5('message digest'::bytea) = 'f96b697d7cb7938d525a2f31aaf161d0' AS "TRUE"; + + select md5('abcdefghijklmnopqrstuvwxyz'::bytea) = 'c3fcd3d76192e4007dfb496cca67e13b' AS "TRUE"; + + select md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'::bytea) = 'd174ab98d277d9f5a5611c2c9f419d9f'AS "TRUE"; + + select md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890'::bytea) = '57edf4a22be3c955ac49da2e2107b67a'AS "TRUE"; *** src/test/regress/expected/strings.out~ 2005-05-19 11:40:21.864090448 +0530 --- src/test/regress/expected/strings.out 2005-05-19 11:40:29.050835136 +0530 *************** *** 825,827 **** --- 825,869 ---- t (1 row) + select md5(''::bytea) = 'd41d8cd98f00b204e9800998ecf8427e' AS 
"TRUE"; + TRUE + ------ + t + (1 row) + + select md5('a'::bytea) = '0cc175b9c0f1b6a831c399e269772661' AS "TRUE"; + TRUE + ------ + t + (1 row) + + select md5('abc'::bytea) = '900150983cd24fb0d6963f7d28e17f72' AS "TRUE"; + TRUE + ------ + t + (1 row) + + select md5('message digest'::bytea) = 'f96b697d7cb7938d525a2f31aaf161d0' AS "TRUE"; + TRUE + ------ + t + (1 row) + + select md5('abcdefghijklmnopqrstuvwxyz'::bytea) = 'c3fcd3d76192e4007dfb496cca67e13b' AS "TRUE"; + TRUE + ------ + t + (1 row) + + select md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'::bytea) = 'd174ab98d277d9f5a5611c2c9f419d9f'AS "TRUE"; + TRUE + ------ + t + (1 row) + + select md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890'::bytea) = '57edf4a22be3c955ac49da2e2107b67a'AS "TRUE"; + TRUE + ------ + t + (1 row) +
At 2005-05-19 11:47:16 +0530, ams@oryx.com wrote: > > + Datum > + md5_bytea(PG_FUNCTION_ARGS) > + { > + /* It would be nice if we could avoid de-toasting the whole bytea, > + * and feed it to md5_hash in small chunks instead. */ > + struct varlena *in = PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); Oops, I guess that should be "bytea *in = PG_GETARG_BYTEA_P(0);" now. -- ams
Abhijit Menon-Sen wrote: > Ah. I misunderstood the comment before the failing opr_sanity test. > Here's an updated patch, which also adds some regression tests. Thanks, applied with a few tweaks. -Neil