Re: [HACKERS] [GENERAL] workaround for lack of REPLACE() - Mailing list pgsql-patches
From | Bruce Momjian |
---|---|
Subject | Re: [HACKERS] [GENERAL] workaround for lack of REPLACE() |
Date | |
Msg-id | 200208220323.g7M3Nqc00798@candle.pha.pa.us Whole thread Raw |
In response to | Re: [HACKERS] [GENERAL] workaround for lack of REPLACE() (Joe Conway <mail@joeconway.com>) |
List | pgsql-patches |
Patch applied. Thanks. --------------------------------------------------------------------------- Joe Conway wrote: > Joe Conway wrote: > > I took Tom's advice and added wrapper functions around text_substr() and > > bytea_substr() to cover the 2 argument case. > > > > I also added tests to strings.sql to cover substr() on toasted columns > > of both text and bytea. > > > > Please replace the original patch (substr.2002.08.14.1.patch) with the > attached. It includes everything from the previous one, plus newly > implemented builtin functions: > > replace(string, from, to) > -- replaces all occurrences of "from" in "string" to "to" > split(string, fldsep, column) > -- splits "string" on "fldsep" and returns "column" number piece > to_hex(int32_num) & to_hex(int64_num) > -- takes integer number and returns as hex string > > All previously discussed on the list; see thread at: > http://archives.postgresql.org/pgsql-hackers/2002-07/msg00511.php > > Examples: > > SELECT replace('yabadabadoo', 'ba', '123') AS "ya123da123doo"; > ya123da123doo > --------------- > ya123da123doo > (1 row) > > select split('joeuser@mydatabase','@',1) AS "joeuser"; > joeuser > --------- > joeuser > (1 row) > > select split('joeuser@mydatabase','@',2) AS "mydatabase"; > mydatabase > ------------ > mydatabase > (1 row) > > select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS > "ffffffff"; > ffffffff > ---------- > ffffffff > (1 row) > > Tests have been added to the regression suite. > > Passes all regression tests. I've checked the strings.sql script in a > multibyte database and it works fine also. I'd appreciate a good look by > someone more familiar with multibyte related issues though. > > If it is OK, I'd like to hold off on docs until this is committed and > after beta starts. > > If there are no objections, please apply. > > Thanks, > > Joe > Index: src/backend/utils/adt/varlena.c > =================================================================== > RCS file: /opt/src/cvs/pgsql-server/src/backend/utils/adt/varlena.c,v > retrieving revision 1.87 > diff -c -r1.87 varlena.c > *** src/backend/utils/adt/varlena.c 4 Aug 2002 06:44:47 -0000 1.87 > --- src/backend/utils/adt/varlena.c 16 Aug 2002 19:54:03 -0000 > *************** > *** 18,23 **** > --- 18,25 ---- > > #include "mb/pg_wchar.h" > #include "miscadmin.h" > + #include "access/tuptoaster.h" > + #include "lib/stringinfo.h" > #include "utils/builtins.h" > #include "utils/pg_locale.h" > > *************** > *** 27,34 **** > --- 29,62 ---- > #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X)) > #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n)) > #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x) > + #define PG_TEXTARG_GET_STR(arg_) \ > + DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_))) > + #define PG_TEXT_GET_STR(textp_) \ > + DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_))) > + #define PG_STR_GET_TEXT(str_) \ > + DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_))) > + #define TEXTLEN(textp) \ > + text_length(PointerGetDatum(textp)) > + #define TEXTPOS(buf_text, from_sub_text) \ > + text_position(PointerGetDatum(buf_text), PointerGetDatum(from_sub_text), 1) > + #define TEXTDUP(textp) \ > + DatumGetTextPCopy(PointerGetDatum(textp)) > + #define LEFT(buf_text, from_sub_text) \ > + text_substring(PointerGetDatum(buf_text), \ > + 1, \ > + TEXTPOS(buf_text, from_sub_text) - 1, false) > + #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \ > + text_substring(PointerGetDatum(buf_text), \ > + TEXTPOS(buf_text, from_sub_text) + from_sub_text_len, \ > + -1, true) > > static int text_cmp(text *arg1, text *arg2); > + static int32 text_length(Datum str); > + static int32 text_position(Datum str, Datum search_str, int matchnum); > + static text *text_substring(Datum str, > + int32 start, > + int32 length, > + bool length_not_specified); > > > /***************************************************************************** > *************** > *** 285,303 **** > Datum > textlen(PG_FUNCTION_ARGS) > { > ! text *t = PG_GETARG_TEXT_P(0); > > ! #ifdef MULTIBYTE > ! /* optimization for single byte encoding */ > ! if (pg_database_encoding_max_length() <= 1) > ! PG_RETURN_INT32(VARSIZE(t) - VARHDRSZ); > ! > ! PG_RETURN_INT32( > ! pg_mbstrlen_with_len(VARDATA(t), VARSIZE(t) - VARHDRSZ) > ! ); > ! #else > ! PG_RETURN_INT32(VARSIZE(t) - VARHDRSZ); > ! #endif > } > > /* > --- 313,348 ---- > Datum > textlen(PG_FUNCTION_ARGS) > { > ! PG_RETURN_INT32(text_length(PG_GETARG_DATUM(0))); > ! } > > ! /* > ! * text_length - > ! * Does the real work for textlen() > ! * This is broken out so it can be called directly by other string processing > ! * functions. > ! */ > ! static int32 > ! text_length(Datum str) > ! { > ! /* fastpath when max encoding length is one */ > ! if (pg_database_encoding_max_length() == 1) > ! PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); > ! > ! if (pg_database_encoding_max_length() > 1) > ! { > ! text *t = DatumGetTextP(str); > ! > ! PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t), > ! VARSIZE(t) - VARHDRSZ)); > ! } > ! > ! /* should never get here */ > ! elog(ERROR, "Invalid backend encoding; encoding max length " > ! "is less than one."); > ! > ! /* not reached: suppress compiler warning */ > ! return 0; > } > > /* > *************** > *** 308,316 **** > Datum > textoctetlen(PG_FUNCTION_ARGS) > { > ! text *arg = PG_GETARG_TEXT_P(0); > ! > ! PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ); > } > > /* > --- 353,359 ---- > Datum > textoctetlen(PG_FUNCTION_ARGS) > { > ! PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ); > } > > /* > *************** > *** 382,471 **** > * - Thomas Lockhart 1998-12-10 > * Now uses faster TOAST-slicing interface > * - John Gray 2002-02-22 > */ > Datum > text_substr(PG_FUNCTION_ARGS) > { > ! text *string; > ! int32 m = PG_GETARG_INT32(1); > ! int32 n = PG_GETARG_INT32(2); > ! int32 sm; > ! int32 sn; > ! int eml = 1; > ! #ifdef MULTIBYTE > ! int i; > ! int len; > ! text *ret; > ! char *p; > ! #endif > > ! /* > ! * starting position before the start of the string? then offset into > ! * the string per SQL92 spec... > ! */ > ! if (m < 1) > { > ! n += (m - 1); > ! m = 1; > ! } > ! /* Check for m > octet length is made in TOAST access routine */ > > ! /* m will now become a zero-based starting position */ > ! sm = m - 1; > ! sn = n; > > ! #ifdef MULTIBYTE > ! eml = pg_database_encoding_max_length (); > > ! if (eml > 1) > { > ! sm = 0; > ! if (n > -1) > ! sn = (m + n) * eml + 3; /* +3 to avoid mb characters overhanging slice end */ > else > ! sn = n; /* n < 0 is special-cased by heap_tuple_untoast_attr_slice */ > ! } > ! #endif > > ! string = PG_GETARG_TEXT_P_SLICE (0, sm, sn); > > ! if (eml == 1) > ! { > ! PG_RETURN_TEXT_P (string); > ! } > ! #ifndef MULTIBYTE > ! PG_RETURN_NULL(); /* notreached: suppress compiler warning */ > ! #endif > ! #ifdef MULTIBYTE > ! if (n > -1) > ! len = pg_mbstrlen_with_len (VARDATA (string), sn - 3); > ! else /* n < 0 is special-cased; need full string length */ > ! len = pg_mbstrlen_with_len (VARDATA (string), VARSIZE(string)-VARHDRSZ); > ! > ! if (m > len) > ! { > ! m = 1; > ! n = 0; > ! } > ! m--; > ! if (((m + n) > len) || (n < 0)) > ! n = (len - m); > ! > ! p = VARDATA(string); > ! for (i = 0; i < m; i++) > ! p += pg_mblen(p); > ! m = p - VARDATA(string); > ! for (i = 0; i < n; i++) > ! p += pg_mblen(p); > ! n = p - (VARDATA(string) + m); > > ! ret = (text *) palloc(VARHDRSZ + n); > ! VARATT_SIZEP(ret) = VARHDRSZ + n; > > ! memcpy(VARDATA(ret), VARDATA(string) + m, n); > > ! PG_RETURN_TEXT_P(ret); > ! #endif > } > > /* > --- 425,625 ---- > * - Thomas Lockhart 1998-12-10 > * Now uses faster TOAST-slicing interface > * - John Gray 2002-02-22 > + * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change > + * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw > + * error; if E < 1, return '', not entire string). Fixed MB related bug when > + * S > LC and < LC + 4 sometimes garbage characters are returned. > + * - Joe Conway 2002-08-10 > */ > Datum > text_substr(PG_FUNCTION_ARGS) > { > ! PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), > ! PG_GETARG_INT32(1), > ! PG_GETARG_INT32(2), > ! false)); > ! } > > ! /* > ! * text_substr_no_len - > ! * Wrapper to avoid opr_sanity failure due to > ! * one function accepting a different number of args. > ! */ > ! Datum > ! text_substr_no_len(PG_FUNCTION_ARGS) > ! { > ! PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), > ! PG_GETARG_INT32(1), > ! -1, true)); > ! } > ! > ! /* > ! * text_substring - > ! * Does the real work for text_substr() and text_substr_no_len() > ! * This is broken out so it can be called directly by other string processing > ! * functions. > ! */ > ! static text* > ! text_substring(Datum str, int32 start, int32 length, bool length_not_specified) > ! { > ! int32 eml = pg_database_encoding_max_length(); > ! int32 S = start; /* start position */ > ! int32 S1; /* adjusted start position */ > ! int32 L1; /* adjusted substring length */ > ! > ! /* life is easy if the encoding max length is 1 */ > ! if (eml == 1) > { > ! S1 = Max(S, 1); > > ! if (length_not_specified) /* special case - get length to end of string */ > ! L1 = -1; > ! else > ! { > ! /* end position */ > ! int E = S + length; > > ! /* > ! * A negative value for L is the only way for the end position > ! * to be before the start. SQL99 says to throw an error. > ! */ > ! if (E < S) > ! elog(ERROR, "negative substring length not allowed"); > > ! /* > ! * A zero or negative value for the end position can happen if the start > ! * was negative or one. SQL99 says to return a zero-length string. > ! */ > ! if (E < 1) > ! return PG_STR_GET_TEXT(""); > ! > ! L1 = E - S1; > ! } > ! > ! /* > ! * If the start position is past the end of the string, > ! * SQL99 says to return a zero-length string -- > ! * PG_GETARG_TEXT_P_SLICE() will do that for us. > ! * Convert to zero-based starting position > ! */ > ! return DatumGetTextPSlice(str, S1 - 1, L1); > ! } > ! else if (eml > 1) > { > ! /* > ! * When encoding max length is > 1, we can't get LC without > ! * detoasting, so we'll grab a conservatively large slice > ! * now and go back later to do the right thing > ! */ > ! int32 slice_start; > ! int32 slice_size; > ! int32 slice_strlen; > ! text *slice; > ! int32 E1; > ! int32 i; > ! char *p; > ! char *s; > ! text *ret; > ! > ! /* > ! * if S is past the end of the string, the tuple toaster > ! * will return a zero-length string to us > ! */ > ! S1 = Max(S, 1); > ! > ! /* > ! * We need to start at position zero because there is no > ! * way to know in advance which byte offset corresponds to > ! * the supplied start position. > ! */ > ! slice_start = 0; > ! > ! if (length_not_specified) /* special case - get length to end of string */ > ! slice_size = L1 = -1; > else > ! { > ! int E = S + length; > ! > ! /* > ! * A negative value for L is the only way for the end position > ! * to be before the start. SQL99 says to throw an error. > ! */ > ! if (E < S) > ! elog(ERROR, "negative substring length not allowed"); > > ! /* > ! * A zero or negative value for the end position can happen if the start > ! * was negative or one. SQL99 says to return a zero-length string. > ! */ > ! if (E < 1) > ! return PG_STR_GET_TEXT(""); > > ! /* > ! * if E is past the end of the string, the tuple toaster > ! * will truncate the length for us > ! */ > ! L1 = E - S1; > ! > ! /* > ! * Total slice size in bytes can't be any longer than the start > ! * position plus substring length times the encoding max length. > ! */ > ! slice_size = (S1 + L1) * eml; > ! } > ! slice = DatumGetTextPSlice(str, slice_start, slice_size); > > ! /* see if we got back an empty string */ > ! if ((VARSIZE(slice) - VARHDRSZ) == 0) > ! return PG_STR_GET_TEXT(""); > > ! /* Now we can get the actual length of the slice in MB characters */ > ! slice_strlen = pg_mbstrlen_with_len (VARDATA(slice), VARSIZE(slice) - VARHDRSZ); > > ! /* Check that the start position wasn't > slice_strlen. If so, > ! * SQL99 says to return a zero-length string. > ! */ > ! if (S1 > slice_strlen) > ! return PG_STR_GET_TEXT(""); > ! > ! /* > ! * Adjust L1 and E1 now that we know the slice string length. > ! * Again remember that S1 is one based, and slice_start is zero based. > ! */ > ! if (L1 > -1) > ! E1 = Min(S1 + L1 , slice_start + 1 + slice_strlen); > ! else > ! E1 = slice_start + 1 + slice_strlen; > ! > ! /* > ! * Find the start position in the slice; > ! * remember S1 is not zero based > ! */ > ! p = VARDATA(slice); > ! for (i = 0; i < S1 - 1; i++) > ! p += pg_mblen(p); > ! > ! /* hang onto a pointer to our start position */ > ! s = p; > ! > ! /* > ! * Count the actual bytes used by the substring of > ! * the requested length. > ! */ > ! for (i = S1; i < E1; i++) > ! p += pg_mblen(p); > ! > ! ret = (text *) palloc(VARHDRSZ + (p - s)); > ! VARATT_SIZEP(ret) = VARHDRSZ + (p - s); > ! memcpy(VARDATA(ret), s, (p - s)); > ! > ! return ret; > ! } > ! else > ! elog(ERROR, "Invalid backend encoding; encoding max length " > ! "is less than one."); > ! > ! /* not reached: suppress compiler warning */ > ! return PG_STR_GET_TEXT(""); > } > > /* > *************** > *** 481,536 **** > Datum > textpos(PG_FUNCTION_ARGS) > { > ! text *t1 = PG_GETARG_TEXT_P(0); > ! text *t2 = PG_GETARG_TEXT_P(1); > ! int pos; > ! int px, > ! p; > ! int len1, > len2; > - pg_wchar *p1, > - *p2; > > ! #ifdef MULTIBYTE > ! pg_wchar *ps1, > ! *ps2; > ! #endif > > if (VARSIZE(t2) <= VARHDRSZ) > PG_RETURN_INT32(1); /* result for empty pattern */ > > len1 = (VARSIZE(t1) - VARHDRSZ); > len2 = (VARSIZE(t2) - VARHDRSZ); > ! #ifdef MULTIBYTE > ! ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar)); > ! (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1); > ! len1 = pg_wchar_strlen(p1); > ! ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar)); > ! (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2); > ! len2 = pg_wchar_strlen(p2); > ! #else > ! p1 = VARDATA(t1); > ! p2 = VARDATA(t2); > ! #endif > ! pos = 0; > px = (len1 - len2); > ! for (p = 0; p <= px; p++) > { > ! #ifdef MULTIBYTE > ! if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0)) > ! #else > ! if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0)) > ! #endif > { > ! pos = p + 1; > ! break; > ! }; > ! p1++; > ! }; > ! #ifdef MULTIBYTE > ! pfree(ps1); > ! pfree(ps2); > ! #endif > PG_RETURN_INT32(pos); > } > > --- 635,729 ---- > Datum > textpos(PG_FUNCTION_ARGS) > { > ! PG_RETURN_INT32(text_position(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), 1)); > ! } > ! > ! /* > ! * text_position - > ! * Does the real work for textpos() > ! * This is broken out so it can be called directly by other string processing > ! * functions. > ! */ > ! static int32 > ! text_position(Datum str, Datum search_str, int matchnum) > ! { > ! int eml = pg_database_encoding_max_length(); > ! text *t1 = DatumGetTextP(str); > ! text *t2 = DatumGetTextP(search_str); > ! int match = 0, > ! pos = 0, > ! p = 0, > ! px, > ! len1, > len2; > > ! if(matchnum == 0) > ! return 0; /* result for 0th match */ > > if (VARSIZE(t2) <= VARHDRSZ) > PG_RETURN_INT32(1); /* result for empty pattern */ > > len1 = (VARSIZE(t1) - VARHDRSZ); > len2 = (VARSIZE(t2) - VARHDRSZ); > ! > ! /* no use in searching str past point where search_str will fit */ > px = (len1 - len2); > ! > ! if (eml == 1) /* simple case - single byte encoding */ > { > ! char *p1, > ! *p2; > ! > ! p1 = VARDATA(t1); > ! p2 = VARDATA(t2); > ! > ! for (p = 0; p <= px; p++) > { > ! if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0)) > ! { > ! if (++match == matchnum) > ! { > ! pos = p + 1; > ! break; > ! } > ! } > ! p1++; > ! } > ! } > ! else if (eml > 1) /* not as simple - multibyte encoding */ > ! { > ! pg_wchar *p1, > ! *p2, > ! *ps1, > ! *ps2; > ! > ! ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar)); > ! (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1); > ! len1 = pg_wchar_strlen(p1); > ! ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar)); > ! (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2); > ! len2 = pg_wchar_strlen(p2); > ! > ! for (p = 0; p <= px; p++) > ! { > ! if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0)) > ! { > ! if (++match == matchnum) > ! { > ! pos = p + 1; > ! break; > ! } > ! } > ! p1++; > ! } > ! > ! pfree(ps1); > ! pfree(ps2); > ! } > ! else > ! elog(ERROR, "Invalid backend encoding; encoding max length " > ! "is less than one."); > ! > PG_RETURN_INT32(pos); > } > > *************** > *** 758,766 **** > Datum > byteaoctetlen(PG_FUNCTION_ARGS) > { > ! bytea *v = PG_GETARG_BYTEA_P(0); > ! > ! PG_RETURN_INT32(VARSIZE(v) - VARHDRSZ); > } > > /* > --- 951,957 ---- > Datum > byteaoctetlen(PG_FUNCTION_ARGS) > { > ! PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ); > } > > /* > *************** > *** 805,810 **** > --- 996,1003 ---- > PG_RETURN_BYTEA_P(result); > } > > + #define PG_STR_GET_BYTEA(str_) \ > + DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_))) > /* > * bytea_substr() > * Return a substring starting at the specified position. > *************** > *** 813,845 **** > * Input: > * - string > * - starting position (is one-based) > ! * - string length > * > * If the starting position is zero or less, then return from the start of the string > * adjusting the length to be consistent with the "negative start" per SQL92. > ! * If the length is less than zero, return the remaining string. > ! * > */ > Datum > bytea_substr(PG_FUNCTION_ARGS) > { > ! int32 m = PG_GETARG_INT32(1); > ! int32 n = PG_GETARG_INT32(2); > > ! /* > ! * starting position before the start of the string? then offset into > ! * the string per SQL92 spec... > ! */ > ! if (m < 1) > { > ! n += (m - 1); > ! m = 1; > } > > ! /* m will now become a zero-based starting position */ > ! m--; > > ! PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE (0, m, n)); > } > > /* > --- 1006,1076 ---- > * Input: > * - string > * - starting position (is one-based) > ! * - string length (optional) > * > * If the starting position is zero or less, then return from the start of the string > * adjusting the length to be consistent with the "negative start" per SQL92. > ! * If the length is less than zero, an ERROR is thrown. If no third argument > ! * (length) is provided, the length to the end of the string is assumed. > */ > Datum > bytea_substr(PG_FUNCTION_ARGS) > { > ! int S = PG_GETARG_INT32(1); /* start position */ > ! int S1; /* adjusted start position */ > ! int L1; /* adjusted substring length */ > > ! S1 = Max(S, 1); > ! > ! if (fcinfo->nargs == 2) > ! { > ! /* > ! * Not passed a length - PG_GETARG_BYTEA_P_SLICE() > ! * grabs everything to the end of the string if we pass it > ! * a negative value for length. > ! */ > ! L1 = -1; > ! } > ! else > { > ! /* end position */ > ! int E = S + PG_GETARG_INT32(2); > ! > ! /* > ! * A negative value for L is the only way for the end position > ! * to be before the start. SQL99 says to throw an error. > ! */ > ! if (E < S) > ! elog(ERROR, "negative substring length not allowed"); > ! > ! /* > ! * A zero or negative value for the end position can happen if the start > ! * was negative or one. SQL99 says to return a zero-length string. > ! */ > ! if (E < 1) > ! PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA("")); > ! > ! L1 = E - S1; > } > > ! /* > ! * If the start position is past the end of the string, > ! * SQL99 says to return a zero-length string -- > ! * PG_GETARG_TEXT_P_SLICE() will do that for us. > ! * Convert to zero-based starting position > ! */ > ! PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE (0, S1 - 1, L1)); > ! } > > ! /* > ! * bytea_substr_no_len - > ! * Wrapper to avoid opr_sanity failure due to > ! * one function accepting a different number of args. > ! */ > ! Datum > ! bytea_substr_no_len(PG_FUNCTION_ARGS) > ! { > ! return bytea_substr(fcinfo); > } > > /* > *************** > *** 1422,1424 **** > --- 1653,1834 ---- > > PG_RETURN_INT32(cmp); > } > + > + /* > + * replace_text > + * replace all occurences of 'old_sub_str' in 'orig_str' > + * with 'new_sub_str' to form 'new_str' > + * > + * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == '' > + * otherwise returns 'new_str' > + */ > + Datum > + replace_text(PG_FUNCTION_ARGS) > + { > + text *left_text; > + text *right_text; > + text *buf_text; > + text *ret_text; > + int curr_posn; > + text *src_text = PG_GETARG_TEXT_P(0); > + int src_text_len = TEXTLEN(src_text); > + text *from_sub_text = PG_GETARG_TEXT_P(1); > + int from_sub_text_len = TEXTLEN(from_sub_text); > + text *to_sub_text = PG_GETARG_TEXT_P(2); > + char *to_sub_str = PG_TEXT_GET_STR(to_sub_text); > + StringInfo str = makeStringInfo(); > + > + if (src_text_len == 0 || from_sub_text_len == 0) > + PG_RETURN_TEXT_P(src_text); > + > + buf_text = TEXTDUP(src_text); > + curr_posn = TEXTPOS(buf_text, from_sub_text); > + > + while (curr_posn > 0) > + { > + left_text = LEFT(buf_text, from_sub_text); > + right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len); > + > + appendStringInfo(str, PG_TEXT_GET_STR(left_text)); > + appendStringInfo(str, to_sub_str); > + > + pfree(buf_text); > + pfree(left_text); > + buf_text = right_text; > + curr_posn = TEXTPOS(buf_text, from_sub_text); > + } > + > + appendStringInfo(str, PG_TEXT_GET_STR(buf_text)); > + pfree(buf_text); > + > + ret_text = PG_STR_GET_TEXT(str->data); > + pfree(str->data); > + pfree(str); > + > + PG_RETURN_TEXT_P(ret_text); > + } > + > + /* > + * split_text > + * parse input string > + * return ord item (1 based) > + * based on provided field separator > + */ > + Datum > + split_text(PG_FUNCTION_ARGS) > + { > + text *inputstring = PG_GETARG_TEXT_P(0); > + int inputstring_len = TEXTLEN(inputstring); > + text *fldsep = PG_GETARG_TEXT_P(1); > + int fldsep_len = TEXTLEN(fldsep); > + int fldnum = PG_GETARG_INT32(2); > + int start_posn = 0; > + int end_posn = 0; > + text *result_text; > + > + /* return empty string for empty input string */ > + if (inputstring_len < 1) > + PG_RETURN_TEXT_P(PG_STR_GET_TEXT("")); > + > + /* empty field separator */ > + if (fldsep_len < 1) > + { > + if (fldnum == 1) /* first field - just return the input string */ > + PG_RETURN_TEXT_P(inputstring); > + else /* otherwise return an empty string */ > + PG_RETURN_TEXT_P(PG_STR_GET_TEXT("")); > + } > + > + /* field number is 1 based */ > + if (fldnum < 1) > + elog(ERROR, "field position must be > 0"); > + > + start_posn = text_position(PointerGetDatum(inputstring), > + PointerGetDatum(fldsep), > + fldnum - 1); > + end_posn = text_position(PointerGetDatum(inputstring), > + PointerGetDatum(fldsep), > + fldnum); > + > + if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */ > + { > + if (fldnum == 1) /* first field - just return the input string */ > + PG_RETURN_TEXT_P(inputstring); > + else /* otherwise return an empty string */ > + PG_RETURN_TEXT_P(PG_STR_GET_TEXT("")); > + } > + else if ((start_posn != 0) && (end_posn == 0)) > + { > + /* last field requested */ > + result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true); > + PG_RETURN_TEXT_P(result_text); > + } > + else if ((start_posn == 0) && (end_posn != 0)) > + { > + /* first field requested */ > + result_text = LEFT(inputstring, fldsep); > + PG_RETURN_TEXT_P(result_text); > + } > + else > + { > + /* prior to last field requested */ > + result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len,false); > + PG_RETURN_TEXT_P(result_text); > + } > + } > + > + #define HEXBASE 16 > + /* > + * Convert a int32 to a string containing a base 16 (hex) representation of > + * the number. > + */ > + Datum > + to_hex32(PG_FUNCTION_ARGS) > + { > + static char digits[] = "0123456789abcdef"; > + char buf[32]; /* bigger than needed, but reasonable */ > + char *ptr, > + *end; > + text *result_text; > + int32 value = PG_GETARG_INT32(0); > + > + end = ptr = buf + sizeof(buf) - 1; > + *ptr = '\0'; > + > + do > + { > + *--ptr = digits[value % HEXBASE]; > + value /= HEXBASE; > + } while (ptr > buf && value); > + > + result_text = PG_STR_GET_TEXT(ptr); > + PG_RETURN_TEXT_P(result_text); > + } > + > + /* > + * Convert a int64 to a string containing a base 16 (hex) representation of > + * the number. > + */ > + Datum > + to_hex64(PG_FUNCTION_ARGS) > + { > + static char digits[] = "0123456789abcdef"; > + char buf[32]; /* bigger than needed, but reasonable */ > + char *ptr, > + *end; > + text *result_text; > + int64 value = PG_GETARG_INT64(0); > + > + end = ptr = buf + sizeof(buf) - 1; > + *ptr = '\0'; > + > + do > + { > + *--ptr = digits[value % HEXBASE]; > + value /= HEXBASE; > + } while (ptr > buf && value); > + > + result_text = PG_STR_GET_TEXT(ptr); > + PG_RETURN_TEXT_P(result_text); > + } > + > Index: src/include/catalog/pg_proc.h > =================================================================== > RCS file: /opt/src/cvs/pgsql-server/src/include/catalog/pg_proc.h,v > retrieving revision 1.254 > diff -c -r1.254 pg_proc.h > *** src/include/catalog/pg_proc.h 15 Aug 2002 02:51:27 -0000 1.254 > --- src/include/catalog/pg_proc.h 16 Aug 2002 18:53:13 -0000 > *************** > *** 2121,2127 **** > DESCR("remove initial characters from string"); > DATA(insert OID = 882 ( rtrim PGNSP PGUID 14 f f t f i 1 25 "25" "select rtrim($1, \' \')" - _null_ )); > DESCR("remove trailing characters from string"); > ! DATA(insert OID = 883 ( substr PGNSP PGUID 14 f f t f i 2 25 "25 23" "select substr($1, $2, -1)" - _null_)); > DESCR("return portion of string"); > DATA(insert OID = 884 ( btrim PGNSP PGUID 12 f f t f i 2 25 "25 25" btrim - _null_ )); > DESCR("trim both ends of string"); > --- 2121,2127 ---- > DESCR("remove initial characters from string"); > DATA(insert OID = 882 ( rtrim PGNSP PGUID 14 f f t f i 1 25 "25" "select rtrim($1, \' \')" - _null_ )); > DESCR("remove trailing characters from string"); > ! DATA(insert OID = 883 ( substr PGNSP PGUID 12 f f t f i 2 25 "25 23" text_substr_no_len - _null_ )); > DESCR("return portion of string"); > DATA(insert OID = 884 ( btrim PGNSP PGUID 12 f f t f i 2 25 "25 25" btrim - _null_ )); > DESCR("trim both ends of string"); > *************** > *** 2130,2137 **** > > DATA(insert OID = 936 ( substring PGNSP PGUID 12 f f t f i 3 25 "25 23 23" text_substr - _null_ )); > DESCR("return portion of string"); > ! DATA(insert OID = 937 ( substring PGNSP PGUID 14 f f t f i 2 25 "25 23" "select substring($1, $2, -1)" - _null_)); > DESCR("return portion of string"); > > /* for multi-byte support */ > > --- 2130,2145 ---- > > DATA(insert OID = 936 ( substring PGNSP PGUID 12 f f t f i 3 25 "25 23 23" text_substr - _null_ )); > DESCR("return portion of string"); > ! DATA(insert OID = 937 ( substring PGNSP PGUID 12 f f t f i 2 25 "25 23" text_substr_no_len - _null_ )); > DESCR("return portion of string"); > + DATA(insert OID = 2087 ( replace PGNSP PGUID 12 f f t f i 3 25 "25 25 25" replace_text - _null_ )); > + DESCR("replace all occurrences of old_substr with new_substr in string"); > + DATA(insert OID = 2088 ( split PGNSP PGUID 12 f f t f i 3 25 "25 25 23" split_text - _null_ )); > + DESCR("split string by field_sep and return field_num"); > + DATA(insert OID = 2089 ( to_hex PGNSP PGUID 12 f f t f i 1 25 "23" to_hex32 - _null_ )); > + DESCR("convert int32 number to hex"); > + DATA(insert OID = 2090 ( to_hex PGNSP PGUID 12 f f t f i 1 25 "20" to_hex64 - _null_ )); > + DESCR("convert int64 number to hex"); > > /* for multi-byte support */ > > *************** > *** 2778,2784 **** > DESCR("concatenate"); > DATA(insert OID = 2012 ( substring PGNSP PGUID 12 f f t f i 3 17 "17 23 23" bytea_substr - _null_ )); > DESCR("return portion of string"); > ! DATA(insert OID = 2013 ( substring PGNSP PGUID 14 f f t f i 2 17 "17 23" "select substring($1, $2, -1)"- _null_ )); > DESCR("return portion of string"); > DATA(insert OID = 2014 ( position PGNSP PGUID 12 f f t f i 2 23 "17 17" byteapos - _null_ )); > DESCR("return position of substring"); > --- 2786,2796 ---- > DESCR("concatenate"); > DATA(insert OID = 2012 ( substring PGNSP PGUID 12 f f t f i 3 17 "17 23 23" bytea_substr - _null_ )); > DESCR("return portion of string"); > ! DATA(insert OID = 2013 ( substring PGNSP PGUID 12 f f t f i 2 17 "17 23" bytea_substr_no_len - _null_ )); > ! DESCR("return portion of string"); > ! DATA(insert OID = 2085 ( substr PGNSP PGUID 12 f f t f i 3 17 "17 23 23" bytea_substr - _null_ )); > ! DESCR("return portion of string"); > ! DATA(insert OID = 2086 ( substr PGNSP PGUID 12 f f t f i 2 17 "17 23" bytea_substr_no_len - _null_ )); > DESCR("return portion of string"); > DATA(insert OID = 2014 ( position PGNSP PGUID 12 f f t f i 2 23 "17 17" byteapos - _null_ )); > DESCR("return position of substring"); > Index: src/include/utils/builtins.h > =================================================================== > RCS file: /opt/src/cvs/pgsql-server/src/include/utils/builtins.h,v > retrieving revision 1.191 > diff -c -r1.191 builtins.h > *** src/include/utils/builtins.h 15 Aug 2002 02:51:27 -0000 1.191 > --- src/include/utils/builtins.h 16 Aug 2002 18:53:13 -0000 > *************** > *** 447,458 **** > --- 447,463 ---- > extern Datum textoctetlen(PG_FUNCTION_ARGS); > extern Datum textpos(PG_FUNCTION_ARGS); > extern Datum text_substr(PG_FUNCTION_ARGS); > + extern Datum text_substr_no_len(PG_FUNCTION_ARGS); > extern Datum name_text(PG_FUNCTION_ARGS); > extern Datum text_name(PG_FUNCTION_ARGS); > extern int varstr_cmp(char *arg1, int len1, char *arg2, int len2); > extern List *textToQualifiedNameList(text *textval, const char *caller); > extern bool SplitIdentifierString(char *rawstring, char separator, > List **namelist); > + extern Datum replace_text(PG_FUNCTION_ARGS); > + extern Datum split_text(PG_FUNCTION_ARGS); > + extern Datum to_hex32(PG_FUNCTION_ARGS); > + extern Datum to_hex64(PG_FUNCTION_ARGS); > > extern Datum unknownin(PG_FUNCTION_ARGS); > extern Datum unknownout(PG_FUNCTION_ARGS); > *************** > *** 476,481 **** > --- 481,487 ---- > extern Datum byteacat(PG_FUNCTION_ARGS); > extern Datum byteapos(PG_FUNCTION_ARGS); > extern Datum bytea_substr(PG_FUNCTION_ARGS); > + extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS); > > /* version.c */ > extern Datum pgsql_version(PG_FUNCTION_ARGS); > Index: src/test/regress/expected/strings.out > =================================================================== > RCS file: /opt/src/cvs/pgsql-server/src/test/regress/expected/strings.out,v > retrieving revision 1.12 > diff -c -r1.12 strings.out > *** src/test/regress/expected/strings.out 11 Jun 2002 15:41:38 -0000 1.12 > --- src/test/regress/expected/strings.out 16 Aug 2002 18:53:13 -0000 > *************** > *** 573,575 **** > --- 573,738 ---- > text and varchar > (1 row) > > + -- > + -- test substr with toasted text values > + -- > + CREATE TABLE toasttest(f1 text); > + insert into toasttest values(repeat('1234567890',10000)); > + insert into toasttest values(repeat('1234567890',10000)); > + -- If the starting position is zero or less, then return from the start of the string > + -- adjusting the length to be consistent with the "negative start" per SQL92. > + SELECT substr(f1, -1, 5) from toasttest; > + substr > + -------- > + 123 > + 123 > + (2 rows) > + > + -- If the length is less than zero, an ERROR is thrown. > + SELECT substr(f1, 5, -1) from toasttest; > + ERROR: negative substring length not allowed > + -- If no third argument (length) is provided, the length to the end of the > + -- string is assumed. > + SELECT substr(f1, 99995) from toasttest; > + substr > + -------- > + 567890 > + 567890 > + (2 rows) > + > + -- If start plus length is > string length, the result is truncated to > + -- string length > + SELECT substr(f1, 99995, 10) from toasttest; > + substr > + -------- > + 567890 > + 567890 > + (2 rows) > + > + DROP TABLE toasttest; > + -- > + -- test substr with toasted bytea values > + -- > + CREATE TABLE toasttest(f1 bytea); > + insert into toasttest values(decode(repeat('1234567890',10000),'escape')); > + insert into toasttest values(decode(repeat('1234567890',10000),'escape')); > + -- If the starting position is zero or less, then return from the start of the string > + -- adjusting the length to be consistent with the "negative start" per SQL92. > + SELECT substr(f1, -1, 5) from toasttest; > + substr > + -------- > + 123 > + 123 > + (2 rows) > + > + -- If the length is less than zero, an ERROR is thrown. > + SELECT substr(f1, 5, -1) from toasttest; > + ERROR: negative substring length not allowed > + -- If no third argument (length) is provided, the length to the end of the > + -- string is assumed. > + SELECT substr(f1, 99995) from toasttest; > + substr > + -------- > + 567890 > + 567890 > + (2 rows) > + > + -- If start plus length is > string length, the result is truncated to > + -- string length > + SELECT substr(f1, 99995, 10) from toasttest; > + substr > + -------- > + 567890 > + 567890 > + (2 rows) > + > + DROP TABLE toasttest; > + -- > + -- test length > + -- > + SELECT length('abcdef') AS "length_6"; > + length_6 > + ---------- > + 6 > + (1 row) > + > + -- > + -- test strpos > + -- > + SELECT strpos('abcdef', 'cd') AS "pos_3"; > + pos_3 > + ------- > + 3 > + (1 row) > + > + SELECT strpos('abcdef', 'xy') AS "pos_0"; > + pos_0 > + ------- > + 0 > + (1 row) > + > + -- > + -- test replace > + -- > + SELECT replace('abcdef', 'de', '45') AS "abc45f"; > + abc45f > + -------- > + abc45f > + (1 row) > + > + SELECT replace('yabadabadoo', 'ba', '123') AS "ya123da123doo"; > + ya123da123doo > + --------------- > + ya123da123doo > + (1 row) > + > + SELECT replace('yabadoo', 'bad', '') AS "yaoo"; > + yaoo > + ------ > + yaoo > + (1 row) > + > + -- > + -- test split > + -- > + select split('joeuser@mydatabase','@',0) AS "an error"; > + ERROR: field position must be > 0 > + select split('joeuser@mydatabase','@',1) AS "joeuser"; > + joeuser > + --------- > + joeuser > + (1 row) > + > + select split('joeuser@mydatabase','@',2) AS "mydatabase"; > + mydatabase > + ------------ > + mydatabase > + (1 row) > + > + select split('joeuser@mydatabase','@',3) AS "empty string"; > + empty string > + -------------- > + > + (1 row) > + > + select split('@joeuser@mydatabase@','@',2) AS "joeuser"; > + joeuser > + --------- > + joeuser > + (1 row) > + > + -- > + -- test to_hex > + -- > + select to_hex(256*256*256 - 1) AS "ffffff"; > + ffffff > + -------- > + ffffff > + (1 row) > + > + select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff"; > + ffffffff > + ---------- > + ffffffff > + (1 row) > + > Index: src/test/regress/sql/strings.sql > =================================================================== > RCS file: /opt/src/cvs/pgsql-server/src/test/regress/sql/strings.sql,v > retrieving revision 1.8 > diff -c -r1.8 strings.sql > *** src/test/regress/sql/strings.sql 11 Jun 2002 15:41:38 -0000 1.8 > --- src/test/regress/sql/strings.sql 16 Aug 2002 18:53:13 -0000 > *************** > *** 197,199 **** > --- 197,292 ---- > SELECT text 'text' || char(20) ' and characters' AS "Concat text to char"; > > SELECT text 'text' || varchar ' and varchar' AS "Concat text to varchar"; > + > + -- > + -- test substr with toasted text values > + -- > + CREATE TABLE toasttest(f1 text); > + > + insert into toasttest values(repeat('1234567890',10000)); > + insert into toasttest values(repeat('1234567890',10000)); > + > + -- If the starting position is zero or less, then return from the start of the string > + -- adjusting the length to be consistent with the "negative start" per SQL92. > + SELECT substr(f1, -1, 5) from toasttest; > + > + -- If the length is less than zero, an ERROR is thrown. > + SELECT substr(f1, 5, -1) from toasttest; > + > + -- If no third argument (length) is provided, the length to the end of the > + -- string is assumed. > + SELECT substr(f1, 99995) from toasttest; > + > + -- If start plus length is > string length, the result is truncated to > + -- string length > + SELECT substr(f1, 99995, 10) from toasttest; > + > + DROP TABLE toasttest; > + > + -- > + -- test substr with toasted bytea values > + -- > + CREATE TABLE toasttest(f1 bytea); > + > + insert into toasttest values(decode(repeat('1234567890',10000),'escape')); > + insert into toasttest values(decode(repeat('1234567890',10000),'escape')); > + > + -- If the starting position is zero or less, then return from the start of the string > + -- adjusting the length to be consistent with the "negative start" per SQL92. > + SELECT substr(f1, -1, 5) from toasttest; > + > + -- If the length is less than zero, an ERROR is thrown. > + SELECT substr(f1, 5, -1) from toasttest; > + > + -- If no third argument (length) is provided, the length to the end of the > + -- string is assumed. > + SELECT substr(f1, 99995) from toasttest; > + > + -- If start plus length is > string length, the result is truncated to > + -- string length > + SELECT substr(f1, 99995, 10) from toasttest; > + > + DROP TABLE toasttest; > + > + -- > + -- test length > + -- > + > + SELECT length('abcdef') AS "length_6"; > + > + -- > + -- test strpos > + -- > + > + SELECT strpos('abcdef', 'cd') AS "pos_3"; > + > + SELECT strpos('abcdef', 'xy') AS "pos_0"; > + > + -- > + -- test replace > + -- > + SELECT replace('abcdef', 'de', '45') AS "abc45f"; > + > + SELECT replace('yabadabadoo', 'ba', '123') AS "ya123da123doo"; > + > + SELECT replace('yabadoo', 'bad', '') AS "yaoo"; > + > + -- > + -- test split > + -- > + select split('joeuser@mydatabase','@',0) AS "an error"; > + > + select split('joeuser@mydatabase','@',1) AS "joeuser"; > + > + select split('joeuser@mydatabase','@',2) AS "mydatabase"; > + > + select split('joeuser@mydatabase','@',3) AS "empty string"; > + > + select split('@joeuser@mydatabase@','@',2) AS "joeuser"; > + > + -- > + -- test to_hex > + -- > + select to_hex(256*256*256 - 1) AS "ffffff"; > + > + select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff"; > > ---------------------------(end of broadcast)--------------------------- > TIP 5: Have you checked our extensive FAQ? > > http://www.postgresql.org/users-lounge/docs/faq.html -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 359-1001 + If your life is a hard drive, | 13 Roberts Road + Christ can be your backup. | Newtown Square, Pennsylvania 19073
pgsql-patches by date: