diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml new file mode 100644 index 9c143b2..f85bfd0 *** a/doc/src/sgml/func.sgml --- b/doc/src/sgml/func.sgml *************** *** 1778,1783 **** --- 1778,1800 ---- + parse_ident + + parse_ident(str text, + [ strictmode boolean DEFAULT true ] ) + + text[] + Split a qualified identifier into an array of its parts. + When the second parameter is true, no characters are allowed after the last identifier. When + the second parameter is false, characters after the last identifier are ignored. + + parse_ident('"SomeSchema".someTable') + {SomeSchema,sometable} + + + + + pg_client_encoding pg_client_encoding() diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql new file mode 100644 index 923fe58..61d5b80 *** a/src/backend/catalog/system_views.sql --- b/src/backend/catalog/system_views.sql *************** RETURNS jsonb *** 965,967 **** --- 965,974 ---- LANGUAGE INTERNAL STRICT IMMUTABLE AS 'jsonb_set'; + + CREATE OR REPLACE FUNCTION + parse_ident(str text, strictmode boolean DEFAULT true) + RETURNS text[] + LANGUAGE INTERNAL + STRICT IMMUTABLE + AS 'parse_ident'; diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c new file mode 100644 index 6a306f3..a6c3452 *** a/src/backend/utils/adt/misc.c --- b/src/backend/utils/adt/misc.c *************** *** 21,32 **** --- 21,35 ---- #include #include "access/sysattr.h" + #include "access/htup_details.h" #include "catalog/catalog.h" + #include "catalog/namespace.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" #include "commands/dbcommands.h" #include "funcapi.h" #include "miscadmin.h" + #include "parser/scansup.h" #include "parser/keywords.h" #include "postmaster/syslogger.h" #include "rewrite/rewriteHandler.h" *************** *** 38,43 **** --- 41,47 ---- #include "utils/ruleutils.h" #include "tcop/tcopprot.h" #include "utils/acl.h" + #include "utils/array.h" #include "utils/builtins.h" #include "utils/timestamp.h" ***************
pg_column_is_updatable(PG_FUNCTION_ARGS)
*** 598,600 ****
--- 602,773 ----
  
  	PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
  }
+ 
+ 
+ /*
+  * Character-class helpers used only by parse_ident().  They are meant to
+  * match the lexer's rules for unquoted identifiers: is_ident_start()
+  * accepts '_', ASCII letters and any byte with the high bit set;
+  * is_ident_cont() additionally accepts ASCII digits and '$'.
+  */
+ static bool
+ is_ident_start(unsigned char c)
+ {
+ 	if (c == '_')
+ 		return true;
+ 	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+ 		return true;
+ 
+ 	/* any high-bit-set byte (may be part of a multibyte character) */
+ 	if (c >= 0200 && c <= 0377)
+ 		return true;
+ 
+ 	return false;
+ }
+ 
+ static bool
+ is_ident_cont(unsigned char c)
+ {
+ 	/* digits and '$' may follow the first character of an identifier */
+ 	if ((c >= '0' && c <= '9') || c == '$')
+ 		return true;
+ 
+ 	return is_ident_start(c);
+ }
+ 
+ /*
+  * parse_ident - split a possibly qualified SQL identifier into its parts.
+  *
+  * Takes the identifier string and a strict-mode flag, and returns a text
+  * array with one element per dot-separated part.  Unquoted parts are passed
+  * through downcase_truncate_identifier(); double-quoted parts are taken
+  * as written, with each doubled quote reduced to a single one.
+  *
+  * When strict mode is active (second parameter), characters following the
+  * last identifier raise an error; otherwise they are ignored.  An unclosed
+  * quote, an empty quoted identifier and a missing identifier are errors in
+  * both modes.
+  */
+ Datum
+ parse_ident(PG_FUNCTION_ARGS)
+ {
+ 	text	   *qualname;
+ 	char	   *qualname_str;
+ 	bool		strict_mode;
+ 	ArrayBuildState *astate = NULL;
+ 	char	   *nextp;
+ 	bool		after_dot = false;
+ 
+ 	qualname = PG_GETARG_TEXT_PP(0);
+ 	qualname_str = text_to_cstring(qualname);
+ 	strict_mode = PG_GETARG_BOOL(1);
+ 
+ 	/*
+ 	 * qualname_str is modified in place while de-quoting, so every error
+ 	 * message below reports a fresh copy of the original argument.
+ 	 */
+ 	nextp = qualname_str;
+ 
+ 	/* skip leading whitespace, using the lexer's notion of whitespace */
+ 	while (scanner_isspace(*nextp))
+ 		nextp++;
+ 
+ 	for (;;)
+ 	{
+ 		char	   *curname;
+ 		char	   *endp;
+ 		bool		missing_ident = true;
+ 
+ 		if (*nextp == '\"')
+ 		{
+ 			/* quoted identifier: locate the closing quote */
+ 			curname = nextp + 1;
+ 			for (;;)
+ 			{
+ 				endp = strchr(nextp + 1, '\"');
+ 				if (endp == NULL)
+ 					ereport(ERROR,
+ 							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 							 errmsg("unclosed double quotes"),
+ 							 errdetail("string \"%s\" is not valid identifier",
+ 									   text_to_cstring(qualname))));
+ 				if (endp[1] != '\"')
+ 					break;
+ 				/* doubled quote: keep one, shifting the tail and its NUL */
+ 				memmove(endp, endp + 1, strlen(endp));
+ 				nextp = endp;
+ 			}
+ 			nextp = endp + 1;
+ 			*endp = '\0';
+ 
+ 			if (endp - curname == 0)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 						 errmsg("identifier should not be empty: \"%s\"",
+ 								text_to_cstring(qualname))));
+ 
+ 			astate = accumArrayResult(astate,
+ 									  CStringGetTextDatum(curname), false,
+ 									  TEXTOID, CurrentMemoryContext);
+ 			missing_ident = false;
+ 		}
+ 		else if (is_ident_start((unsigned char) *nextp))
+ 		{
+ 			char	   *downname;
+ 			int			len;
+ 
+ 			curname = nextp++;
+ 			while (is_ident_cont((unsigned char) *nextp))
+ 				nextp++;
+ 
+ 			len = nextp - curname;
+ 
+ 			/*
+ 			 * The downcased copy can be shorter than len when the name was
+ 			 * truncated, so build the text datum from its own length.
+ 			 */
+ 			downname = downcase_truncate_identifier(curname, len, false);
+ 			astate = accumArrayResult(astate,
+ 									  CStringGetTextDatum(downname), false,
+ 									  TEXTOID, CurrentMemoryContext);
+ 			missing_ident = false;
+ 		}
+ 
+ 		if (missing_ident)
+ 		{
+ 			/* blame a "." only when one was actually consumed */
+ 			if (after_dot)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 						 errmsg("missing identifier after \".\" symbol: \"%s\"",
+ 								text_to_cstring(qualname))));
+ 			else
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 						 errmsg("string is not a valid identifier: \"%s\"",
+ 								text_to_cstring(qualname))));
+ 		}
+ 
+ 		while (scanner_isspace(*nextp))
+ 			nextp++;
+ 
+ 		if (*nextp == '.')
+ 		{
+ 			after_dot = true;
+ 			nextp++;
+ 			while (scanner_isspace(*nextp))
+ 				nextp++;
+ 			continue;
+ 		}
+ 
+ 		/* anything else left over is an error only in strict mode */
+ 		if (*nextp != '\0' && strict_mode)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 					 errmsg("identifier contains disallowed characters: \"%s\"",
+ 							text_to_cstring(qualname))));
+ 		break;
+ 	}
+ 
+ 	PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+ }
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
new file mode 100644
index 79e92ff..a67518e
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
*************** DESCR("I/O");
*** 3443,3448 ****
--- 3443,3451 ----
  DATA(insert OID = 4086 ( to_regnamespace PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 4089 "25" _null_ _null_ _null_ _null_ _null_ to_regnamespace _null_ _null_ _null_ ));
  DESCR("convert namespace name to regnamespace");
  
+ DATA(insert OID = 3318 ( parse_ident PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "25 16" _null_ _null_ "{str,strictmode}" _null_ _null_ parse_ident _null_ _null_ _null_ ));
+ DESCR("parse qualified identifier to array of identifiers");
+ 
  DATA(insert OID = 2246 ( fmgr_internal_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_
_null_ fmgr_internal_validator _null_ _null_ _null_ )); DESCR("(internal)"); DATA(insert OID = 2247 ( fmgr_c_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_c_validator _null_ _null_ _null_ )); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h new file mode 100644 index c2e529f..193b049 *** a/src/include/utils/builtins.h --- b/src/include/utils/builtins.h *************** extern Datum pg_typeof(PG_FUNCTION_ARGS) *** 503,508 **** --- 503,509 ---- extern Datum pg_collation_for(PG_FUNCTION_ARGS); extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS); extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS); + extern Datum parse_ident(PG_FUNCTION_ARGS); /* oid.c */ extern Datum oidin(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/name.out b/src/test/regress/expected/name.out new file mode 100644 index b359d52..8de3f79 *** a/src/test/regress/expected/name.out --- b/src/test/regress/expected/name.out *************** SELECT '' AS two, c.f1 FROM NAME_TBL c W *** 124,126 **** --- 124,151 ---- (2 rows) DROP TABLE NAME_TBL; + DO $$ + DECLARE r text[]; + BEGIN + r := parse_ident('Schemax.Tabley'); + RAISE NOTICE '%', format('%I.%I', r[1], r[2]); + r := parse_ident('"SchemaX"."TableY"'); + RAISE NOTICE '%', format('%I.%I', r[1], r[2]); + END; + $$; + NOTICE: schemax.tabley + NOTICE: "SchemaX"."TableY" + SELECT parse_ident('foo.boo'); + parse_ident + ------------- + {foo,boo} + (1 row) + + SELECT parse_ident('foo.boo[]'); -- should fail + ERROR: identifier contains disallowed characters: "foo.boo[]" + SELECT parse_ident('foo.boo[]', strictmode => false); -- ok + parse_ident + ------------- + {foo,boo} + (1 row) + diff --git a/src/test/regress/sql/name.sql b/src/test/regress/sql/name.sql new file mode 100644 index 1c7a671..629e23f *** a/src/test/regress/sql/name.sql --- b/src/test/regress/sql/name.sql *************** SELECT '' AS three, c.f1 FROM NAME_TBL c *** 52,54 **** --- 52,69 ---- SELECT 
'' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*'; DROP TABLE NAME_TBL; + + DO $$ + DECLARE r text[]; + BEGIN + r := parse_ident('Schemax.Tabley'); + RAISE NOTICE '%', format('%I.%I', r[1], r[2]); + r := parse_ident('"SchemaX"."TableY"'); + RAISE NOTICE '%', format('%I.%I', r[1], r[2]); + END; + $$; + + SELECT parse_ident('foo.boo'); + SELECT parse_ident('foo.boo[]'); -- should fail + SELECT parse_ident('foo.boo[]', strictmode => false); -- ok +