Index: src/backend/commands/copy.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/commands/copy.c,v retrieving revision 1.245 diff -c -r1.245 copy.c *** src/backend/commands/copy.c 2 Jun 2005 01:21:22 -0000 1.245 --- src/backend/commands/copy.c 27 Jun 2005 06:19:56 -0000 *************** *** 51,56 **** --- 51,57 ---- #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7')) #define OCTVALUE(c) ((c) - '0') + #define COPY_BUF_SIZE 65536 /* * Represents the different source/dest cases we need to worry about at *************** *** 84,90 **** EOL_CRNL } EolType; - static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0"; /* --- 85,90 ---- *************** *** 92,97 **** --- 92,100 ---- * never been reentrant... */ static CopyDest copy_dest; + static int eol_ch[2]; /* The byte values of the 1 or 2 eol bytes */ + static char escapec; /* escape char for delimited data format. */ + static bool client_encoding_only; /* true if client encoding is a non supported server encoding */ static FILE *copy_file; /* used if copy_dest == COPY_FILE */ static StringInfo copy_msgbuf; /* used if copy_dest == COPY_NEW_FE */ static bool fe_eof; /* true if detected end of copy data */ *************** *** 101,131 **** /* these are just for error messages, see copy_in_error_callback */ static bool copy_binary; /* is it a binary copy? */ ! static const char *copy_relname; /* table name for error messages */ static int copy_lineno; /* line number for error messages */ ! static const char *copy_attname; /* current att for error messages */ /* * These static variables are used to avoid incurring overhead for each ! * attribute processed. attribute_buf is reused on each CopyReadAttribute * call to hold the string being read in. Under normal use it will soon * grow to a suitable size, and then we will avoid palloc/pfree overhead * for subsequent attributes. Note that CopyReadAttribute returns a pointer ! * to attribute_buf's data buffer! 
*/ ! static StringInfoData attribute_buf; ! /* * Similarly, line_buf holds the whole input line being processed (its * cursor field points to the next character to be read by CopyReadAttribute). * The input cycle is first to read the whole line into line_buf, convert it * to server encoding, and then extract individual attribute fields into ! * attribute_buf. (We used to have CopyReadAttribute read the input source * directly, but that caused a lot of encoding issues and unnecessary logic * complexity.) */ ! static StringInfoData line_buf; static bool line_buf_converted; /* non-export function prototypes */ --- 104,144 ---- /* these are just for error messages, see copy_in_error_callback */ static bool copy_binary; /* is it a binary copy? */ ! static const char *copy_relname;/* table name for error messages */ static int copy_lineno; /* line number for error messages */ ! static const char *copy_attname;/* current att for error messages */ ! ! /* Static variables for buffered input parsing */ ! /* leave room for extra null (to enable use of string functions) */ ! static char input_buf[COPY_BUF_SIZE + 1]; ! static char *begloc; ! static char *endloc; ! static bool esc_in_prevbuf; /* backslash was last character of the data input buffer */ ! static bool cr_in_prevbuf; /* CR was last character of the data input buffer */ ! static bool line_done; /* finished processing the whole line or stopped in the middle */ ! static bool buf_done; /* finished processing the current buffer */ ! static int buffer_index; /* input buffer index */ /* * These static variables are used to avoid incurring overhead for each ! * attribute processed. attr_buf is reused on each CopyReadAttribute * call to hold the string being read in. Under normal use it will soon * grow to a suitable size, and then we will avoid palloc/pfree overhead * for subsequent attributes. Note that CopyReadAttribute returns a pointer ! * to attr_buf's data buffer! */ ! 
static StringInfoData attr_buf; /* still used in CopyFrom() */ /* * Similarly, line_buf holds the whole input line being processed (its * cursor field points to the next character to be read by CopyReadAttribute). * The input cycle is first to read the whole line into line_buf, convert it * to server encoding, and then extract individual attribute fields into ! * attr_buf. (We used to have CopyReadAttribute read the input source * directly, but that caused a lot of encoding issues and unnecessary logic * complexity.) */ ! static StringInfoData line_buf; /* still used in CopyFromBinary/CSV() */ static bool line_buf_converted; /* non-export function prototypes */ *************** *** 151,156 **** --- 164,182 ---- char *escape, bool force_quote); static List *CopyGetAttnums(Relation rel, List *attnamelist); static void limit_printout_length(StringInfo buf); + static bool CopyReadLineBuffered(size_t bytesread); + static void CopyFromDelimited(Relation rel, List *attnumlist, bool binary, bool oids, + char *delim, char *null_print, bool csv_mode, char *quote, char *escape, + List *force_notnull_atts); + static char *CopyReadAttribute(const char *delim, const char *null_print, + CopyReadResult *result, bool *isnull); + static void CopyReadAllAttrs(const char *delim, const char *null_print, int null_print_len, + char *nulls, List *attnumlist , int *attr_offsets, int num_phys_attrs, + Form_pg_attribute *attr); + static char *CopyReadOidAttr(const char *delim, const char *null_print, int null_print_len, + CopyReadResult *result, bool *isnull); + static bool DetectLineEnd(size_t bytesread); + /* Internal communications functions */ static void SendCopyBegin(bool binary, int natts); *************** *** 160,166 **** static void CopySendString(const char *str); static void CopySendChar(char c); static void CopySendEndOfRow(bool binary); ! 
static void CopyGetData(void *databuf, int datasize); static int CopyGetChar(void); #define CopyGetEof() (fe_eof) --- 186,192 ---- static void CopySendString(const char *str); static void CopySendChar(char c); static void CopySendEndOfRow(bool binary); ! static int CopyGetData(void *databuf, int datasize); static int CopyGetChar(void); #define CopyGetEof() (fe_eof) *************** *** 171,176 **** --- 197,205 ---- static void CopySendInt16(int16 val); static int16 CopyGetInt16(void); + /* new parsing utils */ + static char *strchrlen(const char *s, int c, size_t len); + static char *str2chrlen(const char *s, int c1, int c2, size_t len, char *c_found); /* * Send copy start/stop messages for frontend copies. These have changed *************** *** 383,396 **** * It seems unwise to allow the COPY IN to complete normally in that case. * * NB: no data conversion is applied by these functions */ ! static void CopyGetData(void *databuf, int datasize) { switch (copy_dest) { case COPY_FILE: ! fread(databuf, datasize, 1, copy_file); if (feof(copy_file)) fe_eof = true; break; --- 412,430 ---- * It seems unwise to allow the COPY IN to complete normally in that case. * * NB: no data conversion is applied by these functions + * + * Returns: the number of bytes that were successfully read + * into the data buffer. */ ! static int CopyGetData(void *databuf, int datasize) { + size_t bytesread = 0; + switch (copy_dest) { case COPY_FILE: ! bytesread = fread(databuf, 1, datasize, copy_file); if (feof(copy_file)) fe_eof = true; break; *************** *** 402,407 **** --- 436,442 ---- (errcode(ERRCODE_CONNECTION_FAILURE), errmsg("unexpected EOF on client connection"))); } + bytesread += datasize; /* update the count of bytes that were read so far */ break; case COPY_NEW_FE: while (datasize > 0 && !fe_eof) *************** *** 413,419 **** /* Try to receive another message */ int mtype; ! 
readmessage: mtype = pq_getbyte(); if (mtype == EOF) ereport(ERROR, --- 448,454 ---- /* Try to receive another message */ int mtype; ! readmessage: mtype = pq_getbyte(); if (mtype == EOF) ereport(ERROR, *************** *** 430,436 **** case 'c': /* CopyDone */ /* COPY IN correctly terminated by frontend */ fe_eof = true; ! return; case 'f': /* CopyFail */ ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED), --- 465,471 ---- case 'c': /* CopyDone */ /* COPY IN correctly terminated by frontend */ fe_eof = true; ! return bytesread; case 'f': /* CopyFail */ ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED), *************** *** 460,469 **** --- 495,507 ---- avail = datasize; pq_copymsgbytes(copy_msgbuf, databuf, avail); databuf = (void *) ((char *) databuf + avail); + bytesread += avail; /* update the count of bytes that were read so far */ datasize -= avail; } break; } + + return bytesread; } static int *************** *** 709,715 **** AclResult aclresult; /* Extract options from the statement node tree */ ! foreach(option, stmt->options) { DefElem *defel = (DefElem *) lfirst(option); --- 747,753 ---- AclResult aclresult; /* Extract options from the statement node tree */ ! foreach (option, stmt->options) { DefElem *defel = (DefElem *) lfirst(option); *************** *** 828,833 **** --- 866,873 ---- escape = quote; } + escapec = '\\'; /* will be configurable in the future */ + /* Only single-character delimiter strings are supported. */ if (strlen(delim) != 1) ereport(ERROR, *************** *** 935,941 **** force_quote_atts = CopyGetAttnums(rel, force_quote); ! foreach(cur, force_quote_atts) { int attnum = lfirst_int(cur); --- 975,981 ---- force_quote_atts = CopyGetAttnums(rel, force_quote); ! foreach (cur, force_quote_atts) { int attnum = lfirst_int(cur); *************** *** 956,962 **** force_notnull_atts = CopyGetAttnums(rel, force_notnull); ! 
foreach(cur, force_notnull_atts) { int attnum = lfirst_int(cur); --- 996,1002 ---- force_notnull_atts = CopyGetAttnums(rel, force_notnull); ! foreach (cur, force_notnull_atts) { int attnum = lfirst_int(cur); *************** *** 969,981 **** } /* Set up variables to avoid per-attribute overhead. */ ! initStringInfo(&attribute_buf); initStringInfo(&line_buf); line_buf_converted = false; client_encoding = pg_get_client_encoding(); server_encoding = GetDatabaseEncoding(); copy_dest = COPY_FILE; /* default */ copy_file = NULL; copy_msgbuf = NULL; --- 1009,1039 ---- } /* Set up variables to avoid per-attribute overhead. */ ! initStringInfo(&attr_buf); initStringInfo(&line_buf); line_buf_converted = false; client_encoding = pg_get_client_encoding(); server_encoding = GetDatabaseEncoding(); + /* + * check if the client encoding is one of the 5 encodings + * that are not supported as a server encodings. + */ + switch(client_encoding) + { + case PG_SJIS: + case PG_BIG5: + case PG_GBK: + case PG_UHC: + case PG_GB18030: + client_encoding_only = true; + break; + default: + client_encoding_only = false; + } + + copy_dest = COPY_FILE; /* default */ copy_file = NULL; copy_msgbuf = NULL; *************** *** 1029,1036 **** --- 1087,1101 ---- errmsg("\"%s\" is a directory", filename))); } } + + if (csv_mode || binary) /* old path */ CopyFrom(rel, attnumlist, binary, oids, delim, null_print, csv_mode, quote, escape, force_notnull_atts, header_line); + /* new path for improved performance (only for delimited format for now) */ + else + CopyFromDelimited(rel, attnumlist, binary, oids, delim, null_print, csv_mode, + quote, escape, force_notnull_atts); + } else { /* copy from database to file */ *************** *** 1106,1114 **** errmsg("could not write to file \"%s\": %m", filename))); } ! pfree(attribute_buf.data); pfree(line_buf.data); /* * Close the relation. 
If reading, we can release the AccessShareLock * we got; if writing, we should hold the lock until end of --- 1171,1180 ---- errmsg("could not write to file \"%s\": %m", filename))); } ! pfree(attr_buf.data); pfree(line_buf.data); + /* * Close the relation. If reading, we can release the AccessShareLock * we got; if writing, we should hold the lock until end of *************** *** 1181,1187 **** /* Get info about the columns we need to process. */ out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo)); force_quote = (bool *) palloc(num_phys_attrs * sizeof(bool)); ! foreach(cur, attnumlist) { int attnum = lfirst_int(cur); Oid out_func_oid; --- 1247,1253 ---- /* Get info about the columns we need to process. */ out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo)); force_quote = (bool *) palloc(num_phys_attrs * sizeof(bool)); ! foreach (cur, attnumlist) { int attnum = lfirst_int(cur); Oid out_func_oid; *************** *** 1248,1254 **** bool hdr_delim = false; char *colname; ! foreach(cur, attnumlist) { int attnum = lfirst_int(cur); --- 1314,1320 ---- bool hdr_delim = false; char *colname; ! foreach (cur, attnumlist) { int attnum = lfirst_int(cur); *************** *** 1304,1310 **** } } ! foreach(cur, attnumlist) { int attnum = lfirst_int(cur); Datum value; --- 1370,1376 ---- } } ! foreach (cur, attnumlist) { int attnum = lfirst_int(cur); Datum value; *************** *** 1346,1353 **** { bytea *outputbytes; ! outputbytes = DatumGetByteaP(FunctionCall1(&out_functions[attnum - 1], ! value)); /* We assume the result will not have been toasted */ CopySendInt32(VARSIZE(outputbytes) - VARHDRSZ); CopySendData(VARDATA(outputbytes), --- 1412,1420 ---- { bytea *outputbytes; ! outputbytes = ! DatumGetByteaP(FunctionCall1(&out_functions[attnum - 1], ! 
value)); /* We assume the result will not have been toasted */ CopySendInt32(VARSIZE(outputbytes) - VARHDRSZ); CopySendData(VARDATA(outputbytes), *************** *** 1396,1405 **** if (copy_attname) { /* error is relevant to a particular column */ ! limit_printout_length(&attribute_buf); errcontext("COPY %s, line %d, column %s: \"%s\"", copy_relname, copy_lineno, copy_attname, ! attribute_buf.data); } else { --- 1463,1472 ---- if (copy_attname) { /* error is relevant to a particular column */ ! limit_printout_length(&attr_buf); errcontext("COPY %s, line %d, column %s: \"%s\"", copy_relname, copy_lineno, copy_attname, ! attr_buf.data); } else { *************** *** 1407,1412 **** --- 1474,1481 ---- if (line_buf_converted || client_encoding == server_encoding) { + /* Strip off the newline */ + *(line_buf.data + line_buf.len - 1) ='\0'; limit_printout_length(&line_buf); errcontext("COPY %s, line %d: \"%s\"", copy_relname, copy_lineno, *************** *** 1460,1465 **** --- 1529,1535 ---- appendStringInfoString(buf, "..."); } + /* * Copy FROM file to relation. */ *************** *** 1761,1767 **** } /* Loop to read the user attributes on the line. */ ! foreach(cur, attnumlist) { int attnum = lfirst_int(cur); int m = attnum - 1; --- 1831,1837 ---- } /* Loop to read the user attributes on the line. */ ! foreach (cur, attnumlist) { int attnum = lfirst_int(cur); int m = attnum - 1; *************** *** 1855,1861 **** } i = 0; ! foreach(cur, attnumlist) { int attnum = lfirst_int(cur); int m = attnum - 1; --- 1925,1931 ---- } i = 0; ! foreach (cur, attnumlist) { int attnum = lfirst_int(cur); int m = attnum - 1; *************** *** 1987,1992 **** --- 2057,2506 ---- FreeExecutorState(estate); } + /* + * Copy FROM file to relation with faster processing. 
+  *
+  * This is the buffered-input path: raw input is pulled in COPY_BUF_SIZE
+  * chunks into input_buf, CopyReadLineBuffered() carves complete data lines
+  * out of each chunk into line_buf, and CopyReadAllAttrs() splits a line
+  * into attr_buf / attr_offsets before the per-column input functions run.
+  *
+  * rel                 target relation
+  * attnumlist          attribute numbers (1-based) supplied by the input
+  * binary              only recorded in copy_binary for error reporting
+  * oids                true if every input line starts with an OID field
+  * delim               single-character field delimiter
+  * null_print          text that represents SQL NULL in the input
+  * csv_mode, quote,
+  * escape              accepted for signature parity; not read in this body
+  * force_notnull_atts  copied into a local force_notnull[] array that this
+  *                     body does not otherwise consult
+  *
+  * NOTE(review): when the last input line has no terminating end-of-line
+  * sequence, CopyReadLineBuffered() reports line_done == false together with
+  * buf_done == true, so the "break" below is always taken and that final
+  * partial line appears to be dropped without an error -- confirm intent.
+  *
+  * NOTE(review): the "\." end marker only sets fe_eof; the outer loop still
+  * issues another CopyGetData() call, so for a file source any bytes after
+  * the marker in later chunks look like they are processed as data -- confirm.
+  */
+ static void
+ CopyFromDelimited(Relation rel, List *attnumlist, bool binary, bool oids,
+ 				  char *delim, char *null_print, bool csv_mode, char *quote,
+ 				  char *escape, List *force_notnull_atts)
+ {
+ 	HeapTuple tuple;
+ 	TupleDesc tupDesc;
+ 	Form_pg_attribute *attr;
+ 	AttrNumber num_phys_attrs,
+ 				attr_count,
+ 				num_defaults;
+ 	FmgrInfo *in_functions;
+ 	Oid *typioparams;
+ 	ExprState **constraintexprs;
+ 	bool *force_notnull;
+ 	bool hasConstraints = false;
+ 	int attnum;
+ 	int i;
+ 	Oid in_func_oid;
+ 	Datum *values;
+ 	char *nulls;
+ 	/* an array of offsets into attr_buf that point to beginning of attributes */
+ 	int *attr_offsets;
+ 	bool isnull;
+ 	int null_print_len; /* length of null print */
+ 	ResultRelInfo *resultRelInfo;
+ 	EState *estate = CreateExecutorState(); /* for ExecConstraints() */
+ 	TupleTableSlot *slot;
+ 	bool file_has_oids;
+ 	int *defmap;
+ 	ExprState **defexprs; /* array of default att expressions */
+ 	ExprContext *econtext; /* used for ExecEvalExpr for default atts */
+ 	MemoryContext oldcontext = CurrentMemoryContext;
+ 	ErrorContextCallback errcontext;
+ 	bool no_more_data;
+ 	CopyReadResult result;
+ 	ListCell *cur;
+ 
+ 
+ 	tupDesc = RelationGetDescr(rel);
+ 	attr = tupDesc->attrs;
+ 	num_phys_attrs = tupDesc->natts;
+ 	attr_count = list_length(attnumlist);
+ 	num_defaults = 0;
+ 
+ 	/*
+ 	 * We need a ResultRelInfo so we can use the regular executor's
+ 	 * index-entry-making machinery. (There used to be a huge amount of
+ 	 * code here that basically duplicated execUtils.c ...)
+ 	 */
+ 	resultRelInfo = makeNode(ResultRelInfo);
+ 	resultRelInfo->ri_RangeTableIndex = 1; /* dummy */
+ 	resultRelInfo->ri_RelationDesc = rel;
+ 	resultRelInfo->ri_TrigDesc = CopyTriggerDesc(rel->trigdesc);
+ 	if (resultRelInfo->ri_TrigDesc)
+ 		resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
+ 			palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(FmgrInfo));
+ 	resultRelInfo->ri_TrigInstrument = NULL;
+ 
+ 	ExecOpenIndices(resultRelInfo);
+ 
+ 	estate->es_result_relations = resultRelInfo;
+ 	estate->es_num_result_relations = 1;
+ 	estate->es_result_relation_info = resultRelInfo;
+ 
+ 	/* Set up a tuple slot too */
+ 	slot = MakeSingleTupleTableSlot(tupDesc);
+ 
+ 	econtext = GetPerTupleExprContext(estate);
+ 
+ 	/*
+ 	 * Pick up the required catalog information for each attribute in the
+ 	 * relation, including the input function, the element type (to pass
+ 	 * to the input function), and info about defaults and constraints.
+ 	 * (Which input function we use depends on text/binary format choice.)
+ 	 */
+ 	in_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
+ 	typioparams = (Oid *) palloc(num_phys_attrs * sizeof(Oid));
+ 	defmap = (int *) palloc(num_phys_attrs * sizeof(int));
+ 	defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
+ 	constraintexprs = (ExprState **) palloc0(num_phys_attrs * sizeof(ExprState *));
+ 	force_notnull = (bool *) palloc(num_phys_attrs * sizeof(bool));
+ 
+ 	for (attnum = 1; attnum <= num_phys_attrs; attnum++)
+ 	{
+ 		/* We don't need info for dropped attributes */
+ 		if (attr[attnum - 1]->attisdropped)
+ 			continue;
+ 
+ 		getTypeInputInfo(attr[attnum - 1]->atttypid,
+ 						 &in_func_oid, &typioparams[attnum - 1]);
+ 
+ 		fmgr_info(in_func_oid, &in_functions[attnum - 1]);
+ 
+ 		if (list_member_int(force_notnull_atts, attnum))
+ 			force_notnull[attnum - 1] = true;
+ 		else
+ 			force_notnull[attnum - 1] = false;
+ 
+ 		/* Get default info if needed */
+ 		if (!list_member_int(attnumlist, attnum))
+ 		{
+ 			/* attribute is NOT to be copied from input */
+ 			/* use default value if one exists */
+ 			Node *defexpr = build_column_default(rel, attnum);
+ 
+ 			if (defexpr != NULL)
+ 			{
+ 				defexprs[num_defaults] = ExecPrepareExpr((Expr *) defexpr,
+ 														 estate);
+ 				defmap[num_defaults] = attnum - 1;
+ 				num_defaults++;
+ 			}
+ 		}
+ 
+ 		/* If it's a domain type, set up to check domain constraints */
+ 		if (get_typtype(attr[attnum - 1]->atttypid) == 'd')
+ 		{
+ 			Param *prm;
+ 			Node *node;
+ 
+ 			/*
+ 			 * Easiest way to do this is to use parse_coerce.c to set up
+ 			 * an expression that checks the constraints. (At present,
+ 			 * the expression might contain a length-coercion-function
+ 			 * call and/or CoerceToDomain nodes.) The bottom of the
+ 			 * expression is a Param node so that we can fill in the
+ 			 * actual datum during the data input loop.
+ 			 */
+ 			prm = makeNode(Param);
+ 			prm->paramkind = PARAM_EXEC;
+ 			prm->paramid = 0;
+ 			prm->paramtype = getBaseType(attr[attnum - 1]->atttypid);
+ 
+ 			node = coerce_to_domain((Node *) prm,
+ 									prm->paramtype,
+ 									attr[attnum - 1]->atttypid,
+ 									COERCE_IMPLICIT_CAST, false, false);
+ 
+ 			constraintexprs[attnum - 1] = ExecPrepareExpr((Expr *) node, estate);
+ 			hasConstraints = true;
+ 		}
+ 	}
+ 
+ 	/*
+ 	 * Prepare to catch AFTER triggers.
+ 	 */
+ 	AfterTriggerBeginQuery();
+ 
+ 	/*
+ 	 * Check BEFORE STATEMENT insertion triggers. It's debateable whether
+ 	 * we should do this for COPY, since it's not really an "INSERT"
+ 	 * statement as such. However, executing these triggers maintains
+ 	 * consistency with the EACH ROW triggers that we already fire on
+ 	 * COPY.
+ 	 */
+ 	ExecBSInsertTriggers(estate, resultRelInfo);
+ 
+ 	file_has_oids = oids; /* must rely on user to tell us this... */
+ 
+ 	values = (Datum *) palloc(num_phys_attrs * sizeof(Datum));
+ 	nulls = (char *) palloc(num_phys_attrs * sizeof(char));
+ 	attr_offsets = (int *) palloc(num_phys_attrs * sizeof(int));
+ 
+ 	/* Make room for a PARAM_EXEC value for domain constraint checks */
+ 	if (hasConstraints)
+ 		econtext->ecxt_param_exec_vals = (ParamExecData *)
+ 			palloc0(sizeof(ParamExecData));
+ 
+ 	/* Initialize static variables */
+ 	fe_eof = false;
+ 	eol_type = EOL_UNKNOWN;
+ 	copy_binary = binary;
+ 	copy_relname = RelationGetRelationName(rel);
+ 	copy_lineno = 0;
+ 	copy_attname = NULL;
+ 	line_buf_converted = false;
+ 
+ 	/*
+ 	 * The cross-buffer scanner flags are file statics.  A previous COPY that
+ 	 * was aborted by ereport(ERROR) can leave them set, so they must be
+ 	 * cleared explicitly before the first buffer is scanned.
+ 	 */
+ 	esc_in_prevbuf = false;
+ 	cr_in_prevbuf = false;
+ 
+ 	/* it is MUCH faster to do this once here than in the attribute parse loop */
+ 	null_print_len = strlen(null_print);
+ 
+ 	/* Set up callback to identify error line number */
+ 	errcontext.callback = copy_in_error_callback;
+ 	errcontext.arg = NULL;
+ 	errcontext.previous = error_context_stack;
+ 	error_context_stack = &errcontext;
+ 
+ 	/* Set up data buffer to hold a chunk of data */
+ 	MemSet(input_buf, ' ', COPY_BUF_SIZE * sizeof(char));
+ 	input_buf[COPY_BUF_SIZE] = '\0';
+ 
+ 	no_more_data = false; /* no more input data to read from file or FE */
+ 	line_done = true;
+ 
+ 	do
+ 	{
+ 		/* read a chunk of data into the buffer */
+ 		size_t bytesread = CopyGetData(input_buf, COPY_BUF_SIZE);
+ 		buf_done = false;
+ 
+ 		/* set buffer pointers to beginning of the buffer */
+ 		begloc = input_buf;
+ 		buffer_index = 0;
+ 
+ 		/*
+ 		 * Continue if some bytes were read or if we didn't reach EOF. If we
+ 		 * both reached EOF _and_ no bytes were read, quit the loop: we are
+ 		 * done.
+ 		 */
+ 		if (bytesread > 0 || !fe_eof)
+ 		{
+ 
+ 			while (!buf_done)
+ 			{
+ 				bool skip_tuple;
+ 				Oid loaded_oid = InvalidOid;
+ 
+ 				CHECK_FOR_INTERRUPTS();
+ 
+ 				/* Reset the per-tuple exprcontext */
+ 				ResetPerTupleExprContext(estate);
+ 
+ 				/* Switch into its memory context */
+ 				MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+ 
+ 				/* Initialize all values for row to NULL */
+ 				MemSet(values, 0, num_phys_attrs * sizeof(Datum));
+ 				MemSet(nulls, 'n', num_phys_attrs * sizeof(char));
+ 				/* reset attribute pointers */
+ 				MemSet(attr_offsets, 0, num_phys_attrs * sizeof(int));
+ 
+ 				result = NORMAL_ATTR;
+ 
+ 				/* Actually read the line into memory here */
+ 				line_done = CopyReadLineBuffered(bytesread);
+ 				copy_lineno++;
+ 
+ 				/*
+ 				 * If we didn't finish processing the data line, skip att
+ 				 * parsing and read more data, unless there is no more data
+ 				 * to read... (which means that the original last data line
+ 				 * is missing attrs and we want to catch that error)
+ 				 */
+ 				if (!line_done)
+ 				{
+ 					/* undo the increment: the line is not complete yet */
+ 					copy_lineno--;
+ 					if (!fe_eof || buf_done)
+ 						break;
+ 				}
+ 
+ 				if (file_has_oids)
+ 				{
+ 					char *oid_string;
+ 					/* can't be in CSV mode here */
+ 					oid_string = CopyReadOidAttr(delim, null_print, null_print_len,
+ 												 &result, &isnull);
+ 
+ 					if (isnull)
+ 						ereport(ERROR,
+ 								(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ 								 errmsg("null OID in COPY data")));
+ 					else
+ 					{
+ 						copy_attname = "oid";
+ 						loaded_oid = DatumGetObjectId(DirectFunctionCall1(oidin,
+ 											CStringGetDatum(oid_string)));
+ 						if (loaded_oid == InvalidOid)
+ 							ereport(ERROR,
+ 									(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ 									 errmsg("invalid OID in COPY data")));
+ 						copy_attname = NULL;
+ 					}
+ 				}
+ 
+ 				/* parse all the attributes in the data line */
+ 				CopyReadAllAttrs(delim, null_print, null_print_len,
+ 								 nulls, attnumlist, attr_offsets, num_phys_attrs, attr);
+ 
+ 				/*
+ 				 * Loop to read the user attributes on the line.
+ 				 */
+ 				foreach (cur, attnumlist)
+ 				{
+ 					int attnum = lfirst_int(cur);
+ 					int m = attnum - 1;
+ 
+ 					/* nulls[] uses ' ' for "has a value" and 'n' for NULL */
+ 					if (nulls[m] == ' ')
+ 						isnull = false;
+ 					else
+ 						isnull = true;
+ 
+ 					/* we read an SQL NULL, no need to do anything */
+ 					if (!isnull)
+ 					{
+ 						copy_attname = NameStr(attr[m]->attname);
+ 						values[m] = FunctionCall3(&in_functions[m],
+ 									CStringGetDatum(attr_buf.data + attr_offsets[m]),
+ 									ObjectIdGetDatum(typioparams[m]),
+ 									Int32GetDatum(attr[m]->atttypmod));
+ 						copy_attname = NULL;
+ 					}
+ 				}
+ 
+ 				/*
+ 				 * Now compute and insert any defaults available for the columns
+ 				 * not provided by the input data. Anything not processed here or
+ 				 * above will remain NULL.
+ 				 */
+ 				for (i = 0; i < num_defaults; i++)
+ 				{
+ 					values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext, &isnull, NULL);
+ 					if (!isnull)
+ 						nulls[defmap[i]] = ' ';
+ 				}
+ 
+ 				/*
+ 				 * Next apply any domain constraints
+ 				 */
+ 				if (hasConstraints)
+ 				{
+ 					ParamExecData *prmdata = &econtext->ecxt_param_exec_vals[0];
+ 
+ 					for (i = 0; i < num_phys_attrs; i++)
+ 					{
+ 						ExprState *exprstate = constraintexprs[i];
+ 
+ 						if (exprstate == NULL)
+ 							continue; /* no constraint for this attr */
+ 
+ 						/* Insert current row's value into the Param value */
+ 						prmdata->value = values[i];
+ 						prmdata->isnull = (nulls[i] == 'n');
+ 
+ 						/*
+ 						 * Execute the constraint expression. Allow the
+ 						 * expression to replace the value (consider e.g. a
+ 						 * timestamp precision restriction).
+ 						 */
+ 						values[i] = ExecEvalExpr(exprstate, econtext,
+ 												 &isnull, NULL);
+ 						nulls[i] = isnull ? 'n' : ' ';
+ 					}
+ 				}
+ 
+ 				/*
+ 				 * And now we can form the input tuple.
+ 				 */
+ 				tuple = heap_formtuple(tupDesc, values, nulls);
+ 
+ 				if (oids && file_has_oids)
+ 					HeapTupleSetOid(tuple, loaded_oid);
+ 
+ 				/*
+ 				 * Triggers and stuff need to be invoked in query context.
+ 				 */
+ 				MemoryContextSwitchTo(oldcontext);
+ 
+ 				skip_tuple = false;
+ 
+ 				/* BEFORE ROW INSERT Triggers */
+ 				if (resultRelInfo->ri_TrigDesc &&
+ 					resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
+ 				{
+ 					HeapTuple newtuple;
+ 
+ 					newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);
+ 
+ 					if (newtuple == NULL) /* "do nothing" */
+ 						skip_tuple = true;
+ 					else if (newtuple != tuple) /* modified by Trigger(s) */
+ 					{
+ 						heap_freetuple(tuple);
+ 						tuple = newtuple;
+ 					}
+ 				}
+ 
+ 				if (!skip_tuple)
+ 				{
+ 					/* Place tuple in tuple slot */
+ 					ExecStoreTuple(tuple, slot, InvalidBuffer, false);
+ 
+ 					/*
+ 					 * Check the constraints of the tuple
+ 					 */
+ 					if (rel->rd_att->constr)
+ 						ExecConstraints(resultRelInfo, slot, estate);
+ 
+ 					/*
+ 					 * OK, store the tuple and create index entries for it
+ 					 */
+ 					simple_heap_insert(rel, tuple);
+ 
+ 					if (resultRelInfo->ri_NumIndices > 0)
+ 						ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
+ 
+ 					/* AFTER ROW INSERT Triggers */
+ 					ExecARInsertTriggers(estate, resultRelInfo, tuple);
+ 				}
+ 
+ 				line_buf.len=0; /* we can reset line buffer now. */
+ 				line_buf.data[0] = '\0';
+ 				line_buf.cursor = 0;
+ 			} /* end while(!buf_done) */
+ 		} /* end if (bytesread > 0 || !fe_eof) */
+ 		else /* no bytes read, end of data */
+ 		{
+ 			no_more_data = true;
+ 		}
+ 	}while(!no_more_data);
+ 
+ 	/*
+ 	 * Done, clean up
+ 	 */
+ 	error_context_stack = errcontext.previous;
+ 
+ 	MemoryContextSwitchTo(oldcontext);
+ 
+ 	/*
+ 	 * Execute AFTER STATEMENT insertion triggers
+ 	 */
+ 	ExecASInsertTriggers(estate, resultRelInfo);
+ 
+ 	/*
+ 	 * Handle queued AFTER triggers
+ 	 */
+ 	AfterTriggerEndQuery(estate);
+ 
+ 	pfree(values);
+ 	pfree(nulls);
+ 
+ 	pfree(attr_offsets);
+ 
+ 	pfree(in_functions);
+ 	pfree(typioparams);
+ 	pfree(defmap);
+ 	pfree(defexprs);
+ 	pfree(constraintexprs);
+ 	pfree(force_notnull);
+ 
+ 	ExecDropSingleTupleTableSlot(slot);
+ 
+ 	ExecCloseIndices(resultRelInfo);
+ 
+ 	FreeExecutorState(estate);
+ }
+ 
  
  /*
   * Read the next input line and stash it in line_buf, with conversion to
***************
*** 2264,2269 ****
--- 2778,2784 ----
  			/* transfer converted data back to line_buf */
  			line_buf.len = 0;
  			line_buf.data[0] = '\0';
+ 			line_buf.cursor = 0;
  			appendBinaryStringInfo(&line_buf, cvt, strlen(cvt));
  		}
  	}
***************
*** 2286,2291 ****
--- 2801,3104 ----
  		return tolower(hex) - 'a' + 10;
  }
  
+ /*
+  * Detect the eol type by looking at the first data row.
+  * Possible eol types are NL, CR, or CRNL. If the eol type was
+  * detected, it is set and a boolean true is returned to
+  * indicate that detection was successful. If the first data row
+  * is longer than the input buffer, we return false and will
+  * try again in the next buffer.
+  *
+  * bytesread is the number of valid bytes currently held in input_buf;
+  * nothing beyond that count is examined, because the tail of the buffer
+  * may still hold bytes left over from an earlier, larger read.
+  */
+ static bool
+ DetectLineEnd(size_t bytesread)
+ {
+ 	char ch_found; /* the character the scan stopped on ('\n' or '\r') */
+ 	char *end;
+ 
+ 	if (bytesread == 0)
+ 		return false;
+ 
+ 	end = str2chrlen(input_buf, '\n', '\r', bytesread, &ch_found);
+ 	if (end == NULL)
+ 		return false;
+ 
+ 	if (ch_found == '\n')
+ 	{
+ 		eol_type = EOL_NL;
+ 		eol_ch[0] = '\n';
+ 		eol_ch[1] = '\0';
+ 
+ 		return true;
+ 	}
+ 
+ 	if (ch_found == '\r')
+ 	{
+ 		/* only peek at the byte after the CR if it was actually read */
+ 		if ((size_t) (end - input_buf) + 1 < bytesread && *(end + 1) == '\n')
+ 		{
+ 			eol_type = EOL_CRNL;
+ 			eol_ch[0] = '\r';
+ 			eol_ch[1] = '\n';
+ 		}
+ 		else
+ 		{
+ 			eol_type = EOL_CR;
+ 			eol_ch[0] = '\r';
+ 			eol_ch[1] = '\0';
+ 		}
+ 
+ 		return true;
+ 	}
+ 
+ 	/* the scan reported a character we did not ask for; treat as undetected */
+ 	return false;
+ }
+ 
+ 
+ /*
+  * Finds the next data line that is in the input buffer and loads it into line_buf.
+  * Returns an indication if the line that was read is complete (if an unescaped line-end was
+  * encountered). If we reached the end of buffer before the whole line was written into the
+  * line buffer then returns false.
+  *
+  * bytesread is the number of valid bytes in input_buf.  Scanning resumes at
+  * begloc / buffer_index, which are advanced past everything consumed.
+  *
+  * Side effects: sets line_done and buf_done, maintains esc_in_prevbuf and
+  * cr_in_prevbuf for the next buffer, sets line_buf_converted once a complete
+  * line has been converted to the server encoding, and sets fe_eof when the
+  * complete line is the "\." end marker.
+  *
+  * An incomplete line is returned unconverted: encoding conversion is applied
+  * exactly once, to the complete line, so a line that spans buffers is not
+  * converted twice and a multi-byte character split across buffers is not
+  * handed to the converter in pieces.
+  *
+  * NOTE(review): the escape test looks back at most two characters, so an
+  * end-of-line preceded by three or more backslashes is not classified by
+  * backslash parity -- confirm whether that matters for supported input.
+  */
+ static bool
+ CopyReadLineBuffered(size_t bytesread)
+ {
+ 	int linesize;
+ 	bool transcode = (client_encoding != server_encoding);
+ 	char *cvt;
+ 	bool end_marker;
+ 	bool line_complete = false;
+ 
+ 	/* mark that encoding conversion hasn't occurred yet */
+ 	line_buf_converted = false;
+ 
+ 	/*
+ 	 * Detect end of line type if not already detected.
+ 	 */
+ 	if (eol_type == EOL_UNKNOWN)
+ 	{
+ 		bool eol_detected = DetectLineEnd(bytesread);
+ 
+ 		if (!eol_detected)
+ 		{
+ 			/* load the bytes that were actually read into line buf, and quit */
+ 			appendBinaryStringInfo(&line_buf, input_buf, (int) bytesread);
+ 			line_done = false;
+ 			buf_done = true;
+ 
+ 			return line_done;
+ 		}
+ 	}
+ 
+ 	/*
+ 	 * Special case: eol is CRNL, last byte of previous buffer was an
+ 	 * unescaped CR and 1st byte of current buffer is NL. We check for
+ 	 * that here, but only when this buffer holds at least one byte and we
+ 	 * are still at its first byte.
+ 	 */
+ 	if (eol_type == EOL_CRNL && begloc == input_buf && bytesread > 0)
+ 	{
+ 		if (cr_in_prevbuf && *begloc == eol_ch[1])
+ 		{
+ 			/* load that one linefeed byte; the data line is now complete */
+ 			appendBinaryStringInfo(&line_buf, begloc, 1);
+ 			buffer_index++;
+ 			begloc++;
+ 
+ 			if ((size_t) buffer_index >= bytesread)
+ 				buf_done = true;
+ 
+ 			line_done = true;
+ 			esc_in_prevbuf = false;
+ 			line_complete = true;
+ 		}
+ 
+ 		cr_in_prevbuf = false;
+ 	}
+ 
+ 	/*
+ 	 * (we need a loop so that if eol_ch is found, but prev ch is backslash,
+ 	 * we can search for the next eol_ch)
+ 	 */
+ 	while (!line_complete)
+ 	{
+ 		endloc = strchrlen(begloc, eol_ch[0], bytesread - buffer_index);
+ 
+ 		if (endloc == NULL)
+ 		{
+ 			/* reached end of the valid data without finding an eol */
+ 			linesize = (int) bytesread - buffer_index;
+ 			appendBinaryStringInfo(&line_buf, begloc, linesize);
+ 			buffer_index += linesize;
+ 			begloc += linesize;
+ 
+ 			/*
+ 			 * Remember whether the data seen so far ends in a lone escape
+ 			 * character or (for CRNL) a CR, so the first byte of the next
+ 			 * buffer can be interpreted correctly.
+ 			 */
+ 			esc_in_prevbuf = false;
+ 			if (line_buf.len >= 1)
+ 			{
+ 				char *last_ch = line_buf.data + line_buf.len - 1; /* before terminating \0 */
+ 
+ 				if (*last_ch == escapec)
+ 				{
+ 					esc_in_prevbuf = true;
+ 
+ 					/* two escapes in a row: the second one is a literal */
+ 					if (line_buf.len >= 2 && *(last_ch - 1) == escapec)
+ 						esc_in_prevbuf = false;
+ 				}
+ 				else if (*last_ch == '\r')
+ 				{
+ 					if (eol_type == EOL_CRNL)
+ 						cr_in_prevbuf = true;
+ 				}
+ 			}
+ 
+ 			line_done = false;
+ 			buf_done = true;
+ 
+ 			/* incomplete line: no conversion and no end-marker test yet */
+ 			return line_done;
+ 		}
+ 		else /* found the 1st eol ch in input_buf. */
+ 		{
+ 			bool eol_found = true;
+ 			bool eol_escaped = true;
+ 
+ 			/*
+ 			 * Load that piece of data (potentially a data line) into the line buffer,
+ 			 * and update the pointers for the next scan.
+ 			 */
+ 			linesize = endloc - begloc + 1;
+ 			appendBinaryStringInfo(&line_buf, begloc, linesize);
+ 			buffer_index += linesize;
+ 			begloc = endloc + 1;
+ 
+ 			if (eol_type == EOL_CRNL)
+ 			{
+ 				/* check if there is a '\n' after the '\r', within the valid data */
+ 				if ((size_t) buffer_index < bytesread && *begloc == '\n')
+ 				{
+ 					/* this is a line end */
+ 					appendBinaryStringInfo(&line_buf, begloc, 1); /* load that '\n' */
+ 					buffer_index++;
+ 					begloc++;
+ 				}
+ 				else /* just a CR, not a line end */
+ 					eol_found = false;
+ 			}
+ 
+ 			/*
+ 			 * in some cases, this end of line char happens to be the
+ 			 * last character in the buffer. we need to catch that.
+ 			 */
+ 			if ((size_t) buffer_index >= bytesread)
+ 				buf_done = true;
+ 
+ 			/*
+ 			 * Check if the 1st end of line ch is escaped.
+ 			 */
+ 			if (endloc != input_buf) /* can we look 1 char back? */
+ 			{
+ 				if (*(endloc - 1) != escapec) /* prev char is not an escape */
+ 					eol_escaped = false;
+ 				else /* prev char is an escape */
+ 				{
+ 					if (endloc != (input_buf + 1)) /* can we look another char back? */
+ 					{
+ 						/* it's a double escape char, so it's not an escape */
+ 						if (*(endloc - 2) == escapec)
+ 							eol_escaped = false;
+ 						/* else it's a single escape char, so EOL is escaped */
+ 					}
+ 					else
+ 					{
+ 						/* we need to check in the last buffer */
+ 						if (esc_in_prevbuf) /* double escape char, so not an escape */
+ 							eol_escaped = false;
+ 					}
+ 				}
+ 			}
+ 			else /* this eol ch is first ch in buffer, check for escape in prev buf */
+ 			{
+ 				if (!esc_in_prevbuf)
+ 					eol_escaped = false;
+ 			}
+ 
+ 			esc_in_prevbuf = false; /* reset variable */
+ 
+ 			/*
+ 			 * if eol was found, and it isn't escaped, line is done
+ 			 */
+ 			if ((eol_escaped == false) && eol_found)
+ 			{
+ 				line_done = true;
+ 				line_complete = true;
+ 			}
+ 			else /* stay in the loop and process some more data. */
+ 				line_done = false;
+ 
+ 		} /* end of found eol_ch */
+ 	}
+ 
+ 	/*
+ 	 * Done reading the line. Convert it to server encoding.
+ 	 */
+ 	if (transcode)
+ 	{
+ 		cvt = (char *) pg_client_to_server((unsigned char *) line_buf.data,
+ 										   line_buf.len);
+ 		if (cvt != line_buf.data)
+ 		{
+ 			/* transfer converted data back to line_buf */
+ 			line_buf.len=0;
+ 			line_buf.data[0] = '\0';
+ 			line_buf.cursor = 0;
+ 			appendBinaryStringInfo(&line_buf,cvt,strlen(cvt));
+ 		}
+ 	}
+ 
+ 	/* indicate that conversion has occurred */
+ 	line_buf_converted = true;
+ 
+ 	/*
+ 	 * check if this line is an end marker -- "\."
+ 	 */
+ 	end_marker = false;
+ 
+ 	switch(eol_type)
+ 	{
+ 		case EOL_NL:
+ 			if (!strcmp(line_buf.data,"\\.\n"))
+ 				end_marker = true;
+ 			break;
+ 		case EOL_CR:
+ 			if (!strcmp(line_buf.data,"\\.\r"))
+ 				end_marker = true;
+ 			break;
+ 		case EOL_CRNL:
+ 			if (!strcmp(line_buf.data,"\\.\r\n"))
+ 				end_marker = true;
+ 			break;
+ 		case EOL_UNKNOWN:
+ 			break;
+ 	}
+ 
+ 	if (end_marker)
+ 	{
+ 		fe_eof = true;
+ 		/* we don't want to process a \. as data line, want to quit. */
+ 		line_done = false;
+ 		buf_done = true;
+ 	}
+ 
+ 	return line_done;
+ }
+ 
+ 
  /*----------
   * Read the value of a single attribute, performing de-escaping as needed.
   *
***************
*** 2314,2322 ****
  	int			end_cursor;
  	int			input_len;
  
! 	/* reset attribute_buf to empty */
! 	attribute_buf.len = 0;
! 	attribute_buf.data[0] = '\0';
  
  	/* set default status */
  	*result = END_OF_LINE;
--- 3127,3136 ----
  	int			end_cursor;
  	int			input_len;
  
! 	/* reset attr_buf to empty */
! 	attr_buf.len = 0;
! 	attr_buf.data[0] = '\0';
! 	attr_buf.cursor = 0;
  
  	/* set default status */
  	*result = END_OF_LINE;
***************
*** 2422,2428 ****
  				 */
  			}
  		}
! 		appendStringInfoCharMacro(&attribute_buf, c);
  	}
  
  	/* check whether raw input matched null marker */
--- 3236,3242 ----
  				 */
  			}
  		}
! 		appendStringInfoCharMacro(&attr_buf, c);
  	}
  
  	/* check whether raw input matched null marker */
***************
*** 2433,2439 ****
  	else
  		*isnull = false;
  
! 	return attribute_buf.data;
  }
  
  
--- 3247,3597 ----
  	else
  		*isnull = false;
  
! 	return attr_buf.data;
! }
! 
! /*
!  * Read the first attribute. This is mainly used to maintain support
!  * for an OID column.
All the rest of the columns will be read at once with ! * CopyReadAllAttrs(). ! */ ! static char * ! CopyReadOidAttr(const char *delim, const char *null_print, int null_print_len, ! CopyReadResult *result, bool *isnull) ! { ! char delimc = delim[0]; ! char *start_loc = line_buf.data + line_buf.cursor; ! char *end_loc; ! int attr_len = 0; ! int bytes_remaining; ! ! /* reset attribute buf to empty */ ! attr_buf.len = 0; ! attr_buf.data[0] = '\0'; ! attr_buf.cursor = 0; ! ! /* set default status */ ! *result = END_OF_LINE; ! ! /* # of bytes that were not yet processed in this line */ ! bytes_remaining = line_buf.len - line_buf.cursor; ! ! /* got to end of line */ ! if ( (end_loc = strchrlen(start_loc, delimc, bytes_remaining )) == NULL) ! { ! attr_len = bytes_remaining - 1; /* don't count '\n' in len calculation */ ! appendBinaryStringInfo(&attr_buf,start_loc,attr_len); ! line_buf.cursor += attr_len + 2; /* skip '\n' and '\0' */ ! ! *result = END_OF_LINE; ! } ! else /* found a delimiter */ ! { ! /* (we don't care if delim was preceded with a backslash, because it's an ! invalid OID anyway) */ ! ! attr_len = end_loc - start_loc; /* we don't include the delimiter ch */ ! ! appendBinaryStringInfo(&attr_buf,start_loc,attr_len); ! line_buf.cursor += attr_len + 1; ! ! *result = NORMAL_ATTR; ! } ! ! ! /* check whether raw input matched null marker */ ! if (attr_len == null_print_len && strncmp(start_loc, null_print, attr_len) == 0) ! *isnull = true; ! else ! *isnull = false; ! ! return attr_buf.data; ! } ! ! ! /* ! * Read all attributes. Attributes are parsed from line_buf and ! * inserted (all at once) to attr_buf, while saving pointers to ! * each attribute's starting position. ! * ! * When this routine finishes execution both the nulls array and ! * the attr_offsets array are updated. The attr_offsets will include ! * the offset from the beginning of the attribute array of which ! * each attribute begins. If a specific attribute is not used for this ! 
* COPY command (ommitted from the column list), a value of 0 will be assigned. ! * For example: for table foo(a,b,c,d,e) and COPY foo(a,b,e) ! * attr_offsets may look something like this after this routine ! * returns: [0,20,0,0,55]. That means that column "a" value starts ! * at byte offset 0, "b" in 20 and "e" in 55, in attr_buf. ! * ! * In the attribute buffer (attr_buf) each attribute ! * is terminated with a '\0', and therefore by using the attr_offsets ! * array we could point to a beginning of an attribute and have it ! * behave as a C string, much like previously done in COPY. ! * ! * Another aspect to improving performance is reducing the frequency ! * of data load into buffers. The original COPY read attribute code ! * loaded a character at a time. In here we try to load a chunk of data ! * at a time. Usually a chunk will include a full data row ! * (unless we have an escaped delim). That effectively reduces the number of ! * loads by a factor of number of bytes per row. This improves performance ! * greatly, unfortunately it add more complexity to the code. ! * ! * Global participants in parsing logic: ! * ! * line_buf.cursor -- an offset from beginning of the line buffer ! * that indicates where we are about to begin the next scan. Note that ! * if we have WITH OIDS this cursor is already shifted past the first ! * OID attribute. ! * ! * attr_buf.cursor -- an offset from the beginning of the ! * attribute buffer that indicates where the current attribute begins. ! */ ! static void ! CopyReadAllAttrs(const char *delim, const char *null_print, int null_print_len, ! char *nulls, List *attnumlist , int *attr_offsets, ! int num_phys_attrs, Form_pg_attribute *attr) ! { ! char delimc = delim[0]; /* delimiter character */ ! char *scan_start; /* pointer to line buffer where scan should start. */ ! char *scan_end; /* pointer to line buffer where char was found */ ! char ch_found; /* the character found in the scan (delimc or escape) */ ! 
int attr_pre_len; /* current attr raw len, before processing escapes */ ! int attr_post_len; /* current attr len after escaping */ ! int m; /* attribute index being parsed */ ! int bytes_remaining; /* num of bytes remaining to be scanned in line buf */ ! int chunk_start; /* offset to beginning of line chunk to load */ ! int chunk_len; /* length of chunk of data to load to attr buf */ ! int oct_val; /* byte value for octal escapes */ ! int attnum; /* attribute number being parsed */ ! ListCell *cur; /* cursor to attribute list used for this COPY */ ! int attribute; ! ! /* ! * init variables for attribute scan ! */ ! attr_buf.len = 0; ! attr_buf.data[0] = '\0'; ! attr_buf.cursor = 0; ! /* cursor is now > 0 if we copy WITH OIDS */ ! scan_start = line_buf.data + line_buf.cursor; ! cur = list_head(attnumlist); ! attnum = lfirst_int(cur); ! m = attnum - 1; ! chunk_start = line_buf.cursor; ! chunk_len = 0; ! attr_pre_len = 0; ! attr_post_len = 0; ! ! /* ! * Scan through the line buffer to read all attributes data ! */ ! while(line_buf.cursor < line_buf.len) ! { ! bytes_remaining = line_buf.len - line_buf.cursor; ! ch_found = '\0'; ! ! if ( (scan_end = str2chrlen(scan_start, delimc, escapec, bytes_remaining, &ch_found )) ! == NULL) ! { ! /* GOT TO END OF LINE BUFFER */ ! ! if (cur == NULL) ! ereport(ERROR, ! (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), ! errmsg("extra data after last expected column"))); ! ! attnum = lfirst_int(cur); ! m = attnum - 1; ! ! /* don't count eol char(s) in attr and chunk len calculation */ ! if (eol_type == EOL_CRNL) ! { ! attr_pre_len += bytes_remaining - 2; ! chunk_len = line_buf.len - chunk_start - 2; ! } ! else ! { ! attr_pre_len += bytes_remaining - 1; ! chunk_len = line_buf.len - chunk_start - 1; ! } ! ! /* check if this is a NULL value or data value (assumed NULL) */ ! if (attr_pre_len == null_print_len ! && ! strncmp(line_buf.data + line_buf.len - attr_pre_len - 1, null_print, attr_pre_len) ! == 0) ! nulls[m] = 'n'; ! else ! 
nulls[m] = ' '; ! ! attr_offsets[m] = attr_buf.cursor; ! ! ! /* load the last chunk, the whole buffer in most cases */ ! appendBinaryStringInfo(&attr_buf,line_buf.data + chunk_start,chunk_len); ! ! line_buf.cursor += attr_pre_len + 2; /* skip eol char and '\0' to exit loop */ ! ! if (lnext(cur) != NULL) ! { ! /* ! * For an empty data line, the previous COPY code will ! * fail it during the conversion stage. We can fail it here ! * already, but then we will fail the regression tests b/c ! * of a different error message. that's why we return so we ! * can get the same error message that regress expects. ahh... ! * this conditional is unnecessary and should be removed soon. ! */ ! if (line_buf.len > 1) ! ereport(ERROR, ! (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), ! errmsg("missing data for column \"%s\"", ! NameStr(attr[m + 1]->attname)))); ! else ! return; ! } ! } ! else /* FOUND A DELIMITER OR ESCAPE */ ! { ! if (cur == NULL) ! ereport(ERROR, ! (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), ! errmsg("extra data after last expected column"))); ! ! if (ch_found == delimc) /* found a delimiter */ ! { ! attnum = lfirst_int(cur); ! m = attnum - 1; ! ! /* (we don't include the delimiter ch in length) */ ! attr_pre_len += scan_end - scan_start; ! /* (we don't include the delimiter ch in length) */ ! attr_post_len += scan_end - scan_start; ! ! /* check if this is a null print or data (assumed NULL) */ ! if (attr_pre_len == null_print_len ! && ! strncmp(scan_end - attr_pre_len, null_print, attr_pre_len) ! == 0) ! nulls[m] = 'n'; ! else ! nulls[m] = ' '; ! ! /* set the pointer to next attribute position */ ! attr_offsets[m] = attr_buf.cursor; ! ! /* update buffer cursors to our current location, +1 to skip the delimc */ ! line_buf.cursor = scan_end - line_buf.data + 1; ! attr_buf.cursor += attr_post_len + 1; ! ! /* prepare scan for next attr */ ! scan_start = line_buf.data + line_buf.cursor; ! cur = lnext(cur); ! attr_pre_len = 0; ! attr_post_len = 0; ! } ! 
else /* found an escape character */ ! { ! char nextc = *(scan_end + 1); ! char newc; ! int skip = 2; ! ! chunk_len = (scan_end - line_buf.data) - chunk_start + 1; ! ! /* load a chunk of data */ ! appendBinaryStringInfo(&attr_buf,line_buf.data + chunk_start,chunk_len); ! ! switch(nextc) ! { ! case '0': ! case '1': ! case '2': ! case '3': ! case '4': ! case '5': ! case '6': ! case '7': ! oct_val = OCTVALUE(nextc); ! nextc = *(scan_end + 2); ! /* (no need for out bad access check since line if buffered) */ ! if (ISOCTAL(nextc)) ! { ! skip++; ! oct_val = (oct_val << 3) + OCTVALUE(nextc); ! nextc = *(scan_end + 3); ! if (ISOCTAL(nextc)) ! { ! skip++; ! oct_val = (oct_val << 3) + OCTVALUE(nextc); ! } ! } ! newc = oct_val & 0377; /* the escaped byte value */ ! break; ! case 'b': ! newc = '\b'; ! break; ! case 'f': ! newc = '\f'; ! break; ! case 'n': ! newc = '\n'; ! break; ! case 'r': ! newc = '\r'; ! break; ! case 't': ! newc = '\t'; ! break; ! case 'v': ! newc = '\v'; ! break; ! default: ! if (nextc == delimc) ! newc = delimc; ! else if (nextc == escapec) ! newc = escapec; ! else /* no escape sequence, take next char literaly */ ! newc = nextc; ! break; ! } ! ! /* update to current length, add escape and escaped chars */ ! attr_pre_len += scan_end - scan_start + 2; ! /* update to current length, escaped char */ ! attr_post_len += scan_end - scan_start + 1; ! ! /* ! * Need to get rid of the escape character. This is done by ! * loading the chunk up to including the escape character ! * into the attribute buffer. Then overwritting the backslash ! * with the escaped sequence or char, and continuing to scan ! * from *after* the char than is after the escape in line buf. ! */ ! *(attr_buf.data + attr_buf.len - 1) = newc; ! line_buf.cursor = scan_end - line_buf.data + skip; ! scan_start = scan_end + skip; ! chunk_start = line_buf.cursor; ! chunk_len = 0; ! } ! ! } /* end delimiter/backslash */ ! ! } /* end line buffer scan. */ ! ! /* ! 
* Replace all delimiters with NULL for string termination. ! * NOTE: only delimiters (NOT necessarily all delimc) are replaced. ! * Example (delimc = '|'): ! * - Before: f 1 | f \| 2 | f 3 ! * - After : f 1 \0 f | 2 \0 f 3 ! */ ! for (attribute = 1; attribute < num_phys_attrs ; attribute++) ! { ! if (attr_offsets[attribute] != 0) ! *(attr_buf.data + attr_offsets[attribute] - 1) = '\0'; ! } ! } *************** *** 2477,2485 **** bool in_quote = false; bool saw_quote = false; ! /* reset attribute_buf to empty */ ! attribute_buf.len = 0; ! attribute_buf.data[0] = '\0'; /* set default status */ *result = END_OF_LINE; --- 3635,3644 ---- bool in_quote = false; bool saw_quote = false; ! /* reset attr_buf to empty */ ! attr_buf.len = 0; ! attr_buf.data[0] = '\0'; ! attr_buf.cursor = 0; /* set default status */ *result = END_OF_LINE; *************** *** 2519,2525 **** if (nextc == escapec || nextc == quotec) { ! appendStringInfoCharMacro(&attribute_buf, nextc); line_buf.cursor++; continue; } --- 3678,3684 ---- if (nextc == escapec || nextc == quotec) { ! appendStringInfoCharMacro(&attr_buf, nextc); line_buf.cursor++; continue; } *************** *** 2536,2542 **** in_quote = false; continue; } ! appendStringInfoCharMacro(&attribute_buf, c); } if (in_quote) --- 3695,3701 ---- in_quote = false; continue; } ! appendStringInfoCharMacro(&attr_buf, c); } if (in_quote) *************** *** 2550,2556 **** else *isnull = false; ! return attribute_buf.data; } /* --- 3709,3715 ---- else *isnull = false; ! return attr_buf.data; } /* *************** *** 2578,2606 **** (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), errmsg("invalid field size"))); ! /* reset attribute_buf to empty, and load raw data in it */ ! attribute_buf.len = 0; ! attribute_buf.data[0] = '\0'; ! attribute_buf.cursor = 0; ! enlargeStringInfo(&attribute_buf, fld_size); ! CopyGetData(attribute_buf.data, fld_size); if (CopyGetEof()) ereport(ERROR, (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), errmsg("unexpected EOF in COPY data"))); ! 
attribute_buf.len = fld_size; ! attribute_buf.data[fld_size] = '\0'; /* Call the column type's binary input converter */ result = FunctionCall2(flinfo, ! PointerGetDatum(&attribute_buf), ObjectIdGetDatum(typioparam)); /* Trouble if it didn't eat the whole buffer */ ! if (attribute_buf.cursor != attribute_buf.len) ereport(ERROR, (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), errmsg("incorrect binary data format"))); --- 3737,3765 ---- (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), errmsg("invalid field size"))); ! /* reset attr_buf to empty, and load raw data in it */ ! attr_buf.len = 0; ! attr_buf.data[0] = '\0'; ! attr_buf.cursor = 0; ! enlargeStringInfo(&attr_buf, fld_size); ! CopyGetData(attr_buf.data, fld_size); if (CopyGetEof()) ereport(ERROR, (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), errmsg("unexpected EOF in COPY data"))); ! attr_buf.len = fld_size; ! attr_buf.data[fld_size] = '\0'; /* Call the column type's binary input converter */ result = FunctionCall2(flinfo, ! PointerGetDatum(&attr_buf), ObjectIdGetDatum(typioparam)); /* Trouble if it didn't eat the whole buffer */ ! if (attr_buf.cursor != attr_buf.len) ereport(ERROR, (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), errmsg("incorrect binary data format"))); *************** *** 2777,2783 **** /* Validate the user-supplied list and extract attnums */ ListCell *l; ! foreach(l, attnamelist) { char *name = strVal(lfirst(l)); int attnum; --- 3936,3942 ---- /* Validate the user-supplied list and extract attnums */ ListCell *l; ! foreach (l, attnamelist) { char *name = strVal(lfirst(l)); int attnum; *************** *** 2797,2799 **** --- 3956,4017 ---- return attnums; } + + /****************************************************** + * utility functions, should be placed elsewhere in src + ******************************************************/ + /* + * These are custom versions of the string function strchr(). 
+ * As opposed to the original strchr which searches through + * a string until the target character is found, or a NULL is + * found, this version will not return when a NULL is found. + * Instead it will search through a pre-defined length of + * bytes and will return only if the target character(s) is reached. + * + * If our client encoding is not a supported server encoding, we + * know that it is not safe to look at each character as trailing + * byte in a multibyte character may be a 7-bit ASCII equivalent. + * Therefore we use pg_encoding_mblen to skip to the end of the + * character. + * + * parameters: + * s - string being searched. + * c(n) - char we are searching for. + * len - maximum # of bytes to search. + * + * returns: + * pointer to c - if c is located within the string. + * NULL - if c was not found in specified length of search. Note: + * this DOESN'T mean that a '\0' was reached. + */ + char *strchrlen(const char *s, int c, size_t len) + { + const char *start; + + if (client_encoding_only) + { + int mblen = pg_encoding_mblen(client_encoding, s); + + for (start = s; *s != c && s < start + len ; s += mblen) + mblen = pg_encoding_mblen(client_encoding, s); + } + else /* safe to scroll byte by byte */ + { + for (start = s; *s != c && s < start + len ; s++) + ; + } + + return ( (*s == c) ? (char *) s : NULL ); + } + + char *str2chrlen(const char *s, int c1, int c2, size_t len, char *c_found) + { + const char *start; + *c_found = '\0'; + + for (start = s; *s != c1 && *s != c2 && s < start + len ; s++) + ; + + *c_found = *s; + return ( *c_found != '\0' ? (char *) s : NULL ); + }