Index: src/backend/commands/copy.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/commands/copy.c,v
retrieving revision 1.245
diff -c -r1.245 copy.c
*** src/backend/commands/copy.c	2 Jun 2005 01:21:22 -0000	1.245
--- src/backend/commands/copy.c	27 Jun 2005 06:19:56 -0000
***************
*** 51,56 ****
--- 51,57 ----
  
  #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
  #define OCTVALUE(c) ((c) - '0')
+ #define COPY_BUF_SIZE 65536
  
  /*
   * Represents the different source/dest cases we need to worry about at
***************
*** 84,90 ****
  	EOL_CRNL
  } EolType;
  
- 
  static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
  
  /*
--- 85,90 ----
***************
*** 92,97 ****
--- 92,100 ----
   * never been reentrant...
   */
  static CopyDest copy_dest;
+ static int  eol_ch[2];          /* The byte values of the 1 or 2 eol bytes */
+ static char escapec;            /* escape char for delimited data format. */
+ static bool client_encoding_only;   /* true if client encoding is a non supported server encoding */
  static FILE *copy_file;			/* used if copy_dest == COPY_FILE */
  static StringInfo copy_msgbuf;	/* used if copy_dest == COPY_NEW_FE */
  static bool fe_eof;				/* true if detected end of copy data */
***************
*** 101,131 ****
  
  /* these are just for error messages, see copy_in_error_callback */
  static bool copy_binary;		/* is it a binary copy? */
! static const char *copy_relname;	/* table name for error messages */
  static int	copy_lineno;		/* line number for error messages */
! static const char *copy_attname;	/* current att for error messages */
  
  
  /*
   * These static variables are used to avoid incurring overhead for each
!  * attribute processed.  attribute_buf is reused on each CopyReadAttribute
   * call to hold the string being read in.  Under normal use it will soon
   * grow to a suitable size, and then we will avoid palloc/pfree overhead
   * for subsequent attributes.  Note that CopyReadAttribute returns a pointer
!  * to attribute_buf's data buffer!
   */
! static StringInfoData attribute_buf;
! 
  /*
   * Similarly, line_buf holds the whole input line being processed (its
   * cursor field points to the next character to be read by CopyReadAttribute).
   * The input cycle is first to read the whole line into line_buf, convert it
   * to server encoding, and then extract individual attribute fields into
!  * attribute_buf.  (We used to have CopyReadAttribute read the input source
   * directly, but that caused a lot of encoding issues and unnecessary logic
   * complexity.)
   */
! static StringInfoData line_buf;
  static bool line_buf_converted;
  
  /* non-export function prototypes */
--- 104,144 ----
  
  /* these are just for error messages, see copy_in_error_callback */
  static bool copy_binary;		/* is it a binary copy? */
! static const char *copy_relname;/* table name for error messages */
  static int	copy_lineno;		/* line number for error messages */
! static const char *copy_attname;/* current att for error messages */
! 
! /* Static variables for buffered input parsing */
! /* leave room for extra null (to enable use of string functions) */
! static char input_buf[COPY_BUF_SIZE + 1];
! static char *begloc;
! static char *endloc;
! static bool esc_in_prevbuf; /* backslash was last character of the data input buffer */
! static bool cr_in_prevbuf;  /* CR was last character of the data input buffer */
! static bool line_done;      /* finished processing the whole line or stopped in the middle */
! static bool buf_done;       /* finished processing the current buffer */
! static int buffer_index;    /* input buffer index */
  
  
  /*
   * These static variables are used to avoid incurring overhead for each
!  * attribute processed.  attr_buf is reused on each CopyReadAttribute
   * call to hold the string being read in.  Under normal use it will soon
   * grow to a suitable size, and then we will avoid palloc/pfree overhead
   * for subsequent attributes.  Note that CopyReadAttribute returns a pointer
!  * to attr_buf's data buffer!
   */
! static StringInfoData attr_buf; /* still used in CopyFrom()     */
  /*
   * Similarly, line_buf holds the whole input line being processed (its
   * cursor field points to the next character to be read by CopyReadAttribute).
   * The input cycle is first to read the whole line into line_buf, convert it
   * to server encoding, and then extract individual attribute fields into
!  * attr_buf.  (We used to have CopyReadAttribute read the input source
   * directly, but that caused a lot of encoding issues and unnecessary logic
   * complexity.)
   */
! static StringInfoData line_buf;  /* still used in CopyFromBinary/CSV() */
  static bool line_buf_converted;
  
  /* non-export function prototypes */
***************
*** 151,156 ****
--- 164,182 ----
  					char *escape, bool force_quote);
  static List *CopyGetAttnums(Relation rel, List *attnamelist);
  static void limit_printout_length(StringInfo buf);
+ static bool CopyReadLineBuffered(size_t bytesread);
+ static void CopyFromDelimited(Relation rel, List *attnumlist, bool binary, bool oids,
+ 							  char *delim, char *null_print, bool csv_mode, char *quote, char *escape,
+ 							  List *force_notnull_atts);
+ static char *CopyReadAttribute(const char *delim, const char *null_print,
+ 							   CopyReadResult *result, bool *isnull);
+ static void CopyReadAllAttrs(const char *delim, const char *null_print, int null_print_len,
+ 				             char *nulls, List *attnumlist , int *attr_offsets, int num_phys_attrs, 
+ 							 Form_pg_attribute *attr);
+ static char *CopyReadOidAttr(const char *delim, const char *null_print, int null_print_len,
+                               CopyReadResult *result, bool *isnull);
+ static bool DetectLineEnd(size_t bytesread);
+ 
  
  /* Internal communications functions */
  static void SendCopyBegin(bool binary, int natts);
***************
*** 160,166 ****
  static void CopySendString(const char *str);
  static void CopySendChar(char c);
  static void CopySendEndOfRow(bool binary);
! static void CopyGetData(void *databuf, int datasize);
  static int	CopyGetChar(void);
  
  #define CopyGetEof()  (fe_eof)
--- 186,192 ----
  static void CopySendString(const char *str);
  static void CopySendChar(char c);
  static void CopySendEndOfRow(bool binary);
! static int  CopyGetData(void *databuf, int datasize);
  static int	CopyGetChar(void);
  
  #define CopyGetEof()  (fe_eof)
***************
*** 171,176 ****
--- 197,205 ----
  static void CopySendInt16(int16 val);
  static int16 CopyGetInt16(void);
  
+ /* new parsing utils */
+ static char *strchrlen(const char *s, int c, size_t len);
+ static char *str2chrlen(const char *s, int c1, int c2, size_t len, char *c_found);
  
  /*
   * Send copy start/stop messages for frontend copies.  These have changed
***************
*** 383,396 ****
   * It seems unwise to allow the COPY IN to complete normally in that case.
   *
   * NB: no data conversion is applied by these functions
   */
! static void
  CopyGetData(void *databuf, int datasize)
  {
  	switch (copy_dest)
  	{
  		case COPY_FILE:
! 			fread(databuf, datasize, 1, copy_file);
  			if (feof(copy_file))
  				fe_eof = true;
  			break;
--- 412,430 ----
   * It seems unwise to allow the COPY IN to complete normally in that case.
   *
   * NB: no data conversion is applied by these functions
+  *
+  * Returns: the number of bytes that were successfully read 
+  * into the data buffer.
   */
! static int
  CopyGetData(void *databuf, int datasize)
  {
+ 	size_t bytesread = 0;
+ 	
  	switch (copy_dest)
  	{
  		case COPY_FILE:
! 			bytesread = fread(databuf, 1, datasize, copy_file);
  			if (feof(copy_file))
  				fe_eof = true;
  			break;
***************
*** 402,407 ****
--- 436,442 ----
  						(errcode(ERRCODE_CONNECTION_FAILURE),
  						 errmsg("unexpected EOF on client connection")));
  			}
+             bytesread += datasize; /* update the count of bytes that were read so far */
  			break;
  		case COPY_NEW_FE:
  			while (datasize > 0 && !fe_eof)
***************
*** 413,419 ****
  					/* Try to receive another message */
  					int			mtype;
  
! 			readmessage:
  					mtype = pq_getbyte();
  					if (mtype == EOF)
  						ereport(ERROR,
--- 448,454 ----
  					/* Try to receive another message */
  					int			mtype;
  
! readmessage:
  					mtype = pq_getbyte();
  					if (mtype == EOF)
  						ereport(ERROR,
***************
*** 430,436 ****
  						case 'c':		/* CopyDone */
  							/* COPY IN correctly terminated by frontend */
  							fe_eof = true;
! 							return;
  						case 'f':		/* CopyFail */
  							ereport(ERROR,
  									(errcode(ERRCODE_QUERY_CANCELED),
--- 465,471 ----
  						case 'c':		/* CopyDone */
  							/* COPY IN correctly terminated by frontend */
  							fe_eof = true;
! 							return bytesread;
  						case 'f':		/* CopyFail */
  							ereport(ERROR,
  									(errcode(ERRCODE_QUERY_CANCELED),
***************
*** 460,469 ****
--- 495,507 ----
  					avail = datasize;
  				pq_copymsgbytes(copy_msgbuf, databuf, avail);
  				databuf = (void *) ((char *) databuf + avail);
+ 				bytesread += avail; /* update the count of bytes that were read so far */
  				datasize -= avail;
  			}
  			break;
  	}
+ 	
+ 	return bytesread;
  }
  
  static int
***************
*** 709,715 ****
  	AclResult	aclresult;
  
  	/* Extract options from the statement node tree */
! 	foreach(option, stmt->options)
  	{
  		DefElem    *defel = (DefElem *) lfirst(option);
  
--- 747,753 ----
  	AclResult	aclresult;
  
  	/* Extract options from the statement node tree */
! 	foreach (option, stmt->options)
  	{
  		DefElem    *defel = (DefElem *) lfirst(option);
  
***************
*** 828,833 ****
--- 866,873 ----
  			escape = quote;
  	}
  
+ 	escapec = '\\'; /* will be configurable in the future */
+ 
  	/* Only single-character delimiter strings are supported. */
  	if (strlen(delim) != 1)
  		ereport(ERROR,
***************
*** 935,941 ****
  
  		force_quote_atts = CopyGetAttnums(rel, force_quote);
  
! 		foreach(cur, force_quote_atts)
  		{
  			int			attnum = lfirst_int(cur);
  
--- 975,981 ----
  
  		force_quote_atts = CopyGetAttnums(rel, force_quote);
  
! 		foreach (cur, force_quote_atts)
  		{
  			int			attnum = lfirst_int(cur);
  
***************
*** 956,962 ****
  
  		force_notnull_atts = CopyGetAttnums(rel, force_notnull);
  
! 		foreach(cur, force_notnull_atts)
  		{
  			int			attnum = lfirst_int(cur);
  
--- 996,1002 ----
  
  		force_notnull_atts = CopyGetAttnums(rel, force_notnull);
  
! 		foreach (cur, force_notnull_atts)
  		{
  			int			attnum = lfirst_int(cur);
  
***************
*** 969,981 ****
  	}
  
  	/* Set up variables to avoid per-attribute overhead. */
! 	initStringInfo(&attribute_buf);
  	initStringInfo(&line_buf);
  	line_buf_converted = false;
  
  	client_encoding = pg_get_client_encoding();
  	server_encoding = GetDatabaseEncoding();
  
  	copy_dest = COPY_FILE;		/* default */
  	copy_file = NULL;
  	copy_msgbuf = NULL;
--- 1009,1039 ----
  	}
  
  	/* Set up variables to avoid per-attribute overhead. */
! 	initStringInfo(&attr_buf);
  	initStringInfo(&line_buf);
  	line_buf_converted = false;
  
  	client_encoding = pg_get_client_encoding();
  	server_encoding = GetDatabaseEncoding();
  
+ 	/*
+ 	 * Check whether the client encoding is one of the 5 encodings
+ 	 * that are not supported as server encodings.
+ 	 */
+ 	switch(client_encoding)
+ 	{
+ 		case PG_SJIS:
+ 		case PG_BIG5:
+ 		case PG_GBK:
+ 		case PG_UHC:
+ 		case PG_GB18030:
+ 			client_encoding_only = true;
+ 			break;
+ 		default:
+ 			client_encoding_only = false;
+ 	}
+ 	
+ 
  	copy_dest = COPY_FILE;		/* default */
  	copy_file = NULL;
  	copy_msgbuf = NULL;
***************
*** 1029,1036 ****
--- 1087,1101 ----
  						 errmsg("\"%s\" is a directory", filename)));
  			}
  		}
+               
+               if (csv_mode || binary) /* old path */
  		CopyFrom(rel, attnumlist, binary, oids, delim, null_print, csv_mode,
  				 quote, escape, force_notnull_atts, header_line);
+               /* new path for improved performance (only for delimited format for now) */
+               else
+         CopyFromDelimited(rel, attnumlist, binary, oids, delim, null_print, csv_mode,
+                  quote, escape, force_notnull_atts);
+ 
  	}
  	else
  	{							/* copy from database to file */
***************
*** 1106,1114 ****
  					 errmsg("could not write to file \"%s\": %m",
  							filename)));
  	}
! 	pfree(attribute_buf.data);
  	pfree(line_buf.data);
  
  	/*
  	 * Close the relation.	If reading, we can release the AccessShareLock
  	 * we got; if writing, we should hold the lock until end of
--- 1171,1180 ----
  					 errmsg("could not write to file \"%s\": %m",
  							filename)));
  	}
! 	pfree(attr_buf.data);
  	pfree(line_buf.data);
  
+ 
  	/*
  	 * Close the relation.	If reading, we can release the AccessShareLock
  	 * we got; if writing, we should hold the lock until end of
***************
*** 1181,1187 ****
  	/* Get info about the columns we need to process. */
  	out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
  	force_quote = (bool *) palloc(num_phys_attrs * sizeof(bool));
! 	foreach(cur, attnumlist)
  	{
  		int			attnum = lfirst_int(cur);
  		Oid			out_func_oid;
--- 1247,1253 ----
  	/* Get info about the columns we need to process. */
  	out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
  	force_quote = (bool *) palloc(num_phys_attrs * sizeof(bool));
! 	foreach (cur, attnumlist)
  	{
  		int			attnum = lfirst_int(cur);
  		Oid			out_func_oid;
***************
*** 1248,1254 ****
  			bool hdr_delim = false;
  			char *colname;
  			
! 			foreach(cur, attnumlist)
  			{
  				int			attnum = lfirst_int(cur);
  
--- 1314,1320 ----
  			bool hdr_delim = false;
  			char *colname;
  			
! 			foreach (cur, attnumlist)
  			{
  				int			attnum = lfirst_int(cur);
  
***************
*** 1304,1310 ****
  			}
  		}
  
! 		foreach(cur, attnumlist)
  		{
  			int			attnum = lfirst_int(cur);
  			Datum		value;
--- 1370,1376 ----
  			}
  		}
  
! 		foreach (cur, attnumlist)
  		{
  			int			attnum = lfirst_int(cur);
  			Datum		value;
***************
*** 1346,1353 ****
  				{
  					bytea	   *outputbytes;
  
! 					outputbytes = DatumGetByteaP(FunctionCall1(&out_functions[attnum - 1],
! 															   value));
  					/* We assume the result will not have been toasted */
  					CopySendInt32(VARSIZE(outputbytes) - VARHDRSZ);
  					CopySendData(VARDATA(outputbytes),
--- 1412,1420 ----
  				{
  					bytea	   *outputbytes;
  
! 					outputbytes =
!                         DatumGetByteaP(FunctionCall1(&out_functions[attnum - 1],
! 												     value));
  					/* We assume the result will not have been toasted */
  					CopySendInt32(VARSIZE(outputbytes) - VARHDRSZ);
  					CopySendData(VARDATA(outputbytes),
***************
*** 1396,1405 ****
  		if (copy_attname)
  		{
  			/* error is relevant to a particular column */
! 			limit_printout_length(&attribute_buf);
  			errcontext("COPY %s, line %d, column %s: \"%s\"",
  					   copy_relname, copy_lineno, copy_attname,
! 					   attribute_buf.data);
  		}
  		else
  		{
--- 1463,1472 ----
  		if (copy_attname)
  		{
  			/* error is relevant to a particular column */
! 			limit_printout_length(&attr_buf);
  			errcontext("COPY %s, line %d, column %s: \"%s\"",
  					   copy_relname, copy_lineno, copy_attname,
! 					   attr_buf.data);
  		}
  		else
  		{
***************
*** 1407,1412 ****
--- 1474,1481 ----
  			if (line_buf_converted ||
  				client_encoding == server_encoding)
  			{
+                 /* Strip off the newline */
+ 				*(line_buf.data + line_buf.len - 1) ='\0';
  				limit_printout_length(&line_buf);
  				errcontext("COPY %s, line %d: \"%s\"",
  						   copy_relname, copy_lineno,
***************
*** 1460,1465 ****
--- 1529,1535 ----
  	appendStringInfoString(buf, "...");
  }
  
+ 
  /*
   * Copy FROM file to relation.
   */
***************
*** 1761,1767 ****
  			}
  
  			/* Loop to read the user attributes on the line. */
! 			foreach(cur, attnumlist)
  			{
  				int			attnum = lfirst_int(cur);
  				int			m = attnum - 1;
--- 1831,1837 ----
  			}
  
  			/* Loop to read the user attributes on the line. */
! 			foreach (cur, attnumlist)
  			{
  				int			attnum = lfirst_int(cur);
  				int			m = attnum - 1;
***************
*** 1855,1861 ****
  			}
  
  			i = 0;
! 			foreach(cur, attnumlist)
  			{
  				int			attnum = lfirst_int(cur);
  				int			m = attnum - 1;
--- 1925,1931 ----
  			}
  
  			i = 0;
! 			foreach (cur, attnumlist)
  			{
  				int			attnum = lfirst_int(cur);
  				int			m = attnum - 1;
***************
*** 1987,1992 ****
--- 2057,2506 ----
  	FreeExecutorState(estate);
  }
  
+ /*
+  * Copy FROM file to relation with faster processing.
+  */
+ static void
+ CopyFromDelimited(Relation rel, List *attnumlist, bool binary, bool oids,
+ 				  char *delim, char *null_print, bool csv_mode, char *quote,
+ 				  char *escape, List *force_notnull_atts)
+ {
+ 	HeapTuple	tuple;
+ 	TupleDesc	tupDesc;
+ 	Form_pg_attribute *attr;
+ 	AttrNumber	num_phys_attrs,
+ 		attr_count,
+ 		num_defaults;
+ 	FmgrInfo   *in_functions;
+ 	Oid		   *typioparams;
+ 	ExprState **constraintexprs;
+ 	bool	   *force_notnull;
+ 	bool		hasConstraints = false;
+ 	int			attnum;
+ 	int			i;
+ 	Oid			in_func_oid;
+ 	Datum	   *values;
+ 	char	   *nulls;
+     /* an array of offsets into attr_buf that point to beginning of attributes */
+ 	int	       *attr_offsets;
+ 	bool		isnull;
+ 	int        null_print_len; /* length of null print */
+ 	ResultRelInfo *resultRelInfo;
+ 	EState	   *estate = CreateExecutorState(); /* for ExecConstraints() */
+ 	TupleTableSlot *slot;
+ 	bool		file_has_oids;
+ 	int		   *defmap;
+ 	ExprState **defexprs;		/* array of default att expressions */
+ 	ExprContext *econtext;		/* used for ExecEvalExpr for default atts */
+ 	MemoryContext oldcontext = CurrentMemoryContext;
+ 	ErrorContextCallback errcontext;
+ 	bool	no_more_data;
+ 	CopyReadResult result;
+ 	ListCell   *cur;
+ 				
+ 	
+ 	tupDesc = RelationGetDescr(rel);
+ 	attr = tupDesc->attrs;
+ 	num_phys_attrs = tupDesc->natts;
+ 	attr_count = list_length(attnumlist);
+ 	num_defaults = 0;
+ 	
+     /*
+      * We need a ResultRelInfo so we can use the regular executor's
+      * index-entry-making machinery.  (There used to be a huge amount of
+      * code here that basically duplicated execUtils.c ...)
+      */
+     resultRelInfo = makeNode(ResultRelInfo);
+     resultRelInfo->ri_RangeTableIndex = 1;      /* dummy */
+     resultRelInfo->ri_RelationDesc = rel;
+     resultRelInfo->ri_TrigDesc = CopyTriggerDesc(rel->trigdesc);
+     if (resultRelInfo->ri_TrigDesc)
+         resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
+             palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(FmgrInfo));
+     resultRelInfo->ri_TrigInstrument = NULL;
+ 	
+ 	ExecOpenIndices(resultRelInfo);
+ 	
+ 	estate->es_result_relations = resultRelInfo;
+ 	estate->es_num_result_relations = 1;
+ 	estate->es_result_relation_info = resultRelInfo;
+ 	
+     /* Set up a tuple slot too */
+     slot = MakeSingleTupleTableSlot(tupDesc);
+ 	
+ 	econtext = GetPerTupleExprContext(estate);
+ 	
+ 	/*
+ 	 * Pick up the required catalog information for each attribute in the
+ 	 * relation, including the input function, the element type (to pass
+      * to the input function), and info about defaults and constraints.
+ 	 * (Which input function we use depends on text/binary format choice.)
+ 	 */
+ 	in_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
+ 	typioparams = (Oid *) palloc(num_phys_attrs * sizeof(Oid));
+ 	defmap = (int *) palloc(num_phys_attrs * sizeof(int));
+ 	defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
+ 	constraintexprs = (ExprState **) palloc0(num_phys_attrs * sizeof(ExprState *));
+ 	force_notnull = (bool *) palloc(num_phys_attrs * sizeof(bool));
+ 	
+ 	for (attnum = 1; attnum <= num_phys_attrs; attnum++)
+   	{
+ 		/* We don't need info for dropped attributes */
+ 		if (attr[attnum - 1]->attisdropped)
+ 			continue;
+ 		
+ 		getTypeInputInfo(attr[attnum - 1]->atttypid,
+ 						 &in_func_oid, &typioparams[attnum - 1]);
+ 		
+ 		fmgr_info(in_func_oid, &in_functions[attnum - 1]);
+ 		
+ 		if (list_member_int(force_notnull_atts, attnum))
+ 			force_notnull[attnum - 1] = true;
+ 		else
+ 			force_notnull[attnum - 1] = false;
+ 		
+ 		/* Get default info if needed */
+ 		if (!list_member_int(attnumlist, attnum))
+   		{
+ 			/* attribute is NOT to be copied from input */
+ 			/* use default value if one exists */
+ 			Node	   *defexpr = build_column_default(rel, attnum);
+ 			
+ 			if (defexpr != NULL)
+ 			{
+ 				defexprs[num_defaults] = ExecPrepareExpr((Expr *) defexpr,
+ 														 estate);
+ 				defmap[num_defaults] = attnum - 1;
+ 				num_defaults++;
+ 			}
+   		}
+ 		
+ 		/* If it's a domain type, set up to check domain constraints */
+ 		if (get_typtype(attr[attnum - 1]->atttypid) == 'd')
+ 		{
+ 			Param	   *prm;
+ 			Node	   *node;
+ 			
+ 			/*
+ 			 * Easiest way to do this is to use parse_coerce.c to set up
+ 			 * an expression that checks the constraints.  (At present,
+ 			 * the expression might contain a length-coercion-function
+ 			 * call and/or CoerceToDomain nodes.)  The bottom of the
+ 			 * expression is a Param node so that we can fill in the
+ 			 * actual datum during the data input loop.
+ 			 */
+ 			prm = makeNode(Param);
+ 			prm->paramkind = PARAM_EXEC;
+ 			prm->paramid = 0;
+ 			prm->paramtype = getBaseType(attr[attnum - 1]->atttypid);
+ 			
+ 			node = coerce_to_domain((Node *) prm,
+ 									prm->paramtype,
+ 									attr[attnum - 1]->atttypid,
+ 									COERCE_IMPLICIT_CAST, false, false);
+ 			
+ 			constraintexprs[attnum - 1] = ExecPrepareExpr((Expr *) node, estate);
+ 			hasConstraints = true;
+ 		}
+ 	}
+ 	
+ 	/*
+ 	 * Prepare to catch AFTER triggers.
+ 	 */
+ 	AfterTriggerBeginQuery();
+ 	
+ 	/*
+ 	 * Check BEFORE STATEMENT insertion triggers. It's debateable whether
+ 	 * we should do this for COPY, since it's not really an "INSERT"
+ 	 * statement as such. However, executing these triggers maintains
+ 	 * consistency with the EACH ROW triggers that we already fire on
+ 	 * COPY.
+ 	 */
+ 	ExecBSInsertTriggers(estate, resultRelInfo);
+ 				
+ 	file_has_oids = oids;	/* must rely on user to tell us this... */
+ 	
+ 	values = (Datum *) palloc(num_phys_attrs * sizeof(Datum));
+ 	nulls = (char *) palloc(num_phys_attrs * sizeof(char));
+ 	attr_offsets = (int *) palloc(num_phys_attrs * sizeof(int));
+ 	
+ 	/* Make room for a PARAM_EXEC value for domain constraint checks */
+ 	if (hasConstraints)
+ 		econtext->ecxt_param_exec_vals = (ParamExecData *)
+ 			palloc0(sizeof(ParamExecData));
+ 	
+ 	/* Initialize static variables */
+ 	fe_eof = false;
+ 	eol_type = EOL_UNKNOWN;
+ 	copy_binary = binary;
+ 	copy_relname = RelationGetRelationName(rel);
+ 	copy_lineno = 0;
+ 	copy_attname = NULL;
+ 	line_buf_converted = false;
+ 
+     /* it is MUCH faster to do this once here than in the attribute parse loop */
+ 	null_print_len = strlen(null_print);
+ 	
+ 	/* Set up callback to identify error line number */
+ 	errcontext.callback = copy_in_error_callback;
+ 	errcontext.arg = NULL;
+ 	errcontext.previous = error_context_stack;
+ 	error_context_stack = &errcontext;
+ 	
+ 	/* Set up data buffer to hold a chunk of data*/
+ 	MemSet(input_buf, ' ', COPY_BUF_SIZE * sizeof(char));
+ 	input_buf[COPY_BUF_SIZE] = '\0';
+ 	
+ 	no_more_data = false; /* no more input data to read from file or FE */
+ 	line_done = true;
+ 	
+ 	do
+ 	{
+ 		/* read a chunk of data into the buffer */
+ 		size_t bytesread = CopyGetData(input_buf, COPY_BUF_SIZE);
+ 		buf_done = false;
+ 		
+ 		/* set buffer pointers to beginning of the buffer */
+ 		begloc = input_buf;
+ 		buffer_index = 0; 
+ 		
+ 		/* Continue if some bytes were read or if we have not reached EOF. If we  *
+ 		 * reached EOF _and_ no bytes were read, we are done: quit the loop.      */
+         if (bytesread > 0 || !fe_eof)
+ 		{
+ 			
+ 			while (!buf_done)
+ 			{
+ 				bool		skip_tuple;
+ 				Oid			loaded_oid = InvalidOid;
+ 				
+ 				CHECK_FOR_INTERRUPTS();
+ 				
+ 				/* Reset the per-tuple exprcontext */
+ 				ResetPerTupleExprContext(estate);
+ 				
+ 				/* Switch into its memory context */
+ 				MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+ 				
+ 				/* Initialize all values for row to NULL */
+ 				MemSet(values, 0, num_phys_attrs * sizeof(Datum));
+ 				MemSet(nulls, 'n', num_phys_attrs * sizeof(char));
+                 /* reset attribute pointers */
+ 				MemSet(attr_offsets, 0, num_phys_attrs * sizeof(int));
+ 				
+ 				result = NORMAL_ATTR;
+ 				
+ 				/* Actually read the line into memory here */
+ 				line_done = CopyReadLineBuffered(bytesread);
+ 				copy_lineno++;
+ 				
+ 				/* If we did not finish reading the data line, skip attribute parsing and go
+ 				 * read more data, unless there is no more data to read (which means that the
+ 				 * last data line is missing attributes, and we want to catch that error).
+ 				 */
+ 				if (!line_done)
+ 				{
+ 					copy_lineno--;
+ 					if (!fe_eof || buf_done)
+ 						break;
+ 				}
+ 				
+ 				if (file_has_oids)
+ 				{
+ 					char	   *oid_string;
+ 					/* can't be in CSV mode here */
+ 					oid_string = CopyReadOidAttr(delim, null_print, null_print_len,
+ 												 &result, &isnull);
+ 					
+ 					if (isnull)
+ 						ereport(ERROR,
+ 						        (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ 								 errmsg("null OID in COPY data")));
+ 					else
+ 					{
+ 						copy_attname = "oid";
+ 						loaded_oid = DatumGetObjectId(DirectFunctionCall1(oidin,
+ 													  CStringGetDatum(oid_string)));
+ 						if (loaded_oid == InvalidOid)
+ 							ereport(ERROR,
+ 									(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ 									 errmsg("invalid OID in COPY data")));
+ 						copy_attname = NULL;
+ 					}
+ 				}
+ 				
+ 				/* parse all the attributes in the data line */
+ 				CopyReadAllAttrs(delim, null_print, null_print_len,
+ 								 nulls, attnumlist, attr_offsets, num_phys_attrs, attr);
+ 				
+ 				/*
+ 				 * Loop to read the user attributes on the line.
+ 				 */
+ 				foreach (cur, attnumlist)
+ 				{
+ 					int			attnum = lfirst_int(cur);
+ 					int			m = attnum - 1;
+ 					
+ 					if (nulls[m] == ' ')
+ 						isnull = false;
+ 					else 
+ 						isnull = true;
+ 					
+ 					/* for an SQL NULL there is nothing to do; otherwise call the input function */
+ 					if (!isnull)
+ 					{
+ 						copy_attname = NameStr(attr[m]->attname);
+ 						values[m] = FunctionCall3(&in_functions[m],
+ 												  CStringGetDatum(attr_buf.data + attr_offsets[m]),
+ 												  ObjectIdGetDatum(typioparams[m]),
+ 												  Int32GetDatum(attr[m]->atttypmod));
+ 						copy_attname = NULL;
+ 					}
+ 				}
+ 				
+ 				/*
+ 				 * Now compute and insert any defaults available for the columns
+ 				 * not provided by the input data.	Anything not processed here or
+ 				 * above will remain NULL.
+ 				 */
+ 				for (i = 0; i < num_defaults; i++)
+ 				{
+ 					values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext, &isnull, NULL);
+ 					if (!isnull)
+ 						nulls[defmap[i]] = ' ';
+ 				}
+ 				
+ 				/*
+ 				 * Next apply any domain constraints
+ 				 */
+ 				if (hasConstraints)
+ 				{
+ 					ParamExecData *prmdata = &econtext->ecxt_param_exec_vals[0];
+ 					
+ 					for (i = 0; i < num_phys_attrs; i++)
+ 					{
+ 						ExprState  *exprstate = constraintexprs[i];
+ 						
+ 						if (exprstate == NULL)
+ 							continue;	/* no constraint for this attr */
+ 						
+ 						/* Insert current row's value into the Param value */
+ 						prmdata->value = values[i];
+ 						prmdata->isnull = (nulls[i] == 'n');
+ 						
+ 						/*
+ 						 * Execute the constraint expression.  Allow the
+ 						 * expression to replace the value (consider e.g. a
+ 						 * timestamp precision restriction).
+ 						 */
+ 						values[i] = ExecEvalExpr(exprstate, econtext,
+ 												 &isnull, NULL);
+ 						nulls[i] = isnull ? 'n' : ' ';
+ 					}
+ 				}
+ 				
+ 				/*
+ 				 * And now we can form the input tuple.
+ 				 */
+ 				tuple = heap_formtuple(tupDesc, values, nulls);
+ 				
+ 				if (oids && file_has_oids)
+ 					HeapTupleSetOid(tuple, loaded_oid);
+ 				
+ 				/*
+ 				 * Triggers and stuff need to be invoked in query context.
+ 				 */
+ 				MemoryContextSwitchTo(oldcontext);
+ 				
+ 				skip_tuple = false;
+ 				
+ 				/* BEFORE ROW INSERT Triggers */
+ 				if (resultRelInfo->ri_TrigDesc &&
+ 					resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
+ 				{
+ 					HeapTuple	newtuple;
+ 					
+ 					newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);
+ 					
+ 					if (newtuple == NULL)		/* "do nothing" */
+ 						skip_tuple = true;
+ 					else if (newtuple != tuple) /* modified by Trigger(s) */
+ 					{
+ 						heap_freetuple(tuple);
+ 						tuple = newtuple;
+ 					}
+ 				}
+ 				
+ 				if (!skip_tuple)
+ 				{
+ 					/* Place tuple in tuple slot */
+ 					ExecStoreTuple(tuple, slot, InvalidBuffer, false);
+ 					
+ 					/*
+ 					 * Check the constraints of the tuple
+ 					 */
+ 					if (rel->rd_att->constr)
+ 						ExecConstraints(resultRelInfo, slot, estate);
+ 					
+ 					/*
+ 					 * OK, store the tuple and create index entries for it
+ 					 */
+ 					simple_heap_insert(rel, tuple);
+ 					
+ 					if (resultRelInfo->ri_NumIndices > 0)
+ 						ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
+ 					
+ 					/* AFTER ROW INSERT Triggers */
+ 					ExecARInsertTriggers(estate, resultRelInfo, tuple);
+ 				}
+ 				
+ 				line_buf.len=0; /* we can reset line buffer now. */
+ 	            line_buf.data[0] = '\0';
+ 	            line_buf.cursor = 0;
+ 			}   /* end while(!buf_done) */
+ 		}       /* end if (bytesread > 0 || !fe_eof) */
+ 		else /* no bytes read, end of data */
+ 		{
+ 			no_more_data = TRUE;
+ 		}
+ 	}while(!no_more_data);
+ 	
+ 	/*
+ 	 * Done, clean up
+ 	 */
+ 	error_context_stack = errcontext.previous;
+ 	
+ 	MemoryContextSwitchTo(oldcontext);
+ 	
+ 	/*
+ 	 * Execute AFTER STATEMENT insertion triggers
+ 	 */
+ 	ExecASInsertTriggers(estate, resultRelInfo);
+ 	
+ 	/*
+ 	 * Handle queued AFTER triggers
+ 	 */
+ 	AfterTriggerEndQuery(estate);
+ 	
+ 	pfree(values);
+ 	pfree(nulls);
+ 
+ 	pfree(attr_offsets);
+ 
+ 	pfree(in_functions);
+ 	pfree(typioparams);
+ 	pfree(defmap);
+ 	pfree(defexprs);
+ 	pfree(constraintexprs);
+ 	pfree(force_notnull);
+ 	
+ 	ExecDropSingleTupleTableSlot(slot);
+ 	
+ 	ExecCloseIndices(resultRelInfo);
+ 	
+ 	FreeExecutorState(estate);
+ }
+ 
  
  /*
   * Read the next input line and stash it in line_buf, with conversion to
***************
*** 2264,2269 ****
--- 2778,2784 ----
  			/* transfer converted data back to line_buf */
  			line_buf.len = 0;
  			line_buf.data[0] = '\0';
+             line_buf.cursor = 0;
  			appendBinaryStringInfo(&line_buf, cvt, strlen(cvt));
  		}
  	}
***************
*** 2286,2291 ****
--- 2801,3104 ----
  		return tolower(hex) - 'a' + 10;
  }
  
+ /*
+  * Detect the eol type by looking at the first data row.
+  * Possible eol types are NL, CR, or CRNL. If the eol type was
+  * detected, eol_type and eol_ch[] are set and true is returned
+  * to indicate that detection was successful. If no line end is
+  * found in the bytes just read (the first data row is longer
+  * than the input buffer), we return false and the caller will
+  * try again with the next buffer.
+  *
+  * bytesread is the number of valid bytes at the start of
+  * input_buf. Only those bytes are examined: anything beyond
+  * them is left over from an earlier read (or from the initial
+  * fill) and must not influence detection.
+  */
+ static bool
+ DetectLineEnd(size_t bytesread)
+ {
+ 	char		ch_found;	/* which of '\n' or '\r' the scan hit first */
+ 	char	   *end;		/* location of the first line-end byte */
+ 
+ 	/* never trust a count larger than the buffer itself */
+ 	if (bytesread > COPY_BUF_SIZE)
+ 		bytesread = COPY_BUF_SIZE;
+ 
+ 	/* nothing to look at */
+ 	if (bytesread == 0)
+ 		return false;
+ 
+ 	end = str2chrlen(input_buf, '\n', '\r', bytesread, &ch_found);
+ 	if (end == NULL)
+ 		return false;
+ 
+ 	if (ch_found == '\n')
+ 	{
+ 		eol_type = EOL_NL;
+ 		eol_ch[0] = '\n';
+ 		eol_ch[1] = '\0';
+ 	}
+ 	else if (end + 1 < input_buf + bytesread && *(end + 1) == '\n')
+ 	{
+ 		/* CR immediately followed by NL inside the valid data */
+ 		eol_type = EOL_CRNL;
+ 		eol_ch[0] = '\r';
+ 		eol_ch[1] = '\n';
+ 	}
+ 	else
+ 	{
+ 		/*
+ 		 * Lone CR, or CR in the last valid byte so that no
+ 		 * following byte is available to inspect.
+ 		 */
+ 		eol_type = EOL_CR;
+ 		eol_ch[0] = '\r';
+ 		eol_ch[1] = '\0';
+ 	}
+ 
+ 	return true;
+ }
+ 
+ 
+ /*
+  * Finds the next data line in the first bytesread bytes of the input buffer and
+  * appends it to line_buf. Returns true if the line is complete (an unescaped
+  * line end was encountered). If we reached the end of the buffer before the whole
+  * line was loaded, or the line is the "\." end marker, returns false.
+  */
+ static bool
+ CopyReadLineBuffered(size_t bytesread)
+ {
+ 	int			linesize;
+ 	bool		transcode = (client_encoding != server_encoding);
+ 	char	   *cvt;
+ 	bool		end_marker;
+ 
+ 	/* mark that encoding conversion hasn't occurred yet */
+ 	line_buf_converted = false;
+ 
+ 	/*
+ 	 * Detect end of line type if not already detected.
+ 	 */
+ 	if (eol_type == EOL_UNKNOWN)
+ 	{
+ 		bool		eol_detected = DetectLineEnd(bytesread);
+ 
+ 		if (!eol_detected)
+ 		{
+ 			/* load only the bytes actually read into line buf, and quit */
+ 			appendBinaryStringInfo(&line_buf, input_buf, bytesread);
+ 			line_done = false;
+ 			buf_done = true;
+ 
+ 			return line_done;
+ 		}
+ 	}
+ 
+ 	/*
+ 	 * Special case: eol is CRNL, last byte of previous buffer was an
+ 	 * unescaped CR and 1st byte of current buffer is NL. We check for
+ 	 * that here.
+ 	 */
+ 	if (eol_type == EOL_CRNL)
+ 	{
+ 		/* if we started scanning from the 1st byte of the buffer */
+ 		if (begloc == input_buf)
+ 		{
+ 			/* and had a CR in last byte of prev buf */
+ 			if (cr_in_prevbuf)
+ 			{
+ 				/* if this 1st byte in buffer is 2nd byte of line end sequence (linefeed) */
+ 				if (*begloc == eol_ch[1])
+ 				{
+ 					/* load that one linefeed byte and indicate we are done with the data line */
+ 					appendBinaryStringInfo(&line_buf, begloc, 1);
+ 					buffer_index++;
+ 					begloc++;
+ 
+ 					line_done = true;
+ 					esc_in_prevbuf = false;
+ 					cr_in_prevbuf = false;
+ 
+ 					return line_done;
+ 				}
+ 			}
+ 
+ 			cr_in_prevbuf = false;
+ 		}
+ 	}
+ 
+ 	/* (we need a loop so that if eol_ch is found, but prev ch is backslash, we can search
+ 	   for the next eol_ch) */
+ 	while (true)
+ 	{
+ 		/* an exhausted window counts as end of buffer; never scan past bytesread */
+ 		endloc = (buffer_index >= bytesread) ? NULL :
+ 			strchrlen(begloc, eol_ch[0], bytesread - buffer_index);
+ 		if (endloc == NULL)		/* reached end of buffer */
+ 		{
+ 			linesize = (int) bytesread - (int) (begloc - input_buf);
+ 			if (linesize > 0)	/* load only bytes that were read, never stale ones */
+ 				appendBinaryStringInfo(&line_buf, begloc, linesize);
+ 			if (line_buf.len > 1)
+ 			{
+ 				char *last_ch = line_buf.data + line_buf.len - 1; /* before terminating \0 */
+ 				if ( *last_ch == escapec )
+ 				{
+ 					esc_in_prevbuf = true;
+ 					if (line_buf.len > 2)	/* a doubled escape is a literal, not an escape */
+ 					{
+ 						last_ch--;
+ 						if (*last_ch == escapec)
+ 							esc_in_prevbuf = false;
+ 					}
+ 				}
+ 				else if ( *last_ch == '\r' )
+ 				{
+ 					if (eol_type == EOL_CRNL)
+ 						cr_in_prevbuf = true;
+ 				}
+ 			}
+ 			line_done = false;
+ 			buf_done = true;
+ 			break;
+ 		}
+ 		else /* found the 1st eol ch in input_buf. */
+ 		{
+ 			bool	eol_found = true;
+ 			bool	eol_escaped = true;
+ 			/*
+ 			 * Load that piece of data (potentially a data line) into the line buffer,
+ 			 * and update the pointers for the next scan.
+ 			 */
+ 			linesize = endloc - begloc + 1;
+ 			appendBinaryStringInfo(&line_buf, begloc, linesize);
+ 			buffer_index += linesize;
+ 			begloc = endloc + 1;
+ 
+ 			if (eol_type == EOL_CRNL)
+ 			{
+ 				/* check if there is a '\n' after the '\r' (only within the bytes read) */
+ 				if (buffer_index < bytesread && *(endloc + 1) == '\n')
+ 				{
+ 					/* this is a line end */
+ 					appendBinaryStringInfo(&line_buf, begloc, 1); /* load that '\n' */
+ 					buffer_index++;
+ 					begloc++;
+ 				}
+ 				else /* just a CR, not a line end */
+ 					eol_found = false;
+ 			}
+ 
+ 			/*
+ 			 * in some cases, this end of line char happens to be the
+ 			 * last character in the buffer. we need to catch that.
+ 			 */
+ 			if (buffer_index >= bytesread)
+ 				buf_done = true;
+ 
+ 			/*
+ 			 * Check if the 1st end of line ch is escaped.
+ 			 */
+ 			if (endloc != input_buf) /* can we look 1 char back? */
+ 			{
+ 				if (*(endloc - 1) != escapec) /* prev char is not an escape */
+ 					eol_escaped = false;
+ 				else /* prev char is an escape */
+ 				{
+ 					if (endloc != (input_buf + 1)) /* can we look another char back? */
+ 					{
+ 						/* it's a double escape char, so it's not an escape */
+ 						if (*(endloc - 2) == escapec)
+ 							eol_escaped = false;
+ 						/* else it's a single escape char, so EOL is escaped */
+ 					}
+ 					else
+ 					{
+ 						/* we need to check in the last buffer */
+ 						if (esc_in_prevbuf) /* double escape char, so not an escape */
+ 							eol_escaped = false;
+ 					}
+ 				}
+ 			}
+ 			else /* this eol ch is first ch in buffer, check for escape in prev buf */
+ 			{
+ 				if (!esc_in_prevbuf)
+ 					eol_escaped = false;
+ 			}
+ 
+ 			esc_in_prevbuf = false; /* reset variable */
+ 
+ 			/*
+ 			 * if eol was found, and it isn't escaped, line is done
+ 			 */
+ 			if ((eol_escaped == false) && eol_found)
+ 			{
+ 				line_done = true;
+ 				break;
+ 			}
+ 			else /* stay in the loop and process some more data. */
+ 				line_done = false;
+ 
+ 		} /* end of found eol_ch */
+ 	}
+ 	/*
+ 	 * Done reading. Convert what is in line_buf to server encoding. NOTE(review):
+ 	 * this also runs when only part of a line was loaded -- confirm that is intended.
+ 	 */
+ 	if (transcode)
+ 	{
+ 		cvt = (char *) pg_client_to_server((unsigned char *) line_buf.data,
+ 										   line_buf.len);
+ 		if (cvt != line_buf.data)
+ 		{
+ 			/* transfer converted data back to line_buf */
+ 			line_buf.len = 0;
+ 			line_buf.data[0] = '\0';
+ 			line_buf.cursor = 0;
+ 			appendBinaryStringInfo(&line_buf, cvt, strlen(cvt));
+ 		}
+ 	}
+ 
+ 	/* indicate that conversion had occurred */
+ 	line_buf_converted = true;
+ 
+ 	/*
+ 	 * check if this line is an end marker -- "\."
+ 	 */
+ 	end_marker = false;
+ 
+ 	switch(eol_type)
+ 	{
+ 		case EOL_NL:
+ 			if (!strcmp(line_buf.data,"\\.\n"))
+ 				end_marker = true;
+ 			break;
+ 		case EOL_CR:
+ 			if (!strcmp(line_buf.data,"\\.\r"))
+ 				end_marker = true;
+ 			break;
+ 		case EOL_CRNL:
+ 			if (!strcmp(line_buf.data,"\\.\r\n"))
+ 				end_marker = true;
+ 			break;
+ 		case EOL_UNKNOWN:
+ 			break;
+ 	}
+ 
+ 	if (end_marker)
+ 	{
+ 		fe_eof = true;
+ 		/* we don't want to process a \. as data line, want to quit. */
+ 		line_done = false;
+ 		buf_done = true;
+ 	}
+ 
+ 	return line_done;
+ }
+ 
+ 
  /*----------
   * Read the value of a single attribute, performing de-escaping as needed.
   *
***************
*** 2314,2322 ****
  	int			end_cursor;
  	int			input_len;
  
! 	/* reset attribute_buf to empty */
! 	attribute_buf.len = 0;
! 	attribute_buf.data[0] = '\0';
  
  	/* set default status */
  	*result = END_OF_LINE;
--- 3127,3136 ----
  	int			end_cursor;
  	int			input_len;
  
! 	/* reset attr_buf to empty */
! 	attr_buf.len = 0;
! 	attr_buf.data[0] = '\0';
! 	attr_buf.cursor = 0;
  
  	/* set default status */
  	*result = END_OF_LINE;
***************
*** 2422,2428 ****
  					 */
  			}
  		}
! 		appendStringInfoCharMacro(&attribute_buf, c);
  	}
  
  	/* check whether raw input matched null marker */
--- 3236,3242 ----
  					 */
  			}
  		}
! 		appendStringInfoCharMacro(&attr_buf, c);
  	}
  
  	/* check whether raw input matched null marker */
***************
*** 2433,2439 ****
  	else
  		*isnull = false;
  
! 	return attribute_buf.data;
  }
  
  
--- 3247,3597 ----
  	else
  		*isnull = false;
  
! 	return attr_buf.data;
! }
! 
! /*
!  * Read the first attribute. This is mainly used to maintain support
!  * for an OID column. All the rest of the columns will be read at once with
!  * CopyReadAllAttrs(). Returns attr_buf.data; sets *result and *isnull.
!  */
! static char *
! CopyReadOidAttr(const char *delim, const char *null_print, int null_print_len,
! 				CopyReadResult *result, bool *isnull)
! {
! 	char		delimc = delim[0];
! 	char	   *start_loc = line_buf.data + line_buf.cursor;
! 	char	   *end_loc;
! 	int			attr_len = 0;
! 	int			bytes_remaining;
! 	int			eol_len = (eol_type == EOL_CRNL) ? 2 : 1;	/* bytes in line end */
! 
! 	/* reset attribute buf to empty */
! 	attr_buf.len = 0;
! 	attr_buf.data[0] = '\0';
! 	attr_buf.cursor = 0;
! 
! 	/* # of bytes that were not yet processed in this line */
! 	bytes_remaining = line_buf.len - line_buf.cursor;
! 
! 	end_loc = strchrlen(start_loc, delimc, bytes_remaining);
! 	if (end_loc == NULL)	/* got to end of line */
! 	{
! 		/* don't count the eol char(s); CRNL is two bytes, as in CopyReadAllAttrs */
! 		attr_len = bytes_remaining - eol_len;
! 		if (attr_len < 0)	/* line shorter than its eol: nothing to load */
! 			attr_len = 0;
! 		appendBinaryStringInfo(&attr_buf, start_loc, attr_len);
! 		line_buf.cursor += attr_len + eol_len + 1;	/* skip eol and '\0' */
! 
! 		*result = END_OF_LINE;
! 	}
! 	else /* found a delimiter */
! 	{
! 		/*
! 		 * (we don't care if delim was preceded with a backslash, because it's
! 		 * an invalid OID anyway)
! 		 */
! 		attr_len = end_loc - start_loc; /* we don't include the delimiter ch */
! 		appendBinaryStringInfo(&attr_buf, start_loc, attr_len);
! 		line_buf.cursor += attr_len + 1;	/* step over the delimiter too */
! 
! 		*result = NORMAL_ATTR;
! 	}
! 
! 	/* check whether raw input matched null marker */
! 	if (attr_len == null_print_len && strncmp(start_loc, null_print, attr_len) == 0)
! 		*isnull = true;
! 	else
! 		*isnull = false;
! 
! 	return attr_buf.data;
! }
! 
! 
! /*
!  * Read all attributes. Attributes are parsed from line_buf and
!  * inserted (all at once) to attr_buf, while saving pointers to
!  * each attribute's starting position.
!  *
!  * When this routine finishes execution both the nulls array and
!  * the attr_offsets array are updated. The attr_offsets will include
!  * the offset from the beginning of the attribute array of which
!  * each attribute begins. If a specific attribute is not used for this
!  * COPY command (omitted from the column list), its entry is not written here
!  * and is expected to be 0 (presumably zeroed by the caller). For example:
!  * for table foo(a,b,c,d,e) and COPY foo(a,b,e) attr_offsets may look like
!  * [0,20,0,0,55] after this routine returns. That means that column "a" value
!  * starts at byte offset 0, "b" at 20 and "e" at 55, in attr_buf.
!  *
!  * In the attribute buffer (attr_buf) each attribute
!  * is terminated with a '\0', and therefore by using the attr_offsets
!  * array we could point to a beginning of an attribute and have it
!  * behave as a C string, much like previously done in COPY.
!  *
!  * Another aspect to improving performance is reducing the frequency
!  * of data load into buffers. The original COPY read attribute code
!  * loaded a character at a time. In here we try to load a chunk of data
!  * at a time. Usually a chunk will include a full data row
!  * (unless we have an escaped delim). That effectively reduces the number of
!  * loads by a factor of number of bytes per row. This improves performance
!  * greatly, unfortunately it adds more complexity to the code.
!  *
!  * Global participants in parsing logic:
!  *
!  * line_buf.cursor -- an offset from beginning of the line buffer
!  * that indicates where we are about to begin the next scan. Note that
!  * if we have WITH OIDS this cursor is already shifted past the first
!  * OID attribute.
!  *
!  * attr_buf.cursor -- an offset from the beginning of the
!  * attribute buffer that indicates where the current attribute begins.
!  */
! static void
! CopyReadAllAttrs(const char *delim, const char *null_print, int null_print_len,
! 				 char *nulls, List *attnumlist , int *attr_offsets,
! 				 int num_phys_attrs, Form_pg_attribute *attr)
! {
! 	char		delimc = delim[0];     /* delimiter character */
! 	char		*scan_start;           /* pointer to line buffer where scan should start. */
! 	char		*scan_end;             /* pointer to line buffer where char was found */
! 	char	    ch_found;       	   /* the character found in the scan (delimc or escape) */
! 	int         attr_pre_len;          /* current attr raw len, before processing escapes */
! 	int			attr_post_len;         /* current attr len after escaping */
! 	int		    m;          		   /* attribute index being parsed */
! 	int		    bytes_remaining;	   /* num of bytes remaining to be scanned in line buf */
! 	int			chunk_start;           /* offset to beginning of line chunk to load */
! 	int			chunk_len;			   /* length of chunk of data to load to attr buf */
! 	int			oct_val;               /* byte value for octal escapes */
! 	int		    attnum;                /* attribute number being parsed */
! 	ListCell    *cur;                  /* cursor to attribute list used for this COPY */
! 	int			eol_len;               /* number of eol bytes ending the line (1 or 2) */
! 	int attribute;
! 
! 	/*
! 	 * init variables for attribute scan
! 	 */
! 	attr_buf.len = 0;
! 	attr_buf.data[0] = '\0';
! 	attr_buf.cursor = 0;
! 	/* cursor is now > 0 if we copy WITH OIDS */
! 	scan_start = line_buf.data + line_buf.cursor;
! 	cur = list_head(attnumlist);
! 	attnum = lfirst_int(cur);
! 	m = attnum - 1;
! 	chunk_start = line_buf.cursor;
! 	chunk_len = 0;
! 	attr_pre_len = 0;
! 	attr_post_len = 0;
! 
! 	/*
! 	 * Scan through the line buffer to read all attributes data
! 	 */
! 	while(line_buf.cursor < line_buf.len)
! 	{
! 		bytes_remaining = line_buf.len - line_buf.cursor;
! 		ch_found = '\0';
! 
! 		if ( (scan_end = str2chrlen(scan_start, delimc, escapec, bytes_remaining, &ch_found ))
! 			 == NULL)
! 		{
! 			/* GOT TO END OF LINE BUFFER */
! 
! 			if (cur == NULL)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 						 errmsg("extra data after last expected column")));
! 
! 			attnum = lfirst_int(cur);
! 			m = attnum - 1;
! 
! 			/* don't count eol char(s) in attr and chunk len calculation; */
! 			/* a CRNL line end is two bytes, NL and CR are one */
! 			eol_len = (eol_type == EOL_CRNL) ? 2 : 1;
! 			attr_pre_len += bytes_remaining - eol_len;
! 			chunk_len = line_buf.len - chunk_start - eol_len;
! 
! 			/*
! 			 * check if this is a NULL value or data value (assumed NULL). The raw
! 			 * attribute is the attr_pre_len bytes right before the eol char(s).
! 			 */
! 			if (attr_pre_len == null_print_len
! 				&&
! 				strncmp(line_buf.data + line_buf.len - attr_pre_len - eol_len, null_print, attr_pre_len)
! 				== 0)
! 				nulls[m] = 'n';
! 			else
! 				nulls[m] = ' ';
! 
! 			attr_offsets[m] = attr_buf.cursor;
! 
! 
! 			/* load the last chunk, the whole buffer in most cases */
! 			appendBinaryStringInfo(&attr_buf, line_buf.data + chunk_start, chunk_len);
! 
! 			line_buf.cursor += attr_pre_len + 2; /* skip eol char and '\0' to exit loop */
! 
! 			if (lnext(cur) != NULL)
! 			{
! 				/*
! 				 * For an empty data line, the previous COPY code will
! 				 * fail it during the conversion stage. We can fail it here
! 				 * already, but then we will fail the regression tests b/c
! 				 * of a different error message. that's why we return so we
! 				 * can get the same error message that regress expects. ahh...
! 				 * this conditional is unnecessary and should be removed soon.
! 				 */
! 				/* name the next column in the COPY list, which need not be attr m + 1 */
! 				if (line_buf.len > 1)
! 					ereport(ERROR,
! 							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 							 errmsg("missing data for column \"%s\"",
! 									NameStr(attr[lfirst_int(lnext(cur)) - 1]->attname))));
! 				else
! 					return;
! 			}
! 		}
! 		else /* FOUND A DELIMITER OR ESCAPE */
! 		{
! 			if (cur == NULL)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 						 errmsg("extra data after last expected column")));
! 
! 			if (ch_found == delimc) /* found a delimiter */
! 			{
! 				attnum = lfirst_int(cur);
! 				m = attnum - 1;
! 
! 				/* (we don't include the delimiter ch in length) */
! 				attr_pre_len += scan_end - scan_start;
! 				/* (we don't include the delimiter ch in length) */
! 				attr_post_len += scan_end - scan_start;
! 
! 				/* check if this is a null print or data (assumed NULL) */
! 				if (attr_pre_len == null_print_len
! 					&&
! 					strncmp(scan_end - attr_pre_len, null_print, attr_pre_len)
! 					== 0)
! 					nulls[m] = 'n';
! 				else
! 					nulls[m] = ' ';
! 
! 				/* set the pointer to next attribute position */
! 				attr_offsets[m] = attr_buf.cursor;
! 
! 				/* update buffer cursors to our current location, +1 to skip the delimc */
! 				line_buf.cursor = scan_end - line_buf.data + 1;
! 				attr_buf.cursor += attr_post_len + 1;
! 
! 				/* prepare scan for next attr */
! 				scan_start = line_buf.data + line_buf.cursor;
! 				cur = lnext(cur);
! 				attr_pre_len = 0;
! 				attr_post_len = 0;
! 			}
! 			else /* found an escape character */
! 			{
! 				char nextc = *(scan_end + 1);
! 				char newc;
! 				int  skip = 2;
! 
! 				chunk_len = (scan_end - line_buf.data) - chunk_start + 1;
! 
! 				/* load a chunk of data */
! 				appendBinaryStringInfo(&attr_buf, line_buf.data + chunk_start, chunk_len);
! 
! 				switch(nextc)
! 				{
! 					case '0':
! 					case '1':
! 					case '2':
! 					case '3':
! 					case '4':
! 					case '5':
! 					case '6':
! 					case '7':
! 						oct_val = OCTVALUE(nextc);
! 						nextc = *(scan_end + 2);
! 						/* (no need for a bad access check since the line is buffered) */
! 						if (ISOCTAL(nextc))
! 						{
! 							skip++;
! 							oct_val = (oct_val << 3) + OCTVALUE(nextc);
! 							nextc = *(scan_end + 3);
! 							if (ISOCTAL(nextc))
! 							{
! 								skip++;
! 								oct_val = (oct_val << 3) + OCTVALUE(nextc);
! 							}
! 						}
! 						newc = oct_val & 0377; /* the escaped byte value */
! 						break;
! 					case 'b':
! 						newc = '\b';
! 						break;
! 					case 'f':
! 						newc = '\f';
! 						break;
! 					case 'n':
! 						newc = '\n';
! 						break;
! 					case 'r':
! 						newc = '\r';
! 						break;
! 					case 't':
! 						newc = '\t';
! 						break;
! 					case 'v':
! 						newc = '\v';
! 						break;
! 					default:
! 						if (nextc == delimc)
! 							newc = delimc;
! 						else if (nextc == escapec)
! 							newc = escapec;
! 						else /* no escape sequence, take next char literally */
! 							newc = nextc;
! 						break;
! 				}
! 
! 				/* update to current raw length: the escape char plus every char */
! 				/* it consumed (skip is 2, or 3-4 for octal sequences) */
! 				attr_pre_len += scan_end - scan_start + skip;
! 				/* update to current length, escaped char */
! 				attr_post_len += scan_end - scan_start + 1;
! 
! 				/*
! 				 * Need to get rid of the escape character. This is done by
! 				 * loading the chunk up to and including the escape character
! 				 * into the attribute buffer. Then overwriting the backslash
! 				 * with the escaped sequence or char, and continuing to scan
! 				 * from *after* the char that is after the escape in line buf.
! 				 */
! 				*(attr_buf.data + attr_buf.len - 1) = newc;
! 				line_buf.cursor = scan_end - line_buf.data + skip;
! 				scan_start = scan_end + skip;
! 				chunk_start = line_buf.cursor;
! 				chunk_len = 0;
! 			}
! 
! 		} /* end delimiter/backslash */
! 
! 	} /* end line buffer scan. */
! 
! 	/*
! 	 * Replace each delimiter that precedes a stored attribute with NUL for
! 	 * string termination. Start at index 0: with a reordered column list the
! 	 * first physical column may not be first in attr_buf. An offset of 0
! 	 * means "unused" or "starts attr_buf"; neither has a delimiter before it.
! 	 * Example (delimc = '|'):  f 1 | f \| 2 | f 3  ->  f 1 \0 f | 2 \0 f 3
! 	 */
! 	for (attribute = 0; attribute < num_phys_attrs ; attribute++)
! 	{
! 		if (attr_offsets[attribute] != 0)
! 			*(attr_buf.data + attr_offsets[attribute] - 1) = '\0';
! 	}
! 	
  }
  
  
***************
*** 2477,2485 ****
  	bool		in_quote = false;
  	bool		saw_quote = false;
  
! 	/* reset attribute_buf to empty */
! 	attribute_buf.len = 0;
! 	attribute_buf.data[0] = '\0';
  
  	/* set default status */
  	*result = END_OF_LINE;
--- 3635,3644 ----
  	bool		in_quote = false;
  	bool		saw_quote = false;
  
! 	/* reset attr_buf to empty */
! 	attr_buf.len = 0;
! 	attr_buf.data[0] = '\0';
! 	attr_buf.cursor = 0;
  
  	/* set default status */
  	*result = END_OF_LINE;
***************
*** 2519,2525 ****
  
  				if (nextc == escapec || nextc == quotec)
  				{
! 					appendStringInfoCharMacro(&attribute_buf, nextc);
  					line_buf.cursor++;
  					continue;
  				}
--- 3678,3684 ----
  
  				if (nextc == escapec || nextc == quotec)
  				{
! 					appendStringInfoCharMacro(&attr_buf, nextc);
  					line_buf.cursor++;
  					continue;
  				}
***************
*** 2536,2542 ****
  			in_quote = false;
  			continue;
  		}
! 		appendStringInfoCharMacro(&attribute_buf, c);
  	}
  
  	if (in_quote)
--- 3695,3701 ----
  			in_quote = false;
  			continue;
  		}
! 		appendStringInfoCharMacro(&attr_buf, c);
  	}
  
  	if (in_quote)
***************
*** 2550,2556 ****
  	else
  		*isnull = false;
  
! 	return attribute_buf.data;
  }
  
  /*
--- 3709,3715 ----
  	else
  		*isnull = false;
  
! 	return attr_buf.data;
  }
  
  /*
***************
*** 2578,2606 ****
  				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
  				 errmsg("invalid field size")));
  
! 	/* reset attribute_buf to empty, and load raw data in it */
! 	attribute_buf.len = 0;
! 	attribute_buf.data[0] = '\0';
! 	attribute_buf.cursor = 0;
  
! 	enlargeStringInfo(&attribute_buf, fld_size);
  
! 	CopyGetData(attribute_buf.data, fld_size);
  	if (CopyGetEof())
  		ereport(ERROR,
  				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
  				 errmsg("unexpected EOF in COPY data")));
  
! 	attribute_buf.len = fld_size;
! 	attribute_buf.data[fld_size] = '\0';
  
  	/* Call the column type's binary input converter */
  	result = FunctionCall2(flinfo,
! 						   PointerGetDatum(&attribute_buf),
  						   ObjectIdGetDatum(typioparam));
  
  	/* Trouble if it didn't eat the whole buffer */
! 	if (attribute_buf.cursor != attribute_buf.len)
  		ereport(ERROR,
  				(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
  				 errmsg("incorrect binary data format")));
--- 3737,3765 ----
  				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
  				 errmsg("invalid field size")));
  
! 	/* reset attr_buf to empty, and load raw data in it */
! 	attr_buf.len = 0;
! 	attr_buf.data[0] = '\0';
! 	attr_buf.cursor = 0;
  
! 	enlargeStringInfo(&attr_buf, fld_size);
  
! 	CopyGetData(attr_buf.data, fld_size);
  	if (CopyGetEof())
  		ereport(ERROR,
  				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
  				 errmsg("unexpected EOF in COPY data")));
  
! 	attr_buf.len = fld_size;
! 	attr_buf.data[fld_size] = '\0';
  
  	/* Call the column type's binary input converter */
  	result = FunctionCall2(flinfo,
! 						   PointerGetDatum(&attr_buf),
  						   ObjectIdGetDatum(typioparam));
  
  	/* Trouble if it didn't eat the whole buffer */
! 	if (attr_buf.cursor != attr_buf.len)
  		ereport(ERROR,
  				(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
  				 errmsg("incorrect binary data format")));
***************
*** 2777,2783 ****
  		/* Validate the user-supplied list and extract attnums */
  		ListCell   *l;
  
! 		foreach(l, attnamelist)
  		{
  			char	   *name = strVal(lfirst(l));
  			int			attnum;
--- 3936,3942 ----
  		/* Validate the user-supplied list and extract attnums */
  		ListCell   *l;
  
! 		foreach (l, attnamelist)
  		{
  			char	   *name = strVal(lfirst(l));
  			int			attnum;
***************
*** 2797,2799 ****
--- 3956,4017 ----
  
  	return attnums;
  }
+ 
+ /******************************************************
+  * utility functions, should be placed elsewhere in src
+  ******************************************************/
+ /*
+  * These are custom versions of the string function strchr(). 
+  * As opposed to the original strchr which searches through
+  * a string until the target character is found, or a NUL byte is
+  * found, this version will not return when a NUL byte is found.
+  * Instead it will search through a pre-defined length of 
+  * bytes and will return only if the target character(s) is reached.
+  * 
+  * If our client encoding is not a supported server encoding, we 
+  * know that it is not safe to look at each character as trailing
+  * byte in a multibyte character may be a 7-bit ASCII equivalent.
+  * Therefore we use pg_encoding_mblen to skip to the end of the
+  * character.
+  *
+  * parameters:
+  *   s    - string being searched.
+  *   c(n) - char we are searching for.
+  *   len  - maximum # of bytes to search.
+  *  
+  * returns:
+  *   pointer to c - if c is located within the string.
+  *   NULL - if c was not found in specified length of search. Note: 
+  *			this DOESN'T mean that a '\0' was reached.
+  */
+ char *strchrlen(const char *s, int c, size_t len)
+ {
+ 	const char *end = s + len;	/* one past the last byte we may examine */
+ 
+ 	if (client_encoding_only)
+ 	{
+ 		/* step one whole character at a time so a trail byte equal to c is skipped */
+ 		while (s < end && *s != c)
+ 			s += pg_encoding_mblen(client_encoding, s);
+ 	}
+ 	else /* safe to scroll byte by byte */
+ 	{
+ 		while (s < end && *s != c)
+ 			s++;
+ 	}
+ 
+ 	/* s >= end means the window was exhausted; never report (or read) s[len] */
+ 	return (s < end) ? (char *) s : NULL;
+ }
+ 
+ /* Two-target strchrlen(): *c_found gets the matched char, '\0' if none. */
+ char *str2chrlen(const char *s, int c1, int c2, size_t len, char *c_found)
+ {
+ 	const char *end = s + len;	/* one past the last byte we may examine */
+ 	/* test the bound before dereferencing, so s[len] is never read */
+ 	for (*c_found = '\0'; s < end && *s != c1 && *s != c2; s++)
+ 		;
+ 	if (s < end)
+ 		*c_found = *s;			/* stopped on a target, not on the bound */
+ 	return (s < end) ? (char *) s : NULL;
+ }