88 * 
99 * 
1010 * IDENTIFICATION 
11-  *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.236 2004/12/31 21:59:41 pgsql  Exp $ 
11+  *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.237 2005/03/12 05:41:34 momjian  Exp $ 
1212 * 
1313 *------------------------------------------------------------------------- 
1414 */ 
@@ -98,7 +98,6 @@ static bool fe_eof;				/* true if detected end of copy data */
9898static  EolType  eol_type ;		/* EOL type of input */ 
9999static  int 	client_encoding ;	/* remote side's character encoding */ 
100100static  int 	server_encoding ;	/* local encoding */ 
101- static  bool  embedded_line_warning ;
102101
103102/* these are just for error messages, see copy_in_error_callback */ 
104103static  bool  copy_binary ;		/* is it a binary copy? */ 
@@ -139,7 +138,7 @@ static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
139138static  void  CopyFrom (Relation  rel , List  * attnumlist , bool  binary , bool  oids ,
140139 char  * delim , char  * null_print , bool  csv_mode , char  * quote , char  * escape ,
141140		 List  * force_notnull_atts );
142- static  bool  CopyReadLine (void );
141+ static  bool  CopyReadLine (char   *   quote ,  char   *   escape );
143142static  char  * CopyReadAttribute (const  char  * delim , const  char  * null_print ,
144143				  CopyReadResult  * result , bool  * isnull );
145144static  char  * CopyReadAttributeCSV (const  char  * delim , const  char  * null_print ,
@@ -1191,7 +1190,6 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
11911190	attr  =  tupDesc -> attrs ;
11921191	num_phys_attrs  =  tupDesc -> natts ;
11931192	attr_count  =  list_length (attnumlist );
1194- 	embedded_line_warning  =  false;
11951193
11961194	/* 
11971195	 * Get info about the columns we need to process. 
@@ -1718,7 +1716,8 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
17181716			ListCell    * cur ;
17191717
17201718			/* Actually read the line into memory here */ 
1721- 			done  =  CopyReadLine ();
1719+ 			done  =  csv_mode  ? 
1720+ 				CopyReadLine (quote , escape ) : CopyReadLine (NULL , NULL );
17221721
17231722			/* 
17241723			 * EOF at start of line means we're done.  If we see EOF after 
@@ -2006,7 +2005,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
20062005 * by newline. 
20072006 */ 
20082007static  bool 
2009- CopyReadLine (void )
2008+ CopyReadLine (char   *   quote ,  char   *   escape )
20102009{
20112010	bool 		result ;
20122011	bool 		change_encoding  =  (client_encoding  !=  server_encoding );
@@ -2015,6 +2014,19 @@ CopyReadLine(void)
20152014	int 			j ;
20162015	unsigned char   s [2 ];
20172016	char 	   * cvt ;
2017+ 	bool         in_quote  =  false, last_was_esc  =  false, csv_mode  =  false;
2018+ 	char         quotec  =  '\0' , escapec  =  '\0' ;
2019+ 
2020+ 	if  (quote )
2021+ 	{
2022+ 		csv_mode  =  true;
2023+ 		quotec  =  quote [0 ];
2024+ 		escapec  =  escape [0 ];
2025+ 		/* ignore special escape processing if it's the same as quotec */ 
2026+ 		if  (quotec  ==  escapec )
2027+ 			escapec  =  '\0' ;
2028+ 	}
2029+ 
20182030
20192031	s [1 ] =  0 ;
20202032
@@ -2031,11 +2043,20 @@ CopyReadLine(void)
20312043
20322044	/* 
20332045	 * In this loop we only care for detecting newlines (\r and/or \n) and 
2034- 	 * the end-of-copy marker (\.).  For backwards compatibility we allow 
2046+ 	 * the end-of-copy marker (\.).   
2047+ 	 * 
2048+ 	 * In Text mode, for backwards compatibility we allow 
20352049	 * backslashes to escape newline characters.  Backslashes other than 
20362050	 * the end marker get put into the line_buf, since CopyReadAttribute 
2037- 	 * does its own escape processing.	These four characters, and only 
2038- 	 * these four, are assumed the same in frontend and backend encodings. 
2051+ 	 * does its own escape processing.	 
2052+ 	 * 
2053+ 	 * In CSV mode, CR and NL inside a quoted field are just part of the 
2054+ 	 * data value and are put in line_buf. We keep just enough state 
2055+ 	 * to know if we are currently in a quoted field or not. 
2056+ 	 * 
2057+ 	 * These four characters, and only these four, are assumed the same in  
2058+ 	 * frontend and backend encodings. 
2059+ 	 * 
20392060	 * We do not assume that second and later bytes of a frontend 
20402061	 * multibyte character couldn't look like ASCII characters. 
20412062	 */ 
@@ -2047,13 +2068,49 @@ CopyReadLine(void)
20472068			result  =  true;
20482069			break ;
20492070		}
2050- 		if  (c  ==  '\r' )
2071+ 
2072+ 		if  (csv_mode )
2073+ 		{
2074+ 			/*   
2075+ 			 * Dealing with quotes and escapes here is mildly tricky. If the 
2076+ 			 * quote char is also the escape char, there's no problem - we   
2077+ 			 * just use the char as a toggle. If they are different, we need 
2078+ 			 * to ensure that we only take account of an escape inside a quoted 
2079+ 			 * field and immediately preceding a quote char, and not the 
2080+ 			 * second in an escape-escape sequence. 
2081+ 			 */  
2082+ 
2083+ 			if  (in_quote  &&  c  ==  escapec )
2084+ 				last_was_esc  =  ! last_was_esc ;
2085+ 			if  (c  ==  quotec  &&  ! last_was_esc )
2086+ 				in_quote  =  ! in_quote ;
2087+ 			if  (c  !=  escapec )
2088+ 				last_was_esc  =  false;
2089+ 
2090+ 			/* 
2091+ 			 * updating the line count for embedded CR and/or LF chars is  
2092+ 			 * necessarily a little fragile - this test is probably about  
2093+ 			 * the best we can do. 
2094+ 			 */  
2095+ 			if  (in_quote  &&  c  ==  (eol_type  ==  EOL_CR  ? '\r'  : '\n' )) 
2096+ 				copy_lineno ++ ; 
2097+ 		}
2098+ 
2099+ 		if  (!in_quote  &&  c  ==  '\r' )
20512100		{
20522101			if  (eol_type  ==  EOL_NL )
2053- 				ereport (ERROR ,
2054- 						(errcode (ERRCODE_BAD_COPY_FILE_FORMAT ),
2055- 						 errmsg ("literal carriage return found in data" ),
2056- 				  errhint ("Use \"\\r\" to represent carriage return." )));
2102+ 			{
2103+ 				if  (! csv_mode )
2104+ 					ereport (ERROR ,
2105+ 							(errcode (ERRCODE_BAD_COPY_FILE_FORMAT ),
2106+ 							 errmsg ("literal carriage return found in data" ),
2107+ 							 errhint ("Use \"\\r\" to represent carriage return." )));
2108+ 				else 
2109+ 					ereport (ERROR ,
2110+ 							(errcode (ERRCODE_BAD_COPY_FILE_FORMAT ),
2111+ 							 errmsg ("unquoted carriage return found in CSV data" ),
2112+ 							 errhint ("Use quoted CSV field to represent carriage return." )));
2113+ 			}
20572114			/* Check for \r\n on first line, _and_ handle \r\n. */ 
20582115			if  (eol_type  ==  EOL_UNKNOWN  ||  eol_type  ==  EOL_CRNL )
20592116			{
@@ -2068,10 +2125,19 @@ CopyReadLine(void)
20682125				{
20692126					/* found \r, but no \n */ 
20702127					if  (eol_type  ==  EOL_CRNL )
2071- 						ereport (ERROR ,
2072- 								(errcode (ERRCODE_BAD_COPY_FILE_FORMAT ),
2073- 						 errmsg ("literal carriage return found in data" ),
2074- 								 errhint ("Use \"\\r\" to represent carriage return." )));
2128+ 					{
2129+ 						if  (!csv_mode )
2130+ 							ereport (ERROR ,
2131+ 									(errcode (ERRCODE_BAD_COPY_FILE_FORMAT ),
2132+ 									 errmsg ("literal carriage return found in data" ),
2133+ 									 errhint ("Use \"\\r\" to represent carriage return." )));
2134+ 						else 
2135+ 							ereport (ERROR ,
2136+ 									(errcode (ERRCODE_BAD_COPY_FILE_FORMAT ),
2137+ 									 errmsg ("unquoted carriage return found in data" ),
2138+ 									 errhint ("Use quoted CSV field to represent carriage return." )));
2139+ 
2140+ 					}
20752141
20762142					/* 
20772143					 * if we got here, it is the first line and we didn't 
@@ -2083,26 +2149,47 @@ CopyReadLine(void)
20832149			}
20842150			break ;
20852151		}
2086- 		if  (c  ==  '\n' )
2152+ 		if  (! in_quote   &&   c  ==  '\n' )
20872153		{
20882154			if  (eol_type  ==  EOL_CR  ||  eol_type  ==  EOL_CRNL )
2089- 				ereport (ERROR ,
2090- 						(errcode (ERRCODE_BAD_COPY_FILE_FORMAT ),
2091- 						 errmsg ("literal newline found in data" ),
2092- 						 errhint ("Use \"\\n\" to represent newline." )));
2155+ 			{
2156+ 				if  (!csv_mode )
2157+ 					ereport (ERROR ,
2158+ 							(errcode (ERRCODE_BAD_COPY_FILE_FORMAT ),
2159+ 							 errmsg ("literal newline found in data" ),
2160+ 							 errhint ("Use \"\\n\" to represent newline." )));
2161+ 				else 
2162+ 					ereport (ERROR ,
2163+ 							(errcode (ERRCODE_BAD_COPY_FILE_FORMAT ),
2164+ 							 errmsg ("unquoted newline found in data" ),
2165+ 							 errhint ("Use quoted CSV field to represent newline." )));
2166+ 					
2167+ 			}
20932168			eol_type  =  EOL_NL ;
20942169			break ;
20952170		}
2096- 		if  (c  ==  '\\' )
2171+ 
2172+ 		if  ((line_buf .len  ==  0  ||  !csv_mode ) &&  c  ==  '\\' )
20972173		{
2098- 			c  =  CopyGetChar ();
2099- 			if  (c  ==  EOF )
2174+ 			int  c2 ;
2175+ 			
2176+ 			if  (csv_mode )
2177+ 				c2  =  CopyPeekChar ();
2178+ 			else 
2179+ 				c2  =  c  =  CopyGetChar ();
2180+ 
2181+ 			if  (c2  ==  EOF )
21002182			{
21012183				result  =  true;
2184+ 				if  (csv_mode )
2185+ 					CopyDonePeek (c2 , true);
21022186				break ;
21032187			}
2104- 			if  (c  ==  '.' )
2188+ 			if  (c2  ==  '.' )
21052189			{
2190+ 				if  (csv_mode )
2191+ 					CopyDonePeek (c2 , true); /* allow keep calling GetChar() */ 
2192+ 
21062193				if  (eol_type  ==  EOL_CRNL )
21072194				{
21082195					c  =  CopyGetChar ();
@@ -2140,8 +2227,12 @@ CopyReadLine(void)
21402227				result  =  true;	/* report EOF */ 
21412228				break ;
21422229			}
2143- 			/* not EOF mark, so emit \ and following char literally */ 
2144- 			appendStringInfoCharMacro (& line_buf , '\\' );
2230+ 			
2231+ 			if  (csv_mode )
2232+ 				CopyDonePeek (c2 , false); /* not a dot, so put it back */  
2233+ 			else 
2234+ 				/* not EOF mark, so emit \ and following char literally */ 
2235+ 				appendStringInfoCharMacro (& line_buf , '\\' );
21452236		}
21462237
21472238		appendStringInfoCharMacro (& line_buf , c );
@@ -2369,34 +2460,6 @@ CopyReadAttributeCSV(const char *delim, const char *null_print, char *quote,
23692460
23702461	for  (;;)
23712462	{
2372- 		/* handle multiline quoted fields */ 
2373- 		if  (in_quote  &&  line_buf .cursor  >= line_buf .len )
2374- 		{
2375- 			bool 		done ;
2376- 
2377- 			switch  (eol_type )
2378- 			{
2379- 				case  EOL_NL :
2380- 					appendStringInfoString (& attribute_buf , "\n" );
2381- 					break ;
2382- 				case  EOL_CR :
2383- 					appendStringInfoString (& attribute_buf , "\r" );
2384- 					break ;
2385- 				case  EOL_CRNL :
2386- 					appendStringInfoString (& attribute_buf , "\r\n" );
2387- 					break ;
2388- 				case  EOL_UNKNOWN :
2389- 					/* shouldn't happen - just keep going */ 
2390- 					break ;
2391- 			}
2392- 
2393- 			copy_lineno ++ ;
2394- 			done  =  CopyReadLine ();
2395- 			if  (done  &&  line_buf .len  ==  0 )
2396- 				break ;
2397- 			start_cursor  =  line_buf .cursor ;
2398- 		}
2399- 
24002463		end_cursor  =  line_buf .cursor ;
24012464		if  (line_buf .cursor  >= line_buf .len )
24022465			break ;
@@ -2629,25 +2692,6 @@ CopyAttributeOutCSV(char *server_string, char *delim, char *quote,
26292692		 !use_quote  &&  (c  =  * test_string ) !=  '\0' ;
26302693		 test_string  +=  mblen )
26312694	{
2632- 		/* 
2633- 		 * We don't know here what the surrounding line end characters 
2634- 		 * might be. It might not even be under postgres' control. So 
2635- 		 * we simple warn on ANY embedded line ending character. 
2636- 		 * 
2637- 		 * This warning will disappear when we make line parsing field-aware, 
2638- 		 * so that we can reliably read in embedded line ending characters 
2639- 		 * regardless of the file's line-end context. 
2640- 		 * 
2641- 		 */ 
2642- 
2643- 		if  (!embedded_line_warning   &&  (c  ==  '\n'  ||  c  ==  '\r' ) )
2644- 		{
2645- 			embedded_line_warning  =  true;
2646- 			elog (WARNING ,
2647- 				 "CSV fields with embedded linefeed or carriage return " 
2648- 				 "characters might not be able to be reimported" );
2649- 		}
2650- 
26512695		if  (c  ==  delimc  ||  c  ==  quotec  ||  c  ==  '\n'  ||  c  ==  '\r' )
26522696			use_quote  =  true;
26532697		if  (!same_encoding )
0 commit comments