/*------------------------------------------------------------------------- * * copyops.c * Functions related to remote COPY data manipulation and materialization * of data redistribution * * Copyright (c) 2010-2012 Postgres-XC Development Group * * * IDENTIFICATION * src/backend/pgxc/copy/copyops.c * *------------------------------------------------------------------------- */ #include "postgres.h" #include "miscadmin.h" #include "fmgr.h" #include "lib/stringinfo.h" #include "mb/pg_wchar.h" #include "pgxc/copyops.h" #include "utils/lsyscache.h" /* NULL print marker */ #define COPYOPS_NULL_PRINT "\\N" /* Some octal operations */ #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7')) #define OCTVALUE(c) ((c) - '0') /* Send text representation of one attribute, with conversion and escaping */ #define DUMPSOFAR() \ do { \ if (ptr > start) \ appendBinaryStringInfo(buf, (char *) start, ptr - start); \ } while (0) static int get_decimal_from_hex(char hex); static void attribute_out_text(StringInfo buf, char *string); /* * Return decimal value for a hexadecimal digit */ static int get_decimal_from_hex(char hex) { if (isdigit((unsigned char) hex)) return hex - '0'; else return tolower((unsigned char) hex) - 'a' + 10; } /* * Output an attribute to text * This takes portions of the code of CopyAttributeOutText */ static void attribute_out_text(StringInfo buf, char *string) { char *ptr; char c; char *start; char delimc = COPYOPS_DELIMITER; bool need_transcoding, encoding_embeds_ascii; int file_encoding = pg_get_client_encoding(); need_transcoding = (file_encoding != GetDatabaseEncoding() || pg_database_encoding_max_length() > 1); encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(file_encoding); if (need_transcoding) ptr = pg_server_to_any(string, strlen(string), file_encoding); else ptr = string; /* * We have to grovel through the string searching for control characters * and instances of the delimiter character. In most cases, though, these * are infrequent. To avoid overhead from calling CopySendData once per * character, we dump out all characters between escaped characters in a * single call. The loop invariant is that the data from "start" to "ptr" * can be sent literally, but hasn't yet been. * * We can skip pg_encoding_mblen() overhead when encoding is safe, because * in valid backend encodings, extra bytes of a multibyte character never * look like ASCII. This loop is sufficiently performance-critical that * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out * of the normal safe-encoding path. */ if (encoding_embeds_ascii) { start = ptr; while ((c = *ptr) != '\0') { if ((unsigned char) c < (unsigned char) 0x20) { /* * \r and \n must be escaped, the others are traditional. We * prefer to dump these using the C-like notation, rather than * a backslash and the literal character, because it makes the * dump file a bit more proof against Microsoftish data * mangling. */ switch (c) { case '\b': c = 'b'; break; case '\f': c = 'f'; break; case '\n': c = 'n'; break; case '\r': c = 'r'; break; case '\t': c = 't'; break; case '\v': c = 'v'; break; default: /* If it's the delimiter, must backslash it */ if (c == delimc) break; /* All ASCII control chars are length 1 */ ptr++; continue; /* fall to end of loop */ } /* if we get here, we need to convert the control char */ DUMPSOFAR(); appendStringInfoCharMacro(buf, '\\'); appendStringInfoCharMacro(buf, c); start = ++ptr; } else if (c == '\\' || c == delimc) { DUMPSOFAR(); appendStringInfoCharMacro(buf, '\\'); start = ++ptr; } else if (IS_HIGHBIT_SET(c)) ptr += pg_encoding_mblen(file_encoding, ptr); else ptr++; } } else { start = ptr; while ((c = *ptr) != '\0') { if ((unsigned char) c < (unsigned char) 0x20) { /* * \r and \n must be escaped, the others are traditional. We * prefer to dump these using the C-like notation, rather than * a backslash and the literal character, because it makes the * dump file a bit more proof against Microsoftish data * mangling. */ switch (c) { case '\b': c = 'b'; break; case '\f': c = 'f'; break; case '\n': c = 'n'; break; case '\r': c = 'r'; break; case '\t': c = 't'; break; case '\v': c = 'v'; break; default: /* If it's the delimiter, must backslash it */ if (c == delimc) break; /* All ASCII control chars are length 1 */ ptr++; continue; /* fall to end of loop */ } /* if we get here, we need to convert the control char */ DUMPSOFAR(); appendStringInfoCharMacro(buf, '\\'); appendStringInfoCharMacro(buf, c); start = ++ptr; } else if (c == '\\' || c == delimc) { DUMPSOFAR(); appendStringInfoCharMacro(buf, '\\'); start = ++ptr; } else ptr++; } } DUMPSOFAR(); } /* * CopyOps_RawDataToArrayField * Convert the raw output of COPY TO to an array of fields. * This is a simplified version of CopyReadAttributesText used for data * redistribution and storage of tuple data into a tuple store. */ char ** CopyOps_RawDataToArrayField(TupleDesc tupdesc, char *message, int len, char **tmpbuf) { char delimc = COPYOPS_DELIMITER; int fieldno; int null_print_len = strlen(COPYOPS_NULL_PRINT); char *origin_ptr; char *output_ptr; char *cur_ptr; char *line_end_ptr; int fields = tupdesc->natts; char **raw_fields; Form_pg_attribute *attr = tupdesc->attrs; /* Adjust number of fields depending on dropped attributes */ for (fieldno = 0; fieldno < tupdesc->natts; fieldno++) { if (attr[fieldno]->attisdropped) fields--; } /* Then alloc necessary space */ raw_fields = (char **) palloc(fields * sizeof(char *)); /* Take a copy of message to manipulate */ *tmpbuf = origin_ptr = (char *) palloc0(sizeof(char) * (len + 1)); memcpy(origin_ptr, message, len + 1); /* Add clean separator '\0' at the end of message */ origin_ptr[len] = '\0'; /* Keep track of original pointer */ output_ptr = origin_ptr; /* set pointer variables for loop */ cur_ptr = message; line_end_ptr = message + len; /* Outer loop iterates over fields */ fieldno = 0; for (;;) { char *start_ptr; char *end_ptr; int input_len; bool found_delim = false; bool saw_non_ascii = false; /* Make sure there is enough space for the next value */ if (fieldno >= fields) { fields *= 2; raw_fields = repalloc(raw_fields, fields * sizeof(char *)); } /* Remember start of field on output side */ start_ptr = cur_ptr; raw_fields[fieldno] = output_ptr; /* Scan data for field */ for (;;) { char c; end_ptr = cur_ptr; if (cur_ptr >= line_end_ptr) break; c = *cur_ptr++; if (c == delimc) { found_delim = true; break; } if (c == '\\') { if (cur_ptr >= line_end_ptr) break; c = *cur_ptr++; switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { /* handle \013 */ int val; val = OCTVALUE(c); if (cur_ptr < line_end_ptr) { c = *cur_ptr; if (ISOCTAL(c)) { cur_ptr++; val = (val << 3) + OCTVALUE(c); if (cur_ptr < line_end_ptr) { c = *cur_ptr; if (ISOCTAL(c)) { cur_ptr++; val = (val << 3) + OCTVALUE(c); } } } } c = val & 0377; if (c == '\0' || IS_HIGHBIT_SET(c)) saw_non_ascii = true; } break; case 'x': /* Handle \x3F */ if (cur_ptr < line_end_ptr) { char hexchar = *cur_ptr; if (isxdigit((unsigned char) hexchar)) { int val = get_decimal_from_hex(hexchar); cur_ptr++; if (cur_ptr < line_end_ptr) { hexchar = *cur_ptr; if (isxdigit((unsigned char) hexchar)) { cur_ptr++; val = (val << 4) + get_decimal_from_hex(hexchar); } } c = val & 0xff; if (c == '\0' || IS_HIGHBIT_SET(c)) saw_non_ascii = true; } } break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; /* * in all other cases, take the char after '\' * literally */ } } /* Add c to output string */ *output_ptr++ = c; } /* Terminate attribute value in output area */ *output_ptr++ = '\0'; /* * If we de-escaped a non-7-bit-ASCII char, make sure we still have * valid data for the db encoding. Avoid calling strlen here for the * sake of efficiency. */ if (saw_non_ascii) { char *fld = raw_fields[fieldno]; pg_verifymbstr(fld, output_ptr - (fld + 1), false); } /* Check whether raw input matched null marker */ input_len = end_ptr - start_ptr; if (input_len == null_print_len && strncmp(start_ptr, COPYOPS_NULL_PRINT, input_len) == 0) raw_fields[fieldno] = NULL; fieldno++; /* Done if we hit EOL instead of a delim */ if (!found_delim) break; } /* Clean up state of attribute_buf */ output_ptr--; Assert(*output_ptr == '\0'); return raw_fields; } /* * CopyOps_BuildOneRowTo * Build one row message to be sent to remote nodes through COPY protocol */ char * CopyOps_BuildOneRowTo(TupleDesc tupdesc, Datum *values, bool *nulls, int *len) { bool need_delim = false; char *res; int i; FmgrInfo *out_functions; Form_pg_attribute *attr = tupdesc->attrs; StringInfo buf; /* Get info about the columns we need to process. */ out_functions = (FmgrInfo *) palloc(tupdesc->natts * sizeof(FmgrInfo)); for (i = 0; i < tupdesc->natts; i++) { Oid out_func_oid; bool isvarlena; /* Do not need any information for dropped attributes */ if (attr[i]->attisdropped) continue; getTypeOutputInfo(attr[i]->atttypid, &out_func_oid, &isvarlena); fmgr_info(out_func_oid, &out_functions[i]); } /* Initialize output buffer */ buf = makeStringInfo(); for (i = 0; i < tupdesc->natts; i++) { Datum value = values[i]; bool isnull = nulls[i]; /* Do not need any information for dropped attributes */ if (attr[i]->attisdropped) continue; if (need_delim) appendStringInfoCharMacro(buf, COPYOPS_DELIMITER); need_delim = true; if (isnull) { /* Null print value to client */ appendBinaryStringInfo(buf, "\\N", strlen("\\N")); } else { char *string; string = OutputFunctionCall(&out_functions[i], value); attribute_out_text(buf, string); pfree(string); } } /* Record length of message */ *len = buf->len; res = pstrdup(buf->data); pfree(out_functions); pfree(buf->data); pfree(buf); return res; }