summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Heikki Linnakangas 2021-07-14 10:08:28 +0000
committer Heikki Linnakangas 2021-07-14 10:08:28 +0000
commit eec57115e4c866f26bdc8bcbe3e2e7be4c6d0450 (patch)
tree 90b277e1b9f9f7354818d8b1e1f28ce726828e6f
parent b4deefc39b933b9808645667117f2d8208092794 (diff)
In psql \copy from, send data to server in larger chunks.
Previously, we would send each line as a separate CopyData message. That's pretty wasteful if the table is narrow, as each CopyData message has 5 bytes of overhead. For efficiency, buffer up and pack 8 kB of input data into each CopyData message.

The server also sends each line as a separate CopyData message in COPY TO STDOUT, and that's similarly wasteful. But that's documented in the FE/BE protocol description, so changing that would be a wire protocol break.

Reviewed-by: Aleksander Alekseev
Discussion: https://www.postgresql.org/message-id/40b2cec0-d0fb-3191-2ae1-9a3fe16a7e48%40iki.fi
-rw-r--r-- src/bin/psql/copy.c 99
1 files changed, 56 insertions, 43 deletions
diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c
index e1fee8e099..64ab40c4f7 100644
--- a/src/bin/psql/copy.c
+++ b/src/bin/psql/copy.c
@@ -581,13 +581,21 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
else
{
bool copydone = false;
+ int buflen;
+ bool at_line_begin = true;
+ /*
+ * In text mode, we have to read the input one line at a time, so that
+ * we can stop reading at the EOF marker (\.). We mustn't read beyond
+ * the EOF marker, because if the data was inlined in a SQL script, we
+ * would eat up the commands after the EOF marker.
+ */
+ buflen = 0;
while (!copydone)
- { /* for each input line ... */
- bool firstload;
- bool linedone;
+ {
+ char *fgresult;
- if (showprompt)
+ if (at_line_begin && showprompt)
{
const char *prompt = get_prompt(PROMPT_COPY, NULL);
@@ -595,63 +603,68 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
fflush(stdout);
}
- firstload = true;
- linedone = false;
-
- while (!linedone)
- { /* for each bufferload in line ... */
- int linelen;
- char *fgresult;
-
- /* enable longjmp while waiting for input */
- sigint_interrupt_enabled = true;
+ /* enable longjmp while waiting for input */
+ sigint_interrupt_enabled = true;
- fgresult = fgets(buf, sizeof(buf), copystream);
+ fgresult = fgets(&buf[buflen], COPYBUFSIZ - buflen, copystream);
- sigint_interrupt_enabled = false;
+ sigint_interrupt_enabled = false;
- if (!fgresult)
- {
- copydone = true;
- break;
- }
+ if (!fgresult)
+ copydone = true;
+ else
+ {
+ int linelen;
- linelen = strlen(buf);
+ linelen = strlen(fgresult);
+ buflen += linelen;
/* current line is done? */
- if (linelen > 0 && buf[linelen - 1] == '\n')
- linedone = true;
-
- /* check for EOF marker, but not on a partial line */
- if (firstload)
+ if (buf[buflen - 1] == '\n')
{
- /*
- * This code erroneously assumes '\.' on a line alone
- * inside a quoted CSV string terminates the \copy.
- * https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org
- */
- if (strcmp(buf, "\\.\n") == 0 ||
- strcmp(buf, "\\.\r\n") == 0)
+ /* check for EOF marker, but not on a partial line */
+ if (at_line_begin)
{
- copydone = true;
- break;
+ /*
+ * This code erroneously assumes '\.' on a line alone
+ * inside a quoted CSV string terminates the \copy.
+ * https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org
+ */
+ if ((linelen == 3 && memcmp(fgresult, "\\.\n", 3) == 0) ||
+ (linelen == 4 && memcmp(fgresult, "\\.\r\n", 4) == 0))
+ {
+ copydone = true;
+ }
}
- firstload = false;
+ if (copystream == pset.cur_cmd_source)
+ {
+ pset.lineno++;
+ pset.stmt_lineno++;
+ }
+ at_line_begin = true;
}
+ else
+ at_line_begin = false;
+ }
- if (PQputCopyData(conn, buf, linelen) <= 0)
+ /*
+ * If the buffer is full, or we've reached the EOF, flush it.
+ *
+ * Make sure there's always space for four more bytes in the
+ * buffer, plus a NUL terminator. That way, an EOF marker is
+ * never split across two fgets() calls, which simplifies the logic.
+ */
+ if (buflen >= COPYBUFSIZ - 5 || (copydone && buflen > 0))
+ {
+ if (PQputCopyData(conn, buf, buflen) <= 0)
{
OK = false;
copydone = true;
break;
}
- }
- if (copystream == pset.cur_cmd_source)
- {
- pset.lineno++;
- pset.stmt_lineno++;
+ buflen = 0;
}
}
}