Skip to content

Commit 8e3c58e

Browse files
committed Sep 6, 2020
Refactor pg_get_line() to expose an alternative StringInfo-based API.
Letting the caller provide a StringInfo to read into is helpful when the caller needs to merge lines or otherwise modify the data after it's been read. Notably, now the code added by commit 8f8154a can use pg_get_line_append() instead of having its own copy of that logic. A follow-on commit will also make use of this. Also, since StringInfo buffers are a minimum of 1KB long, blindly using pg_get_line() in a loop can eat a lot more memory than one would expect. I discovered for instance that commit e0f05cd caused initdb to consume circa 10MB to read postgres.bki, even though that's under 1MB worth of data. A less memory-hungry alternative is to re-use the same StringInfo for all lines and pg_strdup the results. Discussion: https://postgr.es/m/1315832.1599345736@sss.pgh.pa.us

File tree

4 files changed

+75
-50
lines changed

4 files changed

+75
-50
lines changed
 

‎src/backend/libpq/hba.c

+14-26
Original file line numberDiff line numberDiff line change
@@ -502,33 +502,8 @@ tokenize_file(const char *filename, FILE *file, List **tok_lines, int elevel)
502502
/* Collect the next input line, handling backslash continuations */
503503
resetStringInfo(&buf);
504504

505-
while (!feof(file) && !ferror(file))
505+
while (pg_get_line_append(file, &buf))
506506
{
507-
/* Make sure there's a reasonable amount of room in the buffer */
508-
enlargeStringInfo(&buf, 128);
509-
510-
/* Read some data, appending it to what we already have */
511-
if (fgets(buf.data + buf.len, buf.maxlen - buf.len, file) == NULL)
512-
{
513-
int save_errno = errno;
514-
515-
if (!ferror(file))
516-
break; /* normal EOF */
517-
/* I/O error! */
518-
ereport(elevel,
519-
(errcode_for_file_access(),
520-
errmsg("could not read file \"%s\": %m", filename)));
521-
err_msg = psprintf("could not read file \"%s\": %s",
522-
filename, strerror(save_errno));
523-
resetStringInfo(&buf);
524-
break;
525-
}
526-
buf.len += strlen(buf.data + buf.len);
527-
528-
/* If we haven't got a whole line, loop to read more */
529-
if (!(buf.len > 0 && buf.data[buf.len - 1] == '\n'))
530-
continue;
531-
532507
/* Strip trailing newline, including \r in case we're on Windows */
533508
buf.len = pg_strip_crlf(buf.data);
534509

@@ -551,6 +526,19 @@ tokenize_file(const char *filename, FILE *file, List **tok_lines, int elevel)
551526
break;
552527
}
553528

529+
if (ferror(file))
530+
{
531+
/* I/O error! */
532+
int save_errno = errno;
533+
534+
ereport(elevel,
535+
(errcode_for_file_access(),
536+
errmsg("could not read file \"%s\": %m", filename)));
537+
err_msg = psprintf("could not read file \"%s\": %s",
538+
filename, strerror(save_errno));
539+
break;
540+
}
541+
554542
/* Parse fields */
555543
lineptr = buf.data;
556544
while (*lineptr && err_msg == NULL)

‎src/bin/initdb/initdb.c

+9-3
Original file line numberDiff line numberDiff line change
@@ -470,21 +470,23 @@ readfile(const char *path)
470470
{
471471
char **result;
472472
FILE *infile;
473+
StringInfoData line;
473474
int maxlines;
474475
int n;
475-
char *ln;
476476

477477
if ((infile = fopen(path, "r")) == NULL)
478478
{
479479
pg_log_error("could not open file \"%s\" for reading: %m", path);
480480
exit(1);
481481
}
482482

483+
initStringInfo(&line);
484+
483485
maxlines = 1024;
484486
result = (char **) pg_malloc(maxlines * sizeof(char *));
485487

486488
n = 0;
487-
while ((ln = pg_get_line(infile)) != NULL)
489+
while (pg_get_line_append(infile, &line))
488490
{
489491
/* make sure there will be room for a trailing NULL pointer */
490492
if (n >= maxlines - 1)
@@ -493,10 +495,14 @@ readfile(const char *path)
493495
result = (char **) pg_realloc(result, maxlines * sizeof(char *));
494496
}
495497

496-
result[n++] = ln;
498+
result[n++] = pg_strdup(line.data);
499+
500+
resetStringInfo(&line);
497501
}
498502
result[n] = NULL;
499503

504+
pfree(line.data);
505+
500506
fclose(infile);
501507

502508
return result;

‎src/common/pg_get_line.c

+49-21
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@
4141
* Note that while I/O errors are reflected back to the caller to be
4242
* dealt with, an OOM condition for the palloc'd buffer will not be;
4343
* there'll be an ereport(ERROR) or exit(1) inside stringinfo.c.
44+
*
45+
* Also note that the palloc'd buffer is usually a lot longer than
46+
* strictly necessary, so it may be inadvisable to use this function
47+
* to collect lots of long-lived data. A less memory-hungry option
48+
* is to use pg_get_line_append() in a loop, then pstrdup() each line.
4449
*/
4550
char *
4651
pg_get_line(FILE *stream)
@@ -49,21 +54,7 @@ pg_get_line(FILE *stream)
4954

5055
initStringInfo(&buf);
5156

52-
/* Read some data, appending it to whatever we already have */
53-
while (fgets(buf.data + buf.len, buf.maxlen - buf.len, stream) != NULL)
54-
{
55-
buf.len += strlen(buf.data + buf.len);
56-
57-
/* Done if we have collected a newline */
58-
if (buf.len > 0 && buf.data[buf.len - 1] == '\n')
59-
return buf.data;
60-
61-
/* Make some more room in the buffer, and loop to read more data */
62-
enlargeStringInfo(&buf, 128);
63-
}
64-
65-
/* Did fgets() fail because of an I/O error? */
66-
if (ferror(stream))
57+
if (!pg_get_line_append(stream, &buf))
6758
{
6859
/* ensure that free() doesn't mess up errno */
6960
int save_errno = errno;
@@ -73,13 +64,50 @@ pg_get_line(FILE *stream)
7364
return NULL;
7465
}
7566

76-
/* If we read no data before reaching EOF, we should return NULL */
77-
if (buf.len == 0)
67+
return buf.data;
68+
}
69+
70+
/*
71+
* pg_get_line_append()
72+
*
73+
* This has similar behavior to pg_get_line(), and thence to fgets(),
74+
* except that the collected data is appended to whatever is in *buf.
75+
*
76+
* Returns true if a line was successfully collected (including the
77+
* case of a non-newline-terminated line at EOF). Returns false if
78+
* there was an I/O error or no data was available before EOF.
79+
* (Check ferror(stream) to distinguish these cases.)
80+
*
81+
* In the false-result case, the contents of *buf are logically unmodified,
82+
* though it's possible that the buffer has been resized.
83+
*/
84+
bool
85+
pg_get_line_append(FILE *stream, StringInfo buf)
86+
{
87+
int orig_len = buf->len;
88+
89+
/* Read some data, appending it to whatever we already have */
90+
while (fgets(buf->data + buf->len, buf->maxlen - buf->len, stream) != NULL)
91+
{
92+
buf->len += strlen(buf->data + buf->len);
93+
94+
/* Done if we have collected a newline */
95+
if (buf->len > orig_len && buf->data[buf->len - 1] == '\n')
96+
return true;
97+
98+
/* Make some more room in the buffer, and loop to read more data */
99+
enlargeStringInfo(buf, 128);
100+
}
101+
102+
/* Check for I/O errors and EOF */
103+
if (ferror(stream) || buf->len == orig_len)
78104
{
79-
pfree(buf.data);
80-
return NULL;
105+
/* Discard any data we collected before detecting error */
106+
buf->len = orig_len;
107+
buf->data[orig_len] = '\0';
108+
return false;
81109
}
82110

83-
/* No newline at EOF ... so return what we have */
84-
return buf.data;
111+
/* No newline at EOF, but we did collect some data */
112+
return true;
85113
}

‎src/include/common/string.h

+3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#ifndef COMMON_STRING_H
1111
#define COMMON_STRING_H
1212

13+
struct StringInfoData; /* avoid including stringinfo.h here */
14+
1315
/* functions in src/common/string.c */
1416
extern bool pg_str_endswith(const char *str, const char *end);
1517
extern int strtoint(const char *pg_restrict str, char **pg_restrict endptr,
@@ -19,6 +21,7 @@ extern int pg_strip_crlf(char *str);
1921

2022
/* functions in src/common/pg_get_line.c */
2123
extern char *pg_get_line(FILE *stream);
24+
extern bool pg_get_line_append(FILE *stream, struct StringInfoData *buf);
2225

2326
/* functions in src/common/sprompt.c */
2427
extern char *simple_prompt(const char *prompt, bool echo);

0 commit comments

Comments
 (0)
Please sign in to comment.