diff --git a/src/bin/pgbench/exprscan.l b/src/bin/pgbench/exprscan.l index 8943a52e9f04..b48009376917 100644 --- a/src/bin/pgbench/exprscan.l +++ b/src/bin/pgbench/exprscan.l @@ -271,10 +271,14 @@ void expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more) { PsqlScanState state = yyget_extra(yyscanner); - int error_detection_offset = expr_scanner_offset(state) - 1; + int lineno; + int error_detection_offset; YYSTYPE lval; char *full_line; + psql_scan_get_location(state, &lineno, &error_detection_offset); + error_detection_offset--; + /* * While parsing an expression, we may not have collected the whole line * yet from the input source. Lex till EOL so we can report whole line. @@ -289,7 +293,6 @@ expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more) /* Extract the line, trimming trailing newline if any */ full_line = expr_scanner_get_substring(state, expr_start_offset, - expr_scanner_offset(state), true); syntax_error(expr_source, expr_lineno, full_line, expr_command, @@ -336,12 +339,15 @@ expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset) /* And lex. */ lexresult = yylex(&lval, state->scanner); - /* - * Save start offset of word, if any. We could do this more efficiently, - * but for now this seems fine. - */ + /* Save start offset of word, if any. */ if (lexresult) - *offset = expr_scanner_offset(state) - word_buf->len; + { + int lineno; + int end_offset; + + psql_scan_get_location(state, &lineno, &end_offset); + *offset = end_offset - word_buf->len; + } else *offset = -1; @@ -404,65 +410,35 @@ expr_scanner_finish(yyscan_t yyscanner) } /* - * Get offset from start of string to end of current lexer token. + * Get a malloc'd copy of the lexer input string from start_offset + * to end of current lexer token. If chomp is true, drop any trailing + * newline(s). * * We rely on the knowledge that flex modifies the scan buffer by storing * a NUL at the end of the current token (yytext). Note that this might * not work quite right if we were parsing a sub-buffer, but since pgbench - * never invokes that functionality, it doesn't matter. - */ -int -expr_scanner_offset(PsqlScanState state) -{ - return strlen(state->scanbuf); -} - -/* - * Get a malloc'd copy of the lexer input string from start_offset - * to just before end_offset. If chomp is true, drop any trailing - * newline(s). + * never invokes that functionality, it doesn't matter. Also, this will + * give the wrong answer (the whole remainder of the input) if called + * before any yylex() call has been done. */ char * expr_scanner_get_substring(PsqlScanState state, - int start_offset, int end_offset, + int start_offset, bool chomp) { char *result; - const char *scanptr = state->scanbuf + start_offset; - int slen = end_offset - start_offset; - Assert(slen >= 0); - Assert(end_offset <= strlen(state->scanbuf)); + result = pg_strdup(state->scanbuf + start_offset); if (chomp) { + size_t slen = strlen(result); + while (slen > 0 && - (scanptr[slen - 1] == '\n' || scanptr[slen - 1] == '\r')) + (result[slen - 1] == '\n' || result[slen - 1] == '\r')) slen--; + result[slen] = '\0'; } - result = (char *) pg_malloc(slen + 1); - memcpy(result, scanptr, slen); - result[slen] = '\0'; - return result; } - -/* - * Get the line number associated with the given string offset - * (which must not be past the end of where we've lexed to). - */ -int -expr_scanner_get_lineno(PsqlScanState state, int offset) -{ - int lineno = 1; - const char *p = state->scanbuf; - - while (*p && offset > 0) - { - if (*p == '\n') - lineno++; - p++, offset--; - } - return lineno; -} diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index fdc957fa34dd..bf099aab2787 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -5677,22 +5677,17 @@ postprocess_sql_command(Command *my_command) * At call, we have scanned only the initial backslash. */ static Command * -process_backslash_command(PsqlScanState sstate, const char *source) +process_backslash_command(PsqlScanState sstate, const char *source, + int lineno, int start_offset) { Command *my_command; PQExpBufferData word_buf; int word_offset; int offsets[MAX_ARGS]; /* offsets of argument words */ - int start_offset; - int lineno; int j; initPQExpBuffer(&word_buf); - /* Remember location of the backslash */ - start_offset = expr_scanner_offset(sstate) - 1; - lineno = expr_scanner_get_lineno(sstate, start_offset); - /* Collect first word of command */ if (!expr_lex_one_word(sstate, &word_buf, &word_offset)) { @@ -5747,7 +5742,6 @@ process_backslash_command(PsqlScanState sstate, const char *source) my_command->first_line = expr_scanner_get_substring(sstate, start_offset, - expr_scanner_offset(sstate), true); expr_scanner_finish(yyscanner); @@ -5777,7 +5771,6 @@ process_backslash_command(PsqlScanState sstate, const char *source) my_command->first_line = expr_scanner_get_substring(sstate, start_offset, - expr_scanner_offset(sstate), true); if (my_command->meta == META_SLEEP) @@ -5952,8 +5945,6 @@ ParseScript(const char *script, const char *desc, int weight) PQExpBufferData line_buf; int alloc_num; int index; - int lineno; - int start_offset; #define COMMANDS_ALLOC_NUM 128 alloc_num = COMMANDS_ALLOC_NUM; @@ -5977,7 +5968,6 @@ ParseScript(const char *script, const char *desc, int weight) * stdstrings should be true, which is a bit riskier. */ psql_scan_setup(sstate, script, strlen(script), 0, true); - start_offset = expr_scanner_offset(sstate) - 1; initPQExpBuffer(&line_buf); @@ -5990,7 +5980,6 @@ ParseScript(const char *script, const char *desc, int weight) Command *command = NULL; resetPQExpBuffer(&line_buf); - lineno = expr_scanner_get_lineno(sstate, start_offset); sr = psql_scan(sstate, &line_buf, &prompt); @@ -6004,7 +5993,15 @@ ParseScript(const char *script, const char *desc, int weight) /* If we reached a backslash, process that */ if (sr == PSCAN_BACKSLASH) { - command = process_backslash_command(sstate, desc); + int lineno; + int start_offset; + + /* Capture location of the backslash */ + psql_scan_get_location(sstate, &lineno, &start_offset); + start_offset--; + + command = process_backslash_command(sstate, desc, + lineno, start_offset); if (command) { diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index f6a883611c51..0ba216e5f72b 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -149,11 +149,9 @@ extern yyscan_t expr_scanner_init(PsqlScanState state, const char *source, int lineno, int start_offset, const char *command); extern void expr_scanner_finish(yyscan_t yyscanner); -extern int expr_scanner_offset(PsqlScanState state); extern char *expr_scanner_get_substring(PsqlScanState state, - int start_offset, int end_offset, + int start_offset, bool chomp); -extern int expr_scanner_get_lineno(PsqlScanState state, int offset); extern void syntax_error(const char *source, int lineno, const char *line, const char *command, const char *msg, diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l index b3c6b88e9ca8..0978a515b7ab 100644 --- a/src/fe_utils/psqlscan.l +++ b/src/fe_utils/psqlscan.l @@ -1079,6 +1079,10 @@ psql_scan_setup(PsqlScanState state, /* Set lookaside data in case we have to map unsafe encoding */ state->curline = state->scanbuf; state->refline = state->scanline; + + /* Initialize state for psql_scan_get_location() */ + state->cur_line_no = 0; /* yylex not called yet */ + state->cur_line_ptr = state->scanbuf; } /* @@ -1136,6 +1140,10 @@ psql_scan(PsqlScanState state, /* And lex. */ lexresult = yylex(NULL, state->scanner); + /* Notify psql_scan_get_location() that a yylex call has been made. */ + if (state->cur_line_no == 0) + state->cur_line_no = 1; + /* * Check termination state and return appropriate result info. */ @@ -1311,6 +1319,52 @@ psql_scan_in_quote(PsqlScanState state) state->start_state != xqs; } +/* + * Return the current scanning location (end+1 of last scanned token), + * as a line number counted from 1 and an offset from string start. + * + * This considers only the outermost input string, and therefore is of + * limited use for programs that use psqlscan_push_new_buffer(). + * + * It would be a bit easier probably to use "%option yylineno" to count + * lines, but the flex manual says that has a performance cost, and only + * a minority of programs using psqlscan have need for this functionality. + * So we implement it ourselves without adding overhead to the lexer itself. + */ +void +psql_scan_get_location(PsqlScanState state, + int *lineno, int *offset) +{ + const char *line_end; + + /* + * We rely on flex's having stored a NUL after the current token in + * scanbuf. Therefore we must specially handle the state before yylex() + * has been called, when obviously that won't have happened yet. + */ + if (state->cur_line_no == 0) + { + *lineno = 1; + *offset = 0; + return; + } + + /* + * Advance cur_line_no/cur_line_ptr past whatever has been lexed so far. + * Doing this prevents the operation from being O(N^2) for long inputs. + */ + while ((line_end = strchr(state->cur_line_ptr, '\n')) != NULL) + { + state->cur_line_no++; + state->cur_line_ptr = line_end + 1; + } + state->cur_line_ptr += strlen(state->cur_line_ptr); + + /* Report current location. */ + *lineno = state->cur_line_no; + *offset = state->cur_line_ptr - state->scanbuf; +} + /* * Push the given string onto the stack of stuff to scan. * diff --git a/src/include/fe_utils/psqlscan.h b/src/include/fe_utils/psqlscan.h index 81f792b1733b..39d2065fe987 100644 --- a/src/include/fe_utils/psqlscan.h +++ b/src/include/fe_utils/psqlscan.h @@ -87,4 +87,7 @@ extern void psql_scan_reselect_sql_lexer(PsqlScanState state); extern bool psql_scan_in_quote(PsqlScanState state); +extern void psql_scan_get_location(PsqlScanState state, + int *lineno, int *offset); + #endif /* PSQLSCAN_H */ diff --git a/src/include/fe_utils/psqlscan_int.h b/src/include/fe_utils/psqlscan_int.h index 37798114873e..2a3a9d7c82aa 100644 --- a/src/include/fe_utils/psqlscan_int.h +++ b/src/include/fe_utils/psqlscan_int.h @@ -104,6 +104,10 @@ typedef struct PsqlScanStateData const char *curline; /* actual flex input string for cur buf */ const char *refline; /* original data for cur buffer */ + /* status for psql_scan_get_location() */ + int cur_line_no; /* current line#, or 0 if no yylex done */ + const char *cur_line_ptr; /* points into cur_line_no'th line in scanbuf */ + /* * All this state lives across successive input lines, until explicitly * reset by psql_scan_reset. start_state is adopted by yylex() on entry,