#define BLOCKS_PER_READ 512
/*
- * we expect the find the last lines of the manifest, including the checksum,
+ * We expect to find the last lines of the manifest, including the checksum,
* in the last MIN_CHUNK bytes of the manifest. We trigger an incremental
* parse step if we are about to overflow MAX_CHUNK bytes.
*/
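A rough sketch of the buffering policy this comment describes, using invented names (buffer_manifest_bytes, parse_manifest_chunk) and placeholder values for MIN_CHUNK and MAX_CHUNK rather than the module's real routines and constants:

#include "postgres_fe.h"
#include "lib/stringinfo.h"

/* placeholder values, purely for illustration */
#define MIN_CHUNK	1024			/* the last lines plus checksum should fit here */
#define MAX_CHUNK	(128 * 1024)	/* hand data to the parser before exceeding this */

/* hypothetical incremental parse step */
extern void parse_manifest_chunk(const char *data, int len, bool is_last);

/*
 * Accumulate incoming manifest bytes.  When the buffer is about to overflow
 * MAX_CHUNK, feed everything except the trailing MIN_CHUNK bytes to the
 * parser, so that the final lines (including the checksum) are still on hand
 * for the last parse step.
 */
static void
buffer_manifest_bytes(StringInfo buf, const char *data, int len)
{
	if (buf->len + len > MAX_CHUNK && buf->len > MIN_CHUNK)
	{
		int			flush = buf->len - MIN_CHUNK;

		parse_manifest_chunk(buf->data, flush, false);

		/* retain the trailing MIN_CHUNK bytes for the final step */
		memmove(buf->data, buf->data + flush, MIN_CHUNK);
		buf->len = MIN_CHUNK;
		buf->data[buf->len] = '\0';
	}

	appendBinaryStringInfo(buf, data, len);
}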
* Files extracted from the backup manifest.
*
* We don't really need this information, because we use WAL summaries to
- * figure what's changed. It would be unsafe to just rely on the list of
- * files that existed before, because it's possible for a file to be
+ * figure out what's changed. It would be unsafe to just rely on the list
+ * of files that existed before, because it's possible for a file to be
* removed and a new one created with the same name and different
* contents. In such cases, the whole file must still be sent. We can tell
* from the WAL summaries whether that happened, but not from the file
* need explicit stacks for predictions, field names and null indicators, but
* we don't need the input, that will be handed in bit by bit to the
* parse routine. We also need an accumulator for partial tokens in case
- * the boundary between chunks happns to fall in the middle of a token.
+ * the boundary between chunks happens to fall in the middle of a token.
*/
#define JS_STACK_CHUNK_SIZE 64
#define JS_MAX_PROD_LEN 10 /* more than we need */
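The comment above only names the bookkeeping pieces; a minimal sketch of what such structures could look like is below. The type and field names are invented for illustration and are not the parser's actual definitions.

#include "lib/stringinfo.h"

/* Illustrative only; the fields mirror the comment above. */
typedef struct ParserStackSketch
{
	int			stack_size;		/* allocated depth, grown JS_STACK_CHUNK_SIZE at a time */
	char	   *prediction;		/* predicted productions, at most JS_MAX_PROD_LEN each */
	int			pred_index;		/* top of the prediction stack */
	char	  **fnames;			/* field name at each nesting level */
	bool	   *fnulls;			/* null indicator at each nesting level */
} ParserStackSketch;

typedef struct IncrementalStateSketch
{
	StringInfoData partial_token;	/* a token split across a chunk boundary */
} IncrementalStateSketch;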
}
/*
- * Add any remaining alpha_numeric chars. This takes care of the
+ * Add any remaining alphanumeric chars. This takes care of the
* {null, false, true} literals as well as any trailing
- * alpha-numeric junk on non-string tokens.
+ * alphanumeric junk on non-string tokens.
*/
for (int i = added; i < lex->input_length; i++)
{
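	/*
	 * Illustrative note, not part of the patch: the body of this loop
	 * presumably copies each remaining alphanumeric character into the
	 * partial-token accumulator and stops at the first one that isn't,
	 * along the lines of
	 *
	 *     if (JSON_ALPHANUMERIC_CHAR(lex->input[i]))
	 *         appendStringInfoChar(ptok, lex->input[i]);
	 *     else
	 *         break;
	 *
	 * where ptok stands for a pointer to the partial-token accumulator
	 * mentioned in the earlier comment; treat both names as assumptions.
	 */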
* the rest of the file.
*
* For an incremental parse, this will just be called on the last chunk of the
- * manifest, and the cryptohash context paswed in. For a non-incremental
+ * manifest, and the cryptohash context passed in. For a non-incremental
* parse incr_ctx will be NULL.
*/
static void
This module contains two programs for testing the json parsers.
- `test_json_parser_incremental` is for testing the incremental parser, It
- reads in a file and pases it in very small chunks (60 bytes at a time) to
- the incremental parser. It's not meant to be a speed test but to test the
- accuracy of the incremental parser. It takes one argument: the name of the
- input file.
+ reads in a file and passes it in very small chunks (default is 60 bytes at a
+ time) to the incremental parser. It's not meant to be a speed test but to
+ test the accuracy of the incremental parser. There are two option arguments:
+ "-c nn" specifies an alternative chunk size, and "-s" specifies using
+ semantic routines. The semantic routines re-output the json, although not in
+ a very pretty form. The required non-option argument is the input file name.
- `test_json_parser_perf` is for speed testing both the standard
recursive descent parser and the non-recursive incremental
parser. If given the `-i` flag it uses the non-recursive parser,
- otherwise the stardard parser. The remaining flags are the number of
+ otherwise the standard parser. The remaining flags are the number of
parsing iterations and the file containing the input. Even when
using the non-recursive parser, the input is passed to the parser in a
single chunk. The results are thus comparable to those of the
standard parser.
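For illustration only, using the options described above (the input file name
is a placeholder, and the programs are assumed to be run from the build
directory):

    test_json_parser_incremental -c 100 -s input.json
    test_json_parser_perf -i 10000 input.json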
-The easiest way to use these is to run `make check` and `make speed-check`
-
-The sample input file is a small extract from a list of `delicious`
+The sample input file is a small, sanitized extract from a list of `delicious`
bookmarks taken some years ago, all wrapped in a single json
-array. 10,000 iterations of parsing this file gives a reasonable
-benchmark, and that is what the `speed-check` target does.
+array.
* test_json_parser_incremental.c
* Test program for incremental JSON parser
*
- * Copyright (c) 2023, PostgreSQL Global Development Group
+ * Copyright (c) 2024, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/test/modules/test_json_parser/test_json_parser_incremental.c
*
- * This progam tests incremental parsing of json. The input is fed into
+ * This program tests incremental parsing of json. The input is fed into
* the parser in very small chunks. In practice you would normally use
* much larger chunks, but doing this makes it more likely that the
- * full range of incement handling, especially in the lexer, is exercised.
- * If the "-c SIZE" option is provided, that chunk size is used instead.
+ * full range of increment handling, especially in the lexer, is exercised.
+ * If the "-c SIZE" option is provided, that chunk size is used instead
+ * of the default of 60.
*
* The argument specifies the file containing the JSON input.
*
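*
* Illustrative sketch, not part of the patch: driving the incremental parser
* from a program like this presumably boils down to a loop of roughly the
* following shape.  The entry points makeJsonLexContextIncremental() and
* pg_parse_json_incremental() are from the common JSON API as I understand
* it; treat the exact signatures as assumptions.
*
*     makeJsonLexContextIncremental(&lex, PG_UTF8, true);
*     while ((n_read = fread(buff, 1, chunk_size, json_file)) > 0)
*     {
*         bytes_left -= n_read;
*         result = pg_parse_json_incremental(&lex, testsem, buff, n_read,
*                                            bytes_left == 0);
*         if (bytes_left > 0 && result != JSON_INCOMPLETE)
*             break;               /* error in an intermediate chunk */
*     }
*     if (result != JSON_SUCCESS)
*         report the error (e.g. via json_errdetail()) and exit non-zero;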
#include "mb/pg_wchar.h"
#include "pg_getopt.h"
+#define BUFSIZE 6000
+#define DEFAULT_CHUNK_SIZE 60
+
typedef struct DoState
{
JsonLexContext *lex;
int
main(int argc, char **argv)
{
- /* max delicious line length is less than this */
- char buff[6001];
+ char buff[BUFSIZE];
FILE *json_file;
JsonParseErrorType result;
JsonLexContext lex;
StringInfoData json;
int n_read;
- size_t chunk_size = 60;
+ size_t chunk_size = DEFAULT_CHUNK_SIZE;
struct stat statbuf;
off_t bytes_left;
JsonSemAction *testsem = &nullSemAction;
{
case 'c': /* chunksize */
sscanf(optarg, "%zu", &chunk_size);
+ if (chunk_size > BUFSIZE)
+ {
+ fprintf(stderr, "chunk size cannot exceed %d\n", BUFSIZE);
+ exit(1);
+ }
break;
case 's': /* do semantic processing */
testsem = &sem;
{
n_read = fread(buff, 1, chunk_size, json_file);
appendBinaryStringInfo(&json, buff, n_read);
+
+ /*
+ * Append some trailing junk to the buffer passed to the parser. This
+ * helps us ensure that the parser does the right thing even if the
+ * chunk isn't terminated with a '\0'.
+ */
appendStringInfoString(&json, "1+23 trailing junk");
bytes_left -= n_read;
if (bytes_left > 0)
/*-------------------------------------------------------------------------
*
* test_json_parser_perf.c
- * Performancet est program for both flavors of the JSON parser
+ * Performance test program for both flavors of the JSON parser
*
- * Copyright (c) 2023, PostgreSQL Global Development Group
+ * Copyright (c) 2024, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/test/modules/test_json_parser/test_json_parser_perf.c
*
- * This progam tests either the standard (recursive descent) JSON parser
+ * This program tests either the standard (recursive descent) JSON parser
* or the incremental (table driven) parser, but without breaking the input
* into chunks in the latter case. Thus it can be used to compare the pure
* parsing speed of the two parsers. If the "-i" option is used, then the
#include <stdio.h>
#include <string.h>
+#define BUFSIZE 6000
+
int
main(int argc, char **argv)
{
- /* max delicious line length is less than this */
- char buff[6001];
+ char buff[BUFSIZE];
FILE *json_file;
JsonParseErrorType result;
JsonLexContext *lex;