/*-------------------------------------------------------------------------
 *
 * jsonpath.c
 *    JSONPath implementation routines for JSON data type support.
 *
 * Copyright (c) 2010, PostgreSQL Global Development Group
 * Written by Joey Adams.
 *
 *-------------------------------------------------------------------------
 */

#include "jsonpath.h"
#include "util.h"

#include <ctype.h>

/* NB: These macros evaluate their argument multiple times. */

/* isalpha() is locale-specific.  This simply matches [A-Za-z] . */
#define isletter(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))

/* Note that Unicode characters are allowed in identifiers. */
#define isextended(c) ((unsigned char)(c) > 127)

#define identifier_start(c) \
    (isletter(c) || (c) == '_' || (c) == '$' || isextended(c))

/*
 * The <ctype.h> classifiers are only defined for EOF and for values
 * representable as unsigned char.  Plain char may be signed, and bytes
 * above 127 do occur in patterns (see isextended), so the argument is
 * converted to unsigned char before it reaches isdigit().
 */
#define identifier_char(c) \
    (identifier_start(c) || isdigit((unsigned char)(c)))
#define integer_start(c) \
    (isdigit((unsigned char)(c)) || (c) == '+' || (c) == '-')

/*
 * In a valid JSONPath list, the first element is always of type JP_ROOT.
 * This element is used so an otherwise empty JSONPath list won't be NULL.
 * This allows us to use NULL to indicate an invalid JSONPath.
 *
 * This function returns the first cell,
 * making sure it is of type JP_ROOT.
 */
static ListCell *
jp_root(JSONPath *jp)
{
    ListCell   *cell;
    jp_element *elem;

    Assert(jp != NULL);

    cell = list_head(jp);
    elem = lfirst(cell);
    Assert(elem->type == JP_ROOT);

    return cell;
}

/*
 * This function returns the second cell of a JSONPath list
 * (the first cell after the JP_ROOT).
 */
static ListCell *
jp_head(JSONPath *jp)
{
    return lnext(jp_root(jp));
}

/*
 * Note that skip_spaces differs from skip_whitespace in json.c
 * in that this function treats '\f' and '\v' as whitespace.
* This is because JSON does not accept these characters as * whitespace, but since this is JSONPath, * we can do whatever we want here :-) */ static void skip_spaces(const char **sp) { const char *s = *sp; while (isspace(*s)) s++; *sp = s; } static jp_element * mkElement(jp_element_type type, bool rd) { jp_element *elem = palloc0(sizeof(*elem)); elem->type = type; elem->recursive_descent = rd; return elem; } static jp_element * mkRoot(void) { jp_element *elem = mkElement(JP_ROOT, false); return elem; } static jp_element * mkWildcard(bool rd) { jp_element *elem = mkElement(JP_WILDCARD, rd); return elem; } static jp_element * mkIndexSubscript(long index, bool rd) { jp_element *elem = mkElement(JP_INDEX_SUBSCRIPT, rd); elem->data.index = index; return elem; } static jp_element * mkKeySubscript(char *key, size_t length, bool rd) { jp_element *elem = mkElement(JP_KEY_SUBSCRIPT, rd); elem->data.key.ptr = key; elem->data.key.length = length; return elem; } static jp_element * mkCallChar(long index, bool rd) { jp_element *elem = mkElement(JP_CALL_CHAR, rd); elem->data.index = index; return elem; } static JPRef * mkRef(JPRefType type) { JPRef *ref = palloc0(sizeof(*ref)); ref->type = type; return ref; } static JPRef * mkRefNode(JSON * node) { JPRef *ref = mkRef(JP_REF_NODE); ref->u.node = node; return ref; } static JPRef * mkRefChar(const char *bytes, size_t length) { JPRef *ref = mkRef(JP_REF_CHAR); ref->u.chr. bytes = bytes; ref->u.chr. length = length; return ref; } /* * jp_show * Unparse a JSONPath expression. This is used by parse_json_path * to stringify successfully parsed JSONPaths. */ char * jp_show(JSONPath * jp) { StringInfoData string[1]; ListCell *cell; jp_element *elem; bool rd; char *tmp; initStringInfo(string); foreach(cell, jp) { elem = lfirst(cell); rd = elem->recursive_descent; switch (elem->type) { case JP_ROOT: appendStringInfoChar(string, '$'); break; case JP_WILDCARD: appendStringInfoString(string, rd ? 
"..[*]" : "[*]"); break; case JP_INDEX_SUBSCRIPT: appendStringInfo(string, "%s[%ld]", rd ? ".." : "", elem->data.index); break; case JP_KEY_SUBSCRIPT: tmp = json_encode_string(elem->data.key.ptr, elem->data.key.length, '"', false); appendStringInfo(string, "%s[%s]", rd ? ".." : "", tmp); pfree(tmp); break; case JP_CALL_CHAR: appendStringInfo(string, "%s(%ld)", rd ? "..char" : ".char", elem->data.index); break; default: Assert(false); } } return string->data; } /* * Parse a long starting at *s . * * On success, return true and update *s to point to the end of the number. * On failure, return false and leave *s untouched. */ static bool parse_long(const char **s, long *out) { const char *p = *s; errno = 0; *out = strtol(*s, (char **) &p, 10); if (p <= *s || errno != 0) return false; *s = p; return true; } /* * jp_parse * Parse a JSONPath expression (into a List of jp_element items). * * TODO: Get rid of all those gotos. The parser uses constant space, * so there's no chance of a stack overflow anyway. */ JSONPath * jp_parse(const char *pattern) { JSONPath *jp = NIL; const char *s = pattern; const char *start; const char *end; bool recursive_descent = false; bool bracket = false; const char *err_msg = NULL; long index; char *key; size_t key_length; skip_spaces(&s); /* pattern may not be empty */ if (*s == '\0') return NULL; jp = lappend(jp, mkRoot()); if (*s == '$') { s++; goto begin_element; } else if (*s != '.') { goto dot_subscript; /* implicit '.' at beginning */ } begin_element: skip_spaces(&s); begin_element_noskip: recursive_descent = false; bracket = false; if (*s == '\0') goto end; if (s[0] == '.' 
&& s[1] == '.') { recursive_descent = true; s += 2; goto dot_subscript; } if (s[0] == '.') { s++; goto dot_subscript; } if (s[0] == '[') { s++; goto bracket_subscript; } goto failed; next_element: if (bracket) { skip_spaces(&s); if (*s != ']') goto failed; s++; } goto begin_element; dot_subscript: skip_spaces(&s); if (*s == '*') goto wildcard; if (integer_start(*s)) goto integer; if (identifier_start(*s)) goto identifier; if (*s == '"' || *s == '\'') goto string; if (*s == '[') { s++; goto bracket_subscript; } goto failed; bracket_subscript: skip_spaces(&s); bracket = true; if (*s == '*') goto wildcard; if (integer_start(*s)) goto integer; if (identifier_start(*s)) { err_msg = "Identifiers may not be bracketed. This syntax is reserved for future use."; goto failed; } if (*s == '"' || *s == '\'') goto string; goto failed; wildcard: s++; jp = lappend(jp, mkWildcard(recursive_descent)); goto next_element; integer: if (!parse_long(&s, &index)) goto failed; jp = lappend(jp, mkIndexSubscript(index, recursive_descent)); goto next_element; identifier: start = s; while (identifier_char(*s)) s++; end = s; skip_spaces(&s); if (*s == '(') { if (end - start == 4 && memcmp(start, "char", 4) == 0) { s++; skip_spaces(&s); goto call_char; } goto failed; } key = pnstrdup(start, end - start); key_length = end - start; jp = lappend(jp, mkKeySubscript(key, key_length, recursive_descent)); goto begin_element_noskip; string: key = json_decode_string(&s, &key_length, false); if (key == NULL) goto failed; jp = lappend(jp, mkKeySubscript(key, key_length, recursive_descent)); goto next_element; call_char: if (!parse_long(&s, &index)) goto failed; skip_spaces(&s); if (*s++ != ')') goto failed; jp = lappend(jp, mkCallChar(index, recursive_descent)); goto begin_element; end: return jp; failed: return NULL; } /* * Currently, a lot of JPRef nodes are allocated just to pass JSON pointers * to match_recurse. 
If this becomes a memory/performance issue in the future, * JPRef could merged with JSON by adding JPRef's specialty types to the * json_type enum and JSON union. JPRef is currently not merged with * JSON in an attempt to keep the codebase tidy and easier to extend. */ static void match_recurse(void on_match(void *ctx, JPRef * ref), void *ctx, ListCell *path, JPRef * ref) { jp_element *elem; JSON *json, *child; if (path == NULL) { /* The end of the JSONPath list is the "accept" state. */ on_match(ctx, ref); return; } elem = lfirst(path); if (ref->type == JP_REF_NODE) json = ref->u.node; else json = NULL; switch (elem->type) { case JP_WILDCARD: if (json != NULL) { json_foreach(child, json) match_recurse(on_match, ctx, lnext(path), mkRefNode(child)); } break; case JP_INDEX_SUBSCRIPT: if (json != NULL && json->type == JSON_ARRAY && elem->data.index >= 0 && (size_t) elem->data.index < json->v.children.count) { size_t i; for (child = json->v.children.head, i = 0; child != NULL && i < (size_t) elem->data.index; child = child->next, i++) { } /* * If this fails, it means json->v.children.count was greater * than the actual number of children. 
*/ Assert(i == elem->data.index && child != NULL); match_recurse(on_match, ctx, lnext(path), mkRefNode(child)); } break; case JP_KEY_SUBSCRIPT: if (json != NULL && json->type == JSON_OBJECT) { json_foreach(child, json) { if (child->key != NULL && child->key_length == elem->data.key.length && memcmp(child->key, elem->data.key.ptr, child->key_length) == 0) { match_recurse(on_match, ctx, lnext(path), mkRefNode(child)); } } } break; case JP_CALL_CHAR: if (json != NULL && json->type == JSON_STRING && elem->data.index >= 0) { const char *sub_start; size_t sub_bytes; size_t sub_length; sub_length = utf8_substring( json->v.string.str, json->v.string.length, elem->data.index, 1, &sub_start, &sub_bytes); if (sub_length == 1) match_recurse(on_match, ctx, lnext(path), mkRefChar(sub_start, sub_bytes)); } else if (ref->type == JP_REF_CHAR && elem->data.index == 0) { /* char(0) on a character yields itself. */ match_recurse(on_match, ctx, lnext(path), ref); } break; default:; } if (elem->recursive_descent && json != NULL) { json_foreach(child, json) { if (!child->jp_changed) match_recurse(on_match, ctx, path, mkRefNode(child)); } } } static void jp_match_callback(List **results, JPRef * ref) { *results = lappend(*results, ref); } /* * jp_match * Match a parsed JSONPath expression against a JSON tree, * yielding a List of JPRef* items. * * To convert the JPRef* items to JSON-formatted strings, use jpref_encode. */ List * jp_match(JSONPath * jp, JSON * json) { ListCell *lc = jp_head(jp); List *results = NIL; match_recurse((void *) jp_match_callback, &results, lc, mkRefNode(json)); return results; } static void jp_set_callback(JSON * value, JPRef * ref) { switch (ref->type) { case JP_REF_NODE: json_replace_value(ref->u.node, value); ref->u.node->jp_changed = true; break; default:; /* Do nothing if ref is immutable. */ } } /* * jp_set * Set all elements that match a parsed JSONPath expression * in a JSON tree to a new value. 
* * Note that jp_set uses json_replace_value so it doesn't have to deep-copy * on every assignment if @value is a tree. This means that parent pointers * of the resulting tree will not be trustworthy. */ void jp_set(JSONPath * jp, JSON * json, JSON * value) { ListCell *lc = jp_head(jp); match_recurse((void *) jp_set_callback, value, lc, mkRefNode(json)); } /* * jpref_encode * Convert a JPRef to a JSON-formatted string. */ char * jpref_encode(JPRef * ref) { switch (ref->type) { case JP_REF_NODE: return json_encode(ref->u.node, JSONOPT_USE_ORIG); case JP_REF_CHAR: return json_encode_string(ref->u.chr.bytes, ref->u.chr.length, '"', false); default: Assert(false); return NULL; } }