/* * stringutil.c - some tools for string handling * * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #include #include #include "stringutil.h" #ifndef SET_VARSIZE #define SET_VARSIZE(x, len) VARATT_SIZEP(x) = len #endif StringInfo pgq_init_varbuf(void) { StringInfo buf; buf = makeStringInfo(); appendStringInfoString(buf, "XXXX"); return buf; } Datum pgq_finish_varbuf(StringInfo buf) { if (!buf) return (Datum)0; SET_VARSIZE(buf->data, buf->len); return PointerGetDatum(buf->data); } /* * Find a string in comma-separated list. * * It does not support space inside tokens. */ bool pgq_strlist_contains(const char *liststr, const char *str) { int c, len = strlen(str); const char *p, *listpos = liststr; loop: /* find string fragment, later check if actual token */ p = strstr(listpos, str); if (p == NULL) return false; /* move listpos further */ listpos = p + len; /* survive len=0 and avoid unneccesary compare */ if (*listpos) listpos++; /* check previous symbol */ if (p > liststr) { c = *(p - 1); if (!isspace(c) && c != ',') goto loop; } /* check following symbol */ c = p[len]; if (c != 0 && !isspace(c) && c != ',') goto loop; return true; } /* * quoting */ static int pgq_urlencode(char *dst, const uint8 *src, int srclen) { static const char hextbl[] = "0123456789abcdef"; const uint8 *end = src + srclen; char *p = dst; while (src < end) { unsigned c = *src++; if (c == ' ') { *p++ = '+'; } else if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' || c == '.' || c == '-') { *p++ = c; } else { *p++ = '%'; *p++ = hextbl[c >> 4]; *p++ = hextbl[c & 15]; } } return p - dst; } static int pgq_quote_literal(char *dst, const uint8 *src, int srclen) { const uint8 *cp1 = src, *src_end = src + srclen; char *cp2 = dst; bool is_ext = false; *cp2++ = '\''; while (cp1 < src_end) { int wl = pg_mblen((const char *)cp1); if (wl != 1) { while (wl-- > 0 && cp1 < src_end) *cp2++ = *cp1++; continue; } if (*cp1 == '\'') { *cp2++ = '\''; } else if (*cp1 == '\\') { if (!is_ext) { /* make room for 'E' */ memmove(dst + 1, dst, cp2 - dst); *dst = 'E'; is_ext = true; cp2++; } *cp2++ = '\\'; } *cp2++ = *cp1++; } *cp2++ = '\''; return cp2 - dst; } /* check if ident is keyword that needs quoting */ static bool is_keyword(const char *ident) { const ScanKeyword *kw; /* do the lookup */ #if PG_VERSION_NUM >= 80500 kw = ScanKeywordLookup(ident, ScanKeywords, NumScanKeywords); #else kw = ScanKeywordLookup(ident); #endif /* unreserved? */ #if PG_VERSION_NUM >= 80300 if (kw && kw->category == UNRESERVED_KEYWORD) return false; #endif /* found anything? */ return kw != NULL; } /* * pgq_quote_ident - Quote an identifier only if needed */ static int pgq_quote_ident(char *dst, const uint8 *src, int srclen) { /* * Can avoid quoting if ident starts with a lowercase letter or * underscore and contains only lowercase letters, digits, and * underscores, *and* is not any SQL keyword. Otherwise, supply * quotes. */ int nquotes = 0; bool safe; const char *ptr; char *optr; char ident[NAMEDATALEN + 1]; /* expect idents be not bigger than NAMEDATALEN */ if (srclen > NAMEDATALEN) srclen = NAMEDATALEN; memcpy(ident, src, srclen); ident[srclen] = 0; /* * would like to use macros here, but they might yield * unwanted locale-specific results... */ safe = ((ident[0] >= 'a' && ident[0] <= 'z') || ident[0] == '_'); for (ptr = ident; *ptr; ptr++) { char ch = *ptr; if ((ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9') || (ch == '_')) continue; /* okay */ safe = false; if (ch == '"') nquotes++; } if (safe) { if (is_keyword(ident)) safe = false; } optr = dst; if (!safe) *optr++ = '"'; for (ptr = ident; *ptr; ptr++) { char ch = *ptr; if (ch == '"') *optr++ = '"'; *optr++ = ch; } if (!safe) *optr++ = '"'; return optr - dst; } static char *start_append(StringInfo buf, int alloc_len) { enlargeStringInfo(buf, alloc_len); return buf->data + buf->len; } static void finish_append(StringInfo buf, int final_len) { if (buf->len + final_len > buf->maxlen) elog(FATAL, "buffer overflow"); buf->len += final_len; } static void tbuf_encode_data(StringInfo buf, const uint8 *data, int len, enum PgqEncode encoding) { int dlen = 0; char *dst; switch (encoding) { case TBUF_QUOTE_LITERAL: dst = start_append(buf, len * 2 + 3); dlen = pgq_quote_literal(dst, data, len); break; case TBUF_QUOTE_IDENT: dst = start_append(buf, len * 2 + 2); dlen = pgq_quote_ident(dst, data, len); break; case TBUF_QUOTE_URLENC: dst = start_append(buf, len * 3 + 2); dlen = pgq_urlencode(dst, data, len); break; default: elog(ERROR, "bad encoding"); } finish_append(buf, dlen); } void pgq_encode_cstring(StringInfo tbuf, const char *str, enum PgqEncode encoding) { if (str == NULL) elog(ERROR, "tbuf_encode_cstring: NULL"); tbuf_encode_data(tbuf, (const uint8 *)str, strlen(str), encoding); }