summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Teodor Sigaev 2008-06-30 18:30:48 +0000
committer Teodor Sigaev 2008-06-30 18:30:48 +0000
commit d90c40dc6624458fe2fdbcf214feada1e6793dc4 (patch)
tree 92ae9348715013928a8741976419d81cad86c1db
parent 13928f0e4ccea0d3a5e6aafa2a7f3e4e2142df9b (diff)
Add ltree support for multibyte encodings. Patch by
laser <[email protected]>, with some editing by me.
-rw-r--r-- contrib/ltree/lquery_op.c 33
-rw-r--r-- contrib/ltree/ltree.h 23
-rw-r--r-- contrib/ltree/ltree_io.c 120
-rw-r--r-- contrib/ltree/ltxtquery_io.c 35
-rw-r--r-- contrib/ltree/ltxtquery_op.c 2
5 files changed, 131 insertions, 82 deletions
diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index 51d372cdba..0752e6380d 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -8,6 +8,7 @@
#include <ctype.h>
#include "utils/array.h"
+#include "utils/formatting.h"
#include "ltree.h"
PG_FUNCTION_INFO_V1(ltq_regex);
@@ -32,23 +33,24 @@ static char *
getlexeme(char *start, char *end, int *len)
{
char *ptr;
-
- while (start < end && *start == '_')
- start++;
+ int charlen;
+
+ while (start < end && (charlen = pg_mblen(start)) == 1 && t_iseq(start,'_') )
+ start += charlen;
ptr = start;
- if (ptr == end)
+ if (ptr >= end)
return NULL;
- while (ptr < end && *ptr != '_')
- ptr++;
+ while (ptr < end && !( (charlen = pg_mblen(ptr)) == 1 && t_iseq(ptr, '_') ) )
+ ptr += charlen;
*len = ptr - start;
return start;
}
bool
- compare_subnode(ltree_level * t, char *qn, int len, int (*cmpptr) (const char *, const char *, size_t), bool anyend)
+compare_subnode(ltree_level * t, char *qn, int len, int (*cmpptr) (const char *, const char *, size_t), bool anyend)
{
char *endt = t->name + t->len;
char *endq = qn + len;
@@ -85,6 +87,21 @@ bool
return true;
}
+int
+ltree_strncasecmp(const char *a, const char *b, size_t s)
+{
+ char *al = str_tolower(a, s);
+ char *bl = str_tolower(b, s);
+ int res;
+
+ res = strncmp(al, bl,s);
+
+ pfree(al);
+ pfree(bl);
+
+ return res;
+}
+
static bool
checkLevel(lquery_level * curq, ltree_level * curt)
{
@@ -94,7 +111,7 @@ checkLevel(lquery_level * curq, ltree_level * curt)
for (i = 0; i < curq->numvar; i++)
{
- cmpptr = (curvar->flag & LVAR_INCASE) ? pg_strncasecmp : strncmp;
+ cmpptr = (curvar->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp;
if (curvar->flag & LVAR_SUBLEXEME)
{
diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
index 8d39c9ec39..a1a5f0ea7f 100644
--- a/contrib/ltree/ltree.h
+++ b/contrib/ltree/ltree.h
@@ -3,15 +3,17 @@
#ifndef __LTREE_H__
#define __LTREE_H__
+#include "postgres.h"
#include "fmgr.h"
+#include "tsearch/ts_locale.h"
typedef struct
{
- uint8 len;
+ uint16 len;
char name[1];
} ltree_level;
-#define LEVEL_HDRSIZE (sizeof(uint8))
+#define LEVEL_HDRSIZE (offsetof(ltree_level,name))
#define LEVEL_NEXT(x) ( (ltree_level*)( ((char*)(x)) + MAXALIGN(((ltree_level*)(x))->len + LEVEL_HDRSIZE) ) )
typedef struct
@@ -21,7 +23,7 @@ typedef struct
char data[1];
} ltree;
-#define LTREE_HDRSIZE MAXALIGN(VARHDRSZ + sizeof(uint16))
+#define LTREE_HDRSIZE MAXALIGN( offsetof(ltree, data) )
#define LTREE_FIRST(x) ( (ltree_level*)( ((char*)(x))+LTREE_HDRSIZE ) )
@@ -30,12 +32,12 @@ typedef struct
typedef struct
{
int4 val;
- uint8 len;
+ uint16 len;
uint8 flag;
char name[1];
} lquery_variant;
-#define LVAR_HDRSIZE MAXALIGN(sizeof(uint8)*2 + sizeof(int4))
+#define LVAR_HDRSIZE MAXALIGN(offsetof(lquery_variant, name))
#define LVAR_NEXT(x) ( (lquery_variant*)( ((char*)(x)) + MAXALIGN(((lquery_variant*)(x))->len) + LVAR_HDRSIZE ) )
#define LVAR_ANYEND 0x01
@@ -52,7 +54,7 @@ typedef struct
char variants[1];
} lquery_level;
-#define LQL_HDRSIZE MAXALIGN( sizeof(uint16)*5 )
+#define LQL_HDRSIZE MAXALIGN( offsetof(lquery_level,variants) )
#define LQL_NEXT(x) ( (lquery_level*)( ((char*)(x)) + MAXALIGN(((lquery_level*)(x))->totallen) ) )
#define LQL_FIRST(x) ( (lquery_variant*)( ((char*)(x))+LQL_HDRSIZE ) )
@@ -73,12 +75,12 @@ typedef struct
char data[1];
} lquery;
-#define LQUERY_HDRSIZE MAXALIGN(VARHDRSZ + 3*sizeof(uint16))
+#define LQUERY_HDRSIZE MAXALIGN( offsetof(lquery, data) )
#define LQUERY_FIRST(x) ( (lquery_level*)( ((char*)(x))+LQUERY_HDRSIZE ) )
#define LQUERY_HASNOT 0x01
-#define ISALNUM(x) ( isalnum((unsigned char)(x)) || (x) == '_' )
+#define ISALNUM(x) ( t_isalpha(x) || t_isdigit(x) || ( pg_mblen(x) == 1 && t_iseq((x), '_') ) )
/* full text query */
@@ -156,9 +158,10 @@ bool ltree_execute(ITEM * curitem, void *checkval,
int ltree_compare(const ltree * a, const ltree * b);
bool inner_isparent(const ltree * c, const ltree * p);
-bool compare_subnode(ltree_level * t, char *q, int len,
- int (*cmpptr) (const char *, const char *, size_t), bool anyend);
+bool compare_subnode(ltree_level * t, char *q, int len,
+ int (*cmpptr) (const char *, const char *, size_t), bool anyend);
ltree *lca_inner(ltree ** a, int len);
+int ltree_strncasecmp(const char *a, const char *b, size_t s);
#define PG_GETARG_LTREE(x) ((ltree*)DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(x))))
#define PG_GETARG_LTREE_COPY(x) ((ltree*)DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(x))))
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index bd50105487..db77579215 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -25,15 +25,16 @@ Datum lquery_out(PG_FUNCTION_ARGS);
#define UNCHAR ereport(ERROR, \
(errcode(ERRCODE_SYNTAX_ERROR), \
- errmsg("syntax error at position %d near \"%c\"", \
- (int)(ptr-buf), *ptr)));
+ errmsg("syntax error at position %d", \
+ pos)));
typedef struct
{
char *start;
- int len;
+ int len; /* length in bytes */
int flag;
+ int wlen; /* length in characters */
} nodeitem;
#define LTPRS_WAITNAME 0
@@ -51,24 +52,30 @@ ltree_in(PG_FUNCTION_ARGS)
int state = LTPRS_WAITNAME;
ltree *result;
ltree_level *curlevel;
+ int charlen;
+ int pos=0;
ptr = buf;
while (*ptr)
{
- if (*ptr == '.')
+ charlen = pg_mblen(ptr);
+ if ( charlen == 1 && t_iseq(ptr, '.') )
num++;
- ptr++;
+ ptr+=charlen;
}
list = lptr = (nodeitem *) palloc(sizeof(nodeitem) * (num + 1));
ptr = buf;
while (*ptr)
{
+ charlen = pg_mblen(ptr);
+
if (state == LTPRS_WAITNAME)
{
- if (ISALNUM(*ptr))
+ if (ISALNUM(ptr))
{
lptr->start = ptr;
+ lptr->wlen = 0;
state = LTPRS_WAITDELIM;
}
else
@@ -76,40 +83,43 @@ ltree_in(PG_FUNCTION_ARGS)
}
else if (state == LTPRS_WAITDELIM)
{
- if (*ptr == '.')
+ if ( charlen == 1 && t_iseq(ptr, '.') )
{
lptr->len = ptr - lptr->start;
- if (lptr->len > 255)
+ if (lptr->wlen > 255)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("name of level is too long"),
errdetail("Name length is %d, must "
"be < 256, in position %d.",
- lptr->len, (int) (lptr->start - buf))));
+ lptr->wlen, pos)));
totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
lptr++;
state = LTPRS_WAITNAME;
}
- else if (!ISALNUM(*ptr))
+ else if (!ISALNUM(ptr))
UNCHAR;
}
else
/* internal error */
elog(ERROR, "internal error in parser");
- ptr++;
+
+ ptr+=charlen;
+ lptr->wlen++;
+ pos++;
}
if (state == LTPRS_WAITDELIM)
{
lptr->len = ptr - lptr->start;
- if (lptr->len > 255)
+ if (lptr->wlen > 255)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("name of level is too long"),
errdetail("Name length is %d, must "
"be < 256, in position %d.",
- lptr->len, (int) (lptr->start - buf))));
+ lptr->wlen, pos)));
totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
lptr++;
@@ -127,7 +137,7 @@ ltree_in(PG_FUNCTION_ARGS)
lptr = list;
while (lptr - list < result->numlevel)
{
- curlevel->len = (uint8) lptr->len;
+ curlevel->len = (uint16) lptr->len;
memcpy(curlevel->name, lptr->start, lptr->len);
curlevel = LEVEL_NEXT(curlevel);
lptr++;
@@ -198,15 +208,23 @@ lquery_in(PG_FUNCTION_ARGS)
lquery_variant *lrptr = NULL;
bool hasnot = false;
bool wasbad = false;
+ int charlen;
+ int pos=0;
ptr = buf;
while (*ptr)
{
- if (*ptr == '.')
- num++;
- else if (*ptr == '|')
- numOR++;
- ptr++;
+ charlen = pg_mblen(ptr);
+
+ if ( charlen == 1 )
+ {
+ if (t_iseq(ptr, '.'))
+ num++;
+ else if (t_iseq(ptr, '|'))
+ numOR++;
+ }
+
+ ptr+=charlen;
}
num++;
@@ -214,16 +232,18 @@ lquery_in(PG_FUNCTION_ARGS)
ptr = buf;
while (*ptr)
{
+ charlen = pg_mblen(ptr);
+
if (state == LQPRS_WAITLEVEL)
{
- if (ISALNUM(*ptr))
+ if (ISALNUM(ptr))
{
GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1));
lptr->start = ptr;
state = LQPRS_WAITDELIM;
curqlevel->numvar = 1;
}
- else if (*ptr == '!')
+ else if (charlen==1 && t_iseq(ptr, '!'))
{
GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1));
lptr->start = ptr + 1;
@@ -232,14 +252,14 @@ lquery_in(PG_FUNCTION_ARGS)
curqlevel->flag |= LQL_NOT;
hasnot = true;
}
- else if (*ptr == '*')
+ else if (charlen==1 && t_iseq(ptr, '*'))
state = LQPRS_WAITOPEN;
else
UNCHAR;
}
else if (state == LQPRS_WAITVAR)
{
- if (ISALNUM(*ptr))
+ if (ISALNUM(ptr))
{
lptr++;
lptr->start = ptr;
@@ -251,61 +271,61 @@ lquery_in(PG_FUNCTION_ARGS)
}
else if (state == LQPRS_WAITDELIM)
{
- if (*ptr == '@')
+ if (charlen==1 && t_iseq(ptr, '@'))
{
if (lptr->start == ptr)
UNCHAR;
lptr->flag |= LVAR_INCASE;
curqlevel->flag |= LVAR_INCASE;
}
- else if (*ptr == '*')
+ else if (charlen==1 && t_iseq(ptr, '*'))
{
if (lptr->start == ptr)
UNCHAR;
lptr->flag |= LVAR_ANYEND;
curqlevel->flag |= LVAR_ANYEND;
}
- else if (*ptr == '%')
+ else if (charlen==1 && t_iseq(ptr, '%'))
{
if (lptr->start == ptr)
UNCHAR;
lptr->flag |= LVAR_SUBLEXEME;
curqlevel->flag |= LVAR_SUBLEXEME;
}
- else if (*ptr == '|')
+ else if (charlen==1 && t_iseq(ptr, '|'))
{
lptr->len = ptr - lptr->start -
((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
((lptr->flag & LVAR_INCASE) ? 1 : 0) -
((lptr->flag & LVAR_ANYEND) ? 1 : 0);
- if (lptr->len > 255)
+ if (lptr->wlen > 255)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("name of level is too long"),
errdetail("Name length is %d, must "
"be < 256, in position %d.",
- lptr->len, (int) (lptr->start - buf))));
+ lptr->wlen, pos)));
state = LQPRS_WAITVAR;
}
- else if (*ptr == '.')
+ else if (charlen==1 && t_iseq(ptr, '.'))
{
lptr->len = ptr - lptr->start -
((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
((lptr->flag & LVAR_INCASE) ? 1 : 0) -
((lptr->flag & LVAR_ANYEND) ? 1 : 0);
- if (lptr->len > 255)
+ if (lptr->wlen > 255)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("name of level is too long"),
errdetail("Name length is %d, must "
"be < 256, in position %d.",
- lptr->len, (int) (lptr->start - buf))));
+ lptr->wlen, pos)));
state = LQPRS_WAITLEVEL;
curqlevel = NEXTLEV(curqlevel);
}
- else if (ISALNUM(*ptr))
+ else if (ISALNUM(ptr))
{
if (lptr->flag)
UNCHAR;
@@ -315,9 +335,9 @@ lquery_in(PG_FUNCTION_ARGS)
}
else if (state == LQPRS_WAITOPEN)
{
- if (*ptr == '{')
+ if (charlen==1 && t_iseq(ptr, '{'))
state = LQPRS_WAITFNUM;
- else if (*ptr == '.')
+ else if (charlen==1 && t_iseq(ptr, '.'))
{
curqlevel->low = 0;
curqlevel->high = 0xffff;
@@ -329,9 +349,9 @@ lquery_in(PG_FUNCTION_ARGS)
}
else if (state == LQPRS_WAITFNUM)
{
- if (*ptr == ',')
+ if (charlen==1 && t_iseq(ptr, ','))
state = LQPRS_WAITSNUM;
- else if (isdigit((unsigned char) *ptr))
+ else if (t_isdigit(ptr))
{
curqlevel->low = atoi(ptr);
state = LQPRS_WAITND;
@@ -341,12 +361,12 @@ lquery_in(PG_FUNCTION_ARGS)
}
else if (state == LQPRS_WAITSNUM)
{
- if (isdigit((unsigned char) *ptr))
+ if (t_isdigit(ptr))
{
curqlevel->high = atoi(ptr);
state = LQPRS_WAITCLOSE;
}
- else if (*ptr == '}')
+ else if (charlen==1 && t_iseq(ptr, '}'))
{
curqlevel->high = 0xffff;
state = LQPRS_WAITEND;
@@ -356,26 +376,26 @@ lquery_in(PG_FUNCTION_ARGS)
}
else if (state == LQPRS_WAITCLOSE)
{
- if (*ptr == '}')
+ if (charlen==1 && t_iseq(ptr, '}'))
state = LQPRS_WAITEND;
- else if (!isdigit((unsigned char) *ptr))
+ else if (!t_isdigit(ptr))
UNCHAR;
}
else if (state == LQPRS_WAITND)
{
- if (*ptr == '}')
+ if (charlen==1 && t_iseq(ptr, '}'))
{
curqlevel->high = curqlevel->low;
state = LQPRS_WAITEND;
}
- else if (*ptr == ',')
+ else if (charlen==1 && t_iseq(ptr, ','))
state = LQPRS_WAITSNUM;
- else if (!isdigit((unsigned char) *ptr))
+ else if (!t_isdigit(ptr))
UNCHAR;
}
else if (state == LQPRS_WAITEND)
{
- if (*ptr == '.')
+ if (charlen==1 && t_iseq(ptr, '.'))
{
state = LQPRS_WAITLEVEL;
curqlevel = NEXTLEV(curqlevel);
@@ -386,7 +406,11 @@ lquery_in(PG_FUNCTION_ARGS)
else
/* internal error */
elog(ERROR, "internal error in parser");
- ptr++;
+
+ ptr+=charlen;
+ if ( state == LQPRS_WAITDELIM )
+ lptr->wlen++;
+ pos++;
}
if (state == LQPRS_WAITDELIM)
@@ -407,13 +431,13 @@ lquery_in(PG_FUNCTION_ARGS)
errmsg("syntax error"),
errdetail("Unexpected end of line.")));
- if (lptr->len > 255)
+ if (lptr->wlen > 255)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("name of level is too long"),
errdetail("Name length is %d, must "
"be < 256, in position %d.",
- lptr->len, (int) (lptr->start - buf))));
+ lptr->wlen, pos)));
}
else if (state == LQPRS_WAITOPEN)
curqlevel->high = 0xffff;
diff --git a/contrib/ltree/ltxtquery_io.c b/contrib/ltree/ltxtquery_io.c
index 340370810b..12994d46a8 100644
--- a/contrib/ltree/ltxtquery_io.c
+++ b/contrib/ltree/ltxtquery_io.c
@@ -59,49 +59,53 @@ typedef struct
static int4
gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, uint16 *flag)
{
- while (1)
+ int charlen;
+
+ for(;;)
{
+ charlen = pg_mblen(state->buf);
+
switch (state->state)
{
case WAITOPERAND:
- if (*(state->buf) == '!')
+ if (charlen==1 && t_iseq(state->buf, '!'))
{
(state->buf)++;
*val = (int4) '!';
return OPR;
}
- else if (*(state->buf) == '(')
+ else if (charlen==1 && t_iseq(state->buf, '('))
{
state->count++;
(state->buf)++;
return OPEN;
}
- else if (ISALNUM(*(state->buf)))
+ else if (ISALNUM(state->buf))
{
state->state = INOPERAND;
*strval = state->buf;
- *lenval = 1;
+ *lenval = charlen;
*flag = 0;
}
- else if (!isspace((unsigned char) *(state->buf)))
+ else if (!t_isspace(state->buf))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("operand syntax error")));
break;
case INOPERAND:
- if (ISALNUM(*(state->buf)))
+ if (ISALNUM(state->buf))
{
if (*flag)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("modificators syntax error")));
- (*lenval)++;
+ *lenval += charlen;
}
- else if (*(state->buf) == '%')
+ else if (charlen==1 && t_iseq(state->buf, '%'))
*flag |= LVAR_SUBLEXEME;
- else if (*(state->buf) == '@')
+ else if (charlen==1 && t_iseq(state->buf, '@'))
*flag |= LVAR_INCASE;
- else if (*(state->buf) == '*')
+ else if (charlen==1 && t_iseq(state->buf, '*'))
*flag |= LVAR_ANYEND;
else
{
@@ -110,14 +114,14 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, uint1
}
break;
case WAITOPERATOR:
- if (*(state->buf) == '&' || *(state->buf) == '|')
+ if (charlen==1 && ( t_iseq(state->buf, '&') || t_iseq(state->buf, '|') ))
{
state->state = WAITOPERAND;
*val = (int4) *(state->buf);
(state->buf)++;
return OPR;
}
- else if (*(state->buf) == ')')
+ else if (charlen==1 && t_iseq(state->buf, ')'))
{
(state->buf)++;
state->count--;
@@ -125,14 +129,15 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, uint1
}
else if (*(state->buf) == '\0')
return (state->count) ? ERR : END;
- else if (*(state->buf) != ' ')
+ else if (charlen==1 && !t_iseq(state->buf, ' '))
return ERR;
break;
default:
return ERR;
break;
}
- (state->buf)++;
+
+ state->buf += charlen;
}
return END;
}
diff --git a/contrib/ltree/ltxtquery_op.c b/contrib/ltree/ltxtquery_op.c
index 3e73005e12..bc67489147 100644
--- a/contrib/ltree/ltxtquery_op.c
+++ b/contrib/ltree/ltxtquery_op.c
@@ -57,7 +57,7 @@ checkcondition_str(void *checkval, ITEM * val)
char *op = ((CHKVAL *) checkval)->operand + val->distance;
int (*cmpptr) (const char *, const char *, size_t);
- cmpptr = (val->flag & LVAR_INCASE) ? pg_strncasecmp : strncmp;
+ cmpptr = (val->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp;
while (tlen > 0)
{
if (val->flag & LVAR_SUBLEXEME)