summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tom Lane, 2007-08-25 00:03:59 +0000
committer: Tom Lane, 2007-08-25 00:03:59 +0000
commit: 427cf5034bb7ff9c6893cf51b81bf09f26d5632f (patch)
tree: 44af6fe58a7435c0dc2f0ae6bf0cc407c2bd2a2c
parent: 59ecd36f65eca6cb7c0be25bcbe21d3462c74ded (diff)
Cleanup for some problems in tsearch patch:

- ispell initialization crashed on empty dictionary file
- ispell initialization crashed on affix file with prefixes but no suffixes
- stop words file was run through pg_verify_mbstr, with database encoding, but it's supposed to be UTF-8; similar bug for synonym files
- bunch of comments added, typos fixed, and other cleanup

Introduced consistent encoding checking/conversion of data read from tsearch configuration files, by doing this in a single t_readline() subroutine (replacing direct usages of fgets). Cleaned up the API for readstoplist() too.

Heikki Linnakangas
-rw-r--r--  src/backend/snowball/dict_snowball.c    4
-rw-r--r--  src/backend/tsearch/dict_ispell.c       4
-rw-r--r--  src/backend/tsearch/dict_simple.c      17
-rw-r--r--  src/backend/tsearch/dict_synonym.c     82
-rw-r--r--  src/backend/tsearch/dict_thesaurus.c   28
-rw-r--r--  src/backend/tsearch/spell.c           261
-rw-r--r--  src/backend/tsearch/ts_locale.c        48
-rw-r--r--  src/backend/tsearch/ts_parse.c         24
-rw-r--r--  src/backend/tsearch/ts_utils.c         69
-rw-r--r--  src/backend/tsearch/wparser.c           8
-rw-r--r--  src/include/tsearch/dicts/spell.h      38
-rw-r--r--  src/include/tsearch/ts_locale.h        10
-rw-r--r--  src/include/tsearch/ts_public.h        12
-rw-r--r--  src/include/tsearch/ts_utils.h         36
14 files changed, 327 insertions, 314 deletions
diff --git a/src/backend/snowball/dict_snowball.c b/src/backend/snowball/dict_snowball.c
index 0a211abd3f..92010e3e9e 100644
--- a/src/backend/snowball/dict_snowball.c
+++ b/src/backend/snowball/dict_snowball.c
@@ -192,7 +192,6 @@ dsnowball_init(PG_FUNCTION_ARGS)
ListCell *l;
d = (DictSnowball *) palloc0(sizeof(DictSnowball));
- d->stoplist.wordop = recode_and_lowerstr;
foreach(l, dictoptions)
{
@@ -204,8 +203,7 @@ dsnowball_init(PG_FUNCTION_ARGS)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple StopWords parameters")));
- readstoplist(defGetString(defel), &d->stoplist);
- sortstoplist(&d->stoplist);
+ readstoplist(defGetString(defel), &d->stoplist, lowerstr);
stoploaded = true;
}
else if (pg_strcasecmp("Language", defel->defname) == 0)
diff --git a/src/backend/tsearch/dict_ispell.c b/src/backend/tsearch/dict_ispell.c
index 8ebda3711e..e54bb87b93 100644
--- a/src/backend/tsearch/dict_ispell.c
+++ b/src/backend/tsearch/dict_ispell.c
@@ -39,7 +39,6 @@ dispell_init(PG_FUNCTION_ARGS)
ListCell *l;
d = (DictISpell *) palloc0(sizeof(DictISpell));
- d->stoplist.wordop = recode_and_lowerstr;
foreach(l, dictoptions)
{
@@ -73,8 +72,7 @@ dispell_init(PG_FUNCTION_ARGS)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple StopWords parameters")));
- readstoplist(defGetString(defel), &(d->stoplist));
- sortstoplist(&(d->stoplist));
+ readstoplist(defGetString(defel), &(d->stoplist), lowerstr);
stoploaded = true;
}
else
diff --git a/src/backend/tsearch/dict_simple.c b/src/backend/tsearch/dict_simple.c
index 78ec1214a6..29ddd1e8f3 100644
--- a/src/backend/tsearch/dict_simple.c
+++ b/src/backend/tsearch/dict_simple.c
@@ -23,19 +23,17 @@
typedef struct
{
StopList stoplist;
-} DictExample;
+} DictSimple;
Datum
dsimple_init(PG_FUNCTION_ARGS)
{
List *dictoptions = (List *) PG_GETARG_POINTER(0);
- DictExample *d = (DictExample *) palloc0(sizeof(DictExample));
+ DictSimple *d = (DictSimple *) palloc0(sizeof(DictSimple));
bool stoploaded = false;
ListCell *l;
- d->stoplist.wordop = recode_and_lowerstr;
-
foreach(l, dictoptions)
{
DefElem *defel = (DefElem *) lfirst(l);
@@ -46,8 +44,7 @@ dsimple_init(PG_FUNCTION_ARGS)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple StopWords parameters")));
- readstoplist(defGetString(defel), &d->stoplist);
- sortstoplist(&d->stoplist);
+ readstoplist(defGetString(defel), &d->stoplist, lowerstr);
stoploaded = true;
}
else
@@ -65,16 +62,16 @@ dsimple_init(PG_FUNCTION_ARGS)
Datum
dsimple_lexize(PG_FUNCTION_ARGS)
{
- DictExample *d = (DictExample *) PG_GETARG_POINTER(0);
+ DictSimple *d = (DictSimple *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
int32 len = PG_GETARG_INT32(2);
- char *txt = lowerstr_with_len(in, len);
+ char *txt;
TSLexeme *res = palloc0(sizeof(TSLexeme) * 2);
+ txt = lowerstr_with_len(in, len);
+
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
- {
pfree(txt);
- }
else
res[0].lexeme = txt;
diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c
index 33876fe7a2..46f6edae2d 100644
--- a/src/backend/tsearch/dict_synonym.c
+++ b/src/backend/tsearch/dict_synonym.c
@@ -20,9 +20,6 @@
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
-
-#define SYNBUFLEN 4096
-
typedef struct
{
char *in;
@@ -31,23 +28,34 @@ typedef struct
typedef struct
{
- int len;
+ int len; /* length of syn array */
Syn *syn;
} DictSyn;
+/*
+ * Finds the next whitespace-delimited word within the 'in' string.
+ * Returns a pointer to the first character of the word, and a pointer
+ * to the next byte after the last character in the word (in *end).
+ */
static char *
findwrd(char *in, char **end)
{
char *start;
- *end = NULL;
+ /* Skip leading spaces */
while (*in && t_isspace(in))
in += pg_mblen(in);
+ /* Return NULL on empty lines */
if (*in == '\0')
+ {
+ *end = NULL;
return NULL;
+ }
+
start = in;
+ /* Find end of word */
while (*in && !t_isspace(in))
in += pg_mblen(in);
@@ -70,12 +78,11 @@ dsynonym_init(PG_FUNCTION_ARGS)
ListCell *l;
char *filename = NULL;
FILE *fin;
- char buf[SYNBUFLEN];
char *starti,
*starto,
*end = NULL;
int cur = 0;
- int slen;
+ char *line = NULL;
foreach(l, dictoptions)
{
@@ -105,10 +112,33 @@ dsynonym_init(PG_FUNCTION_ARGS)
d = (DictSyn *) palloc0(sizeof(DictSyn));
- while (fgets(buf, SYNBUFLEN, fin))
+ while ((line = t_readline(fin)) != NULL)
{
- slen = strlen(buf);
- pg_verifymbstr(buf, slen, false);
+ starti = findwrd(line, &end);
+ if (!starti)
+ {
+ /* Empty line */
+ goto skipline;
+ }
+ *end = '\0';
+ if (end >= line + strlen(line))
+ {
+ /* A line with only one word. Ignore silently. */
+ goto skipline;
+ }
+
+ starto = findwrd(end + 1, &end);
+ if (!starto)
+ {
+ /* A line with only one word. Ignore silently. */
+ goto skipline;
+ }
+ *end = '\0';
+
+ /* starti now points to the first word, and starto to the second
+ * word on the line, with a \0 terminator at the end of both words.
+ */
+
if (cur == d->len)
{
if (d->len == 0)
@@ -123,36 +153,19 @@ dsynonym_init(PG_FUNCTION_ARGS)
}
}
- starti = findwrd(buf, &end);
- if (!starti)
- continue;
- *end = '\0';
- if (end >= buf + slen)
- continue;
-
- starto = findwrd(end + 1, &end);
- if (!starto)
- continue;
- *end = '\0';
-
- d->syn[cur].in = recode_and_lowerstr(starti);
- d->syn[cur].out = recode_and_lowerstr(starto);
- if (!(d->syn[cur].in && d->syn[cur].out))
- {
- FreeFile(fin);
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
- }
+ d->syn[cur].in = lowerstr(starti);
+ d->syn[cur].out = lowerstr(starto);
cur++;
+
+ skipline:
+ pfree(line);
}
FreeFile(fin);
d->len = cur;
- if (cur > 1)
- qsort(d->syn, d->len, sizeof(Syn), compareSyn);
+ qsort(d->syn, d->len, sizeof(Syn), compareSyn);
PG_RETURN_POINTER(d);
}
@@ -179,8 +192,7 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
if (!found)
PG_RETURN_POINTER(NULL);
- res = palloc(sizeof(TSLexeme) * 2);
- memset(res, 0, sizeof(TSLexeme) * 2);
+ res = palloc0(sizeof(TSLexeme) * 2);
res[0].lexeme = pstrdup(found->out);
PG_RETURN_POINTER(res);
diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c
index 3428fef2fb..dbe5fdb25e 100644
--- a/src/backend/tsearch/dict_thesaurus.c
+++ b/src/backend/tsearch/dict_thesaurus.c
@@ -170,10 +170,10 @@ static void
thesaurusRead(char *filename, DictThesaurus * d)
{
FILE *fh;
- char str[BUFSIZ];
int lineno = 0;
uint16 idsubst = 0;
bool useasis = false;
+ char *line;
filename = get_tsearch_config_filename(filename, "ths");
fh = AllocateFile(filename, "r");
@@ -183,27 +183,28 @@ thesaurusRead(char *filename, DictThesaurus * d)
errmsg("could not open thesaurus file \"%s\": %m",
filename)));
- while (fgets(str, sizeof(str), fh))
+ while ((line = t_readline(fh)) != NULL)
{
- char *ptr,
- *recoded;
+ char *ptr;
int state = TR_WAITLEX;
char *beginwrd = NULL;
uint16 posinsubst = 0;
uint16 nwrd = 0;
- ptr = recoded = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
- GetDatabaseEncoding(), PG_UTF8);
- if (recoded == NULL)
- elog(ERROR, "encoding conversion failed");
-
lineno++;
- /* is it comment ? */
- while (t_isspace(ptr))
+ ptr = line;
+
+ /* is it a comment? */
+ while (*ptr && t_isspace(ptr))
ptr += pg_mblen(ptr);
- if (t_iseq(recoded, '#') || *recoded == '\0' || t_iseq(recoded, '\n') || t_iseq(recoded, '\r'))
+
+ if (t_iseq(ptr, '#') || *ptr == '\0' ||
+ t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
+ {
+ pfree(line);
continue;
+ }
while (*ptr)
{
@@ -301,8 +302,7 @@ thesaurusRead(char *filename, DictThesaurus * d)
lineno, filename)));
}
- if (recoded != str)
- pfree(recoded);
+ pfree(line);
}
d->nsubst = idsubst;
diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c
index 3644ab7db3..6ff3034ddf 100644
--- a/src/backend/tsearch/spell.c
+++ b/src/backend/tsearch/spell.c
@@ -21,8 +21,11 @@
/*
- * during initialization dictionary requires a lot
- * of memory, so it will use temporary context
+ * Initialization requires a lot of memory that's not needed
+ * after the initialization is done. In init function,
+ * CurrentMemoryContext is a long lived memory context associated
+ * with the dictionary cache entry, so we use a temporary context
+ * for the short-lived stuff.
*/
static MemoryContext tmpCtx = NULL;
@@ -32,6 +35,9 @@ static MemoryContext tmpCtx = NULL;
static void
checkTmpCtx(void)
{
+ /* XXX: This assumes that CurrentMemoryContext doesn't have
+ * any children other than the one we create here.
+ */
if (CurrentMemoryContext->firstchild == NULL)
{
tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
@@ -74,17 +80,7 @@ cmpspell(const void *s1, const void *s2)
static int
cmpspellaffix(const void *s1, const void *s2)
{
- return (strcmp((*(const SPELL **) s1)->p.flag, (*(const SPELL **) s2)->p.flag));
-}
-
-static char *
-strnduplicate(char *s, int len)
-{
- char *d = (char *) palloc(len + 1);
-
- memcpy(d, s, len);
- d[len] = '\0';
- return d;
+ return (strncmp((*(const SPELL **) s1)->p.flag, (*(const SPELL **) s2)->p.flag, MAXFLAGLEN));
}
static char *
@@ -185,7 +181,7 @@ NIAddSpell(IspellDict * Conf, const char *word, const char *flag)
}
Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
strcpy(Conf->Spell[Conf->nspell]->word, word);
- strncpy(Conf->Spell[Conf->nspell]->p.flag, flag, 16);
+ strncpy(Conf->Spell[Conf->nspell]->p.flag, flag, MAXFLAGLEN);
Conf->nspell++;
}
@@ -197,9 +193,8 @@ NIAddSpell(IspellDict * Conf, const char *word, const char *flag)
void
NIImportDictionary(IspellDict * Conf, const char *filename)
{
- char str[BUFSIZ],
- *pstr;
FILE *dict;
+ char *line;
checkTmpCtx();
@@ -209,19 +204,14 @@ NIImportDictionary(IspellDict * Conf, const char *filename)
errmsg("could not open dictionary file \"%s\": %m",
filename)));
- while (fgets(str, sizeof(str), dict))
+ while ((line = t_readline(dict)) != NULL)
{
- char *s,
- *recoded;
+ char *s, *pstr;
const char *flag;
- recoded = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
- PG_UTF8, GetDatabaseEncoding());
- if (recoded == NULL)
- elog(ERROR, "encoding conversion failed");
-
+ /* Extract flag from the line */
flag = NULL;
- if ((s = findchar(recoded, '/')))
+ if ((s = findchar(line, '/')))
{
*s++ = '\0';
flag = s;
@@ -240,8 +230,8 @@ NIImportDictionary(IspellDict * Conf, const char *filename)
else
flag = "";
-
- s = recoded;
+ /* Remove trailing spaces */
+ s = line;
while (*s)
{
if (t_isspace(s))
@@ -251,13 +241,12 @@ NIImportDictionary(IspellDict * Conf, const char *filename)
}
s += pg_mblen(s);
}
- pstr = lowerstr_ctx(recoded);
+ pstr = lowerstr_ctx(line);
NIAddSpell(Conf, pstr, flag);
pfree(pstr);
- if (recoded != str)
- pfree(recoded);
+ pfree(line);
}
FreeFile(dict);
}
@@ -402,7 +391,7 @@ NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const
static bool
parse_affentry(char *str, char *mask, char *find, char *repl,
- const char *filename, int line)
+ const char *filename, int lineno)
{
int state = PAE_WAIT_MASK;
char *pmask = mask,
@@ -453,7 +442,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl,
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error at line %d of affix file \"%s\"",
- line, filename)));
+ lineno, filename)));
}
else if (state == PAE_INFIND)
{
@@ -471,7 +460,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl,
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error at line %d of affix file \"%s\"",
- line, filename)));
+ lineno, filename)));
}
else if (state == PAE_WAIT_REPL)
{
@@ -489,7 +478,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl,
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error at line %d of affix file \"%s\"",
- line, filename)));
+ lineno, filename)));
}
else if (state == PAE_INREPL)
{
@@ -507,7 +496,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl,
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error at line %d of affix file \"%s\"",
- line, filename)));
+ lineno, filename)));
}
else
elog(ERROR, "unknown state in parse_affentry: %d", state);
@@ -522,7 +511,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl,
static void
addFlagValue(IspellDict * Conf, char *s, uint32 val,
- const char *filename, int line)
+ const char *filename, int lineno)
{
while (*s && t_isspace(s))
s++;
@@ -531,13 +520,13 @@ addFlagValue(IspellDict * Conf, char *s, uint32 val,
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error at line %d of affix file \"%s\"",
- line, filename)));
+ lineno, filename)));
if (pg_mblen(s) != 1)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("multibyte flag character is not allowed at line %d of affix file \"%s\"",
- line, filename)));
+ lineno, filename)));
Conf->flagval[(unsigned int) *s] = (unsigned char) val;
Conf->usecompound = true;
@@ -546,7 +535,6 @@ addFlagValue(IspellDict * Conf, char *s, uint32 val,
static void
NIImportOOAffixes(IspellDict * Conf, const char *filename)
{
- char str[BUFSIZ];
char type[BUFSIZ],
*ptype = NULL;
char sflag[BUFSIZ];
@@ -560,9 +548,10 @@ NIImportOOAffixes(IspellDict * Conf, const char *filename)
int flag = 0;
char flagflags = 0;
FILE *affix;
- int line = 0;
+ int lineno = 0;
int scanread = 0;
char scanbuf[BUFSIZ];
+ char *recoded;
checkTmpCtx();
@@ -576,45 +565,41 @@ NIImportOOAffixes(IspellDict * Conf, const char *filename)
errmsg("could not open affix file \"%s\": %m",
filename)));
- while (fgets(str, sizeof(str), affix))
+ while ((recoded = t_readline(affix)) != NULL)
{
- char *recoded;
-
- recoded = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
- PG_UTF8, GetDatabaseEncoding());
- if (recoded == NULL)
- elog(ERROR, "encoding conversion failed");
-
- line++;
+ lineno++;
if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
+ {
+ pfree(recoded);
continue;
+ }
if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
addFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
- FF_COMPOUNDFLAG, filename, line);
+ FF_COMPOUNDFLAG, filename, lineno);
else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
addFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
- FF_COMPOUNDBEGIN, filename, line);
+ FF_COMPOUNDBEGIN, filename, lineno);
else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
addFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
- FF_COMPOUNDLAST, filename, line);
+ FF_COMPOUNDLAST, filename, lineno);
/* COMPOUNDLAST and COMPOUNDEND are synonyms */
else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
addFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
- FF_COMPOUNDLAST, filename, line);
+ FF_COMPOUNDLAST, filename, lineno);
else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
addFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
- FF_COMPOUNDMIDDLE, filename, line);
+ FF_COMPOUNDMIDDLE, filename, lineno);
else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
addFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
- FF_COMPOUNDONLY, filename, line);
+ FF_COMPOUNDONLY, filename, lineno);
else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
addFlagValue(Conf, recoded + strlen("COMPOUNDPERMITFLAG"),
- FF_COMPOUNDPERMITFLAG, filename, line);
+ FF_COMPOUNDPERMITFLAG, filename, lineno);
else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
addFlagValue(Conf, recoded + strlen("COMPOUNDFORBIDFLAG"),
- FF_COMPOUNDFORBIDFLAG, filename, line);
+ FF_COMPOUNDFORBIDFLAG, filename, lineno);
else if (STRNCMP(recoded, "FLAG") == 0)
{
char *s = recoded + strlen("FLAG");
@@ -626,14 +611,13 @@ NIImportOOAffixes(IspellDict * Conf, const char *filename)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("Ispell dictionary supports only default flag value at line %d of affix file \"%s\"",
- line, filename)));
+ lineno, filename)));
}
- if (recoded != str)
- pfree(recoded);
+ pfree(recoded);
}
FreeFile(affix);
- line = 0;
+ lineno = 0;
sprintf(scanbuf, "%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5);
@@ -643,18 +627,11 @@ NIImportOOAffixes(IspellDict * Conf, const char *filename)
errmsg("could not open affix file \"%s\": %m",
filename)));
- while (fgets(str, sizeof(str), affix))
+ while ((recoded = t_readline(affix)) != NULL)
{
- char *recoded;
-
- recoded = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
- PG_UTF8, GetDatabaseEncoding());
- if (recoded == NULL)
- elog(ERROR, "encoding conversion failed");
-
- line++;
+ lineno++;
if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
- continue;
+ goto nextline;
scanread = sscanf(recoded, scanbuf, type, sflag, find, repl, mask);
@@ -662,12 +639,12 @@ NIImportOOAffixes(IspellDict * Conf, const char *filename)
pfree(ptype);
ptype = lowerstr_ctx(type);
if (scanread < 4 || (STRNCMP(ptype, "sfx") && STRNCMP(ptype, "pfx")))
- continue;
+ goto nextline;
if (scanread == 4)
{
if (strlen(sflag) != 1)
- continue;
+ goto nextline;
flag = *sflag;
isSuffix = (STRNCMP(ptype, "sfx") == 0) ? true : false;
pfind = lowerstr_ctx(find);
@@ -683,7 +660,7 @@ NIImportOOAffixes(IspellDict * Conf, const char *filename)
int aflg = 0;
if (strlen(sflag) != 1 || flag != *sflag || flag == 0)
- continue;
+ goto nextline;
prepl = lowerstr_ctx(repl);
/* affix flag */
if ((ptr = strchr(prepl, '/')) != NULL)
@@ -710,8 +687,8 @@ NIImportOOAffixes(IspellDict * Conf, const char *filename)
pfree(pmask);
}
- if (recoded != str)
- pfree(recoded);
+ nextline:
+ pfree(recoded);
}
if (ptype)
@@ -733,13 +710,14 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
char find[BUFSIZ];
char repl[BUFSIZ];
char *s;
- int suffixes = 0;
- int prefixes = 0;
+ bool suffixes = false;
+ bool prefixes = false;
int flag = 0;
char flagflags = 0;
FILE *affix;
- int line = 0;
- int oldformat = 0;
+ int lineno = 0;
+ bool oldformat = false;
+ char *recoded = NULL;
checkTmpCtx();
@@ -752,16 +730,16 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
memset(Conf->flagval, 0, sizeof(Conf->flagval));
Conf->usecompound = false;
- while (fgets(str, sizeof(str), affix))
+ while ((recoded = t_readline(affix)) != NULL)
{
- if (pstr)
- pfree(pstr);
+ pstr = lowerstr(recoded);
+ pfree(recoded);
- pstr = recode_and_lowerstr(str);
+ lineno++;
- line++;
+ /* Skip comments and empty lines */
if (*pstr == '#' || *pstr == '\n')
- continue;
+ goto nextline;
if (STRNCMP(pstr, "compoundwords") == 0)
{
@@ -777,23 +755,23 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
Conf->flagval[(unsigned int) *s] = FF_COMPOUNDFLAG;
Conf->usecompound = true;
}
- oldformat++;
- continue;
+ oldformat = true;
+ goto nextline;
}
}
if (STRNCMP(pstr, "suffixes") == 0)
{
- suffixes = 1;
- prefixes = 0;
- oldformat++;
- continue;
+ suffixes = true;
+ prefixes = false;
+ oldformat = true;
+ goto nextline;
}
if (STRNCMP(pstr, "prefixes") == 0)
{
- suffixes = 0;
- prefixes = 1;
- oldformat++;
- continue;
+ suffixes = false;
+ prefixes = true;
+ oldformat = true;
+ goto nextline;
}
if (STRNCMP(pstr, "flag") == 0)
{
@@ -802,14 +780,14 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
while (*s && t_isspace(s))
s++;
- oldformat++;
+ oldformat = true;
/* allow only single-encoded flags */
if (pg_mblen(s) != 1)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("multibyte flag character is not allowed at line %d of affix file \"%s\"",
- line, filename)));
+ lineno, filename)));
if (*s == '*')
{
@@ -830,10 +808,10 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("multibyte flag character is not allowed at line %d of affix file \"%s\"",
- line, filename)));
+ lineno, filename)));
flag = (unsigned char) *s;
- continue;
+ goto nextline;
}
if (STRNCMP(str, "COMPOUNDFLAG") == 0 || STRNCMP(str, "COMPOUNDMIN") == 0 ||
STRNCMP(str, "PFX") == 0 || STRNCMP(str, "SFX") == 0)
@@ -842,23 +820,23 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("wrong affix file format for flag at line %d of affix file \"%s\"",
- line, filename)));
+ lineno, filename)));
FreeFile(affix);
NIImportOOAffixes(Conf, filename);
return;
}
if ((!suffixes) && (!prefixes))
- continue;
+ goto nextline;
- if (!parse_affentry(pstr, mask, find, repl, filename, line))
- continue;
+ if (!parse_affentry(pstr, mask, find, repl, filename, lineno))
+ goto nextline;
NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
- }
- FreeFile(affix);
- if (pstr)
+ nextline:
pfree(pstr);
+ }
+ FreeFile(affix);
}
static int
@@ -975,38 +953,55 @@ mkSPNode(IspellDict * Conf, int low, int high, int level)
return rs;
}
+/*
+ * Builds the Conf->Dictionary tree and AffixData from the imported dictionary
+ * and affixes.
+ */
void
NISortDictionary(IspellDict * Conf)
{
- size_t i;
- int naffix = 3;
+ int i;
+ int naffix = 0;
+ int curaffix;
checkTmpCtx();
/* compress affixes */
+
+ /* Count the number of different flags used in the dictionary */
+
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspellaffix);
- for (i = 1; i < Conf->nspell; i++)
- if (strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag))
+
+ naffix = 0;
+ for (i = 0; i < Conf->nspell; i++)
+ {
+ if (i == 0 || strncmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag, MAXFLAGLEN))
naffix++;
+ }
+ /*
+ * Fill in Conf->AffixData with the affixes that were used
+ * in the dictionary. Replace textual flag-field of Conf->Spell
+ * entries with indexes into Conf->AffixData array.
+ */
Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
- naffix = 1;
- Conf->AffixData[0] = pstrdup("");
- Conf->AffixData[1] = pstrdup(Conf->Spell[0]->p.flag);
- Conf->Spell[0]->p.d.affix = 1;
- Conf->Spell[0]->p.d.len = strlen(Conf->Spell[0]->word);
- for (i = 1; i < Conf->nspell; i++)
+
+ curaffix = -1;
+ for (i = 0; i < Conf->nspell; i++)
{
- if (strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[naffix]))
+ if (i == 0 || strncmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix], MAXFLAGLEN))
{
- naffix++;
- Conf->AffixData[naffix] = pstrdup(Conf->Spell[i]->p.flag);
+ curaffix++;
+ Assert(curaffix < naffix);
+ Conf->AffixData[curaffix] = pstrdup(Conf->Spell[i]->p.flag);
}
- Conf->Spell[i]->p.d.affix = naffix;
+
+ Conf->Spell[i]->p.d.affix = curaffix;
Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
}
Conf->lenAffixData = Conf->nAffixData = naffix;
+
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
@@ -1085,7 +1080,7 @@ mkANode(IspellDict * Conf, int low, int high, int level, int type)
}
static void
-mkVoidAffix(IspellDict * Conf, int issuffix, int startsuffix)
+mkVoidAffix(IspellDict * Conf, bool issuffix, int startsuffix)
{
int i,
cnt = 0;
@@ -1145,7 +1140,7 @@ NISortAffixes(IspellDict * Conf)
AFFIX *Affix;
size_t i;
CMPDAffix *ptr;
- int firstsuffix = -1;
+ int firstsuffix = Conf->naffixes;
checkTmpCtx();
@@ -1160,7 +1155,7 @@ NISortAffixes(IspellDict * Conf)
for (i = 0; i < Conf->naffixes; i++)
{
Affix = &(((AFFIX *) Conf->Affix)[i]);
- if (Affix->type == FF_SUFFIX && firstsuffix < 0)
+ if (Affix->type == FF_SUFFIX && i < firstsuffix)
firstsuffix = i;
if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
@@ -1185,12 +1180,12 @@ NISortAffixes(IspellDict * Conf)
Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
- mkVoidAffix(Conf, 1, firstsuffix);
- mkVoidAffix(Conf, 0, firstsuffix);
+ mkVoidAffix(Conf, true, firstsuffix);
+ mkVoidAffix(Conf, false, firstsuffix);
}
static AffixNodeData *
-FinfAffixes(AffixNode * node, const char *word, int wrdlen, int *level, int type)
+FindAffixes(AffixNode * node, const char *word, int wrdlen, int *level, int type)
{
AffixNodeData *StopLow,
*StopHigh,
@@ -1374,7 +1369,7 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag)
plevel = 0;
while (pnode)
{
- prefix = FinfAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
+ prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
if (!prefix)
break;
for (j = 0; j < prefix->naff; j++)
@@ -1398,7 +1393,7 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag)
int baselen = 0;
/* find possible suffix */
- suffix = FinfAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
+ suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
if (!suffix)
break;
/* foreach suffix check affix */
@@ -1416,7 +1411,7 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag)
swrdlen = strlen(newword);
while (pnode)
{
- prefix = FinfAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
+ prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
if (!prefix)
break;
for (j = 0; j < prefix->naff; j++)
@@ -1626,7 +1621,7 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
if (wordlen == level + 1)
{
/* well, it was last word */
- var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos);
+ var->stem[var->nstem] = pnstrdup(word + startpos, wordlen - startpos);
var->nstem++;
pfree(notprobed);
return var;
@@ -1641,7 +1636,7 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
/* we can find next word */
level++;
- var->stem[var->nstem] = strnduplicate(word + startpos, level - startpos);
+ var->stem[var->nstem] = pnstrdup(word + startpos, level - startpos);
var->nstem++;
node = Conf->Dictionary;
startpos = level;
@@ -1656,7 +1651,7 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
level++;
}
- var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos);
+ var->stem[var->nstem] = pnstrdup(word + startpos, wordlen - startpos);
var->nstem++;
pfree(notprobed);
return var;
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index cd1b60c8a4..dfc40f5da4 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -125,28 +125,47 @@ _t_isprint(const char *ptr)
}
#endif /* TS_USE_WIDE */
+
/*
- * Convert C-string from UTF8 to server encoding and
- * lower it
+ * Read the next line from a tsearch data file (expected to be in UTF-8), and
+ * convert it to database encoding if needed. The returned string is palloc'd.
+ * NULL return means EOF.
*/
char *
-recode_and_lowerstr(char *str)
+t_readline(FILE *fp)
{
- char *recoded;
- char *ret;
+ int len;
+ char *recoded;
+ char buf[4096]; /* lines must not be longer than this */
+
+ if (fgets(buf, sizeof(buf), fp) == NULL)
+ return NULL;
- recoded = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
- PG_UTF8, GetDatabaseEncoding());
+ len = strlen(buf);
- if (recoded == NULL)
- elog(ERROR, "encoding conversion failed");
+ /* Make sure the input is valid UTF-8 */
+ (void) pg_verify_mbstr(PG_UTF8, buf, len, false);
- ret = lowerstr(recoded);
+ /* And convert */
+ recoded = (char *) pg_do_encoding_conversion((unsigned char *) buf,
+ len,
+ PG_UTF8,
+ GetDatabaseEncoding());
- if (recoded != str)
- pfree(recoded);
+ if (recoded == NULL) /* should not happen */
+ elog(ERROR, "encoding conversion failed");
+
+ if (recoded == buf)
+ {
+ /*
+ * conversion didn't pstrdup, so we must.
+ * We can use the length of the original string, because
+ * no conversion was done.
+ */
+ recoded = pnstrdup(recoded, len);
+ }
- return ret;
+ return recoded;
}
char *
@@ -155,6 +174,9 @@ lowerstr(char *str)
return lowerstr_with_len(str, strlen(str));
}
+/*
+ * Returned string is palloc'd
+ */
char *
lowerstr_with_len(char *str, int len)
{
diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c
index 96368eeef6..abc848fe0e 100644
--- a/src/backend/tsearch/ts_parse.c
+++ b/src/backend/tsearch/ts_parse.c
@@ -308,7 +308,7 @@ LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
{
/*
* Dictionary normalizes lexemes, so we remove from stack all
- * used lexemes , return to basic mode and redo end of stack
+ * used lexemes, return to basic mode and redo end of stack
* (if it exists)
*/
if (res)
@@ -427,14 +427,14 @@ parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
* Headline framework
*/
static void
-hladdword(HeadlineText * prs, char *buf, int4 buflen, int type)
+hladdword(HeadlineParsedText * prs, char *buf, int4 buflen, int type)
{
while (prs->curwords >= prs->lenwords)
{
prs->lenwords *= 2;
- prs->words = (HeadlineWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWord));
+ prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
}
- memset(&(prs->words[prs->curwords]), 0, sizeof(HeadlineWord));
+ memset(&(prs->words[prs->curwords]), 0, sizeof(HeadlineWordEntry));
prs->words[prs->curwords].type = (uint8) type;
prs->words[prs->curwords].len = buflen;
prs->words[prs->curwords].word = palloc(buflen);
@@ -443,16 +443,16 @@ hladdword(HeadlineText * prs, char *buf, int4 buflen, int type)
}
static void
-hlfinditem(HeadlineText * prs, TSQuery query, char *buf, int buflen)
+hlfinditem(HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
{
int i;
QueryItem *item = GETQUERY(query);
- HeadlineWord *word;
+ HeadlineWordEntry *word;
while (prs->curwords + query->size >= prs->lenwords)
{
prs->lenwords *= 2;
- prs->words = (HeadlineWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWord));
+ prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
}
word = &(prs->words[prs->curwords - 1]);
@@ -462,7 +462,7 @@ hlfinditem(HeadlineText * prs, TSQuery query, char *buf, int buflen)
{
if (word->item)
{
- memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWord));
+ memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
prs->words[prs->curwords].item = item;
prs->words[prs->curwords].repeated = 1;
prs->curwords++;
@@ -475,7 +475,7 @@ hlfinditem(HeadlineText * prs, TSQuery query, char *buf, int buflen)
}
static void
-addHLParsedLex(HeadlineText * prs, TSQuery query, ParsedLex * lexs, TSLexeme * norms)
+addHLParsedLex(HeadlineParsedText * prs, TSQuery query, ParsedLex * lexs, TSLexeme * norms)
{
ParsedLex *tmplexs;
TSLexeme *ptr;
@@ -511,7 +511,7 @@ addHLParsedLex(HeadlineText * prs, TSQuery query, ParsedLex * lexs, TSLexeme * n
}
void
-hlparsetext(Oid cfgId, HeadlineText * prs, TSQuery query, char *buf, int4 buflen)
+hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int4 buflen)
{
int type,
lenlemm;
@@ -571,12 +571,12 @@ hlparsetext(Oid cfgId, HeadlineText * prs, TSQuery query, char *buf, int4 buflen
}
text *
-generatHeadline(HeadlineText * prs)
+generateHeadline(HeadlineParsedText * prs)
{
text *out;
int len = 128;
char *ptr;
- HeadlineWord *wrd = prs->words;
+ HeadlineWordEntry *wrd = prs->words;
out = (text *) palloc(len);
ptr = ((char *) out) + VARHDRSZ;
diff --git a/src/backend/tsearch/ts_utils.c b/src/backend/tsearch/ts_utils.c
index 89f1d63fbc..b320faa0ef 100644
--- a/src/backend/tsearch/ts_utils.c
+++ b/src/backend/tsearch/ts_utils.c
@@ -63,21 +63,29 @@ get_tsearch_config_filename(const char *basename,
return result;
}
-#define STOPBUFLEN 4096
+static int
+comparestr(const void *a, const void *b)
+{
+ return strcmp(*(char **) a, *(char **) b);
+}
+/*
+ * Reads a stopword file. Each word is passed through the 'wordop'
+ * function, if given. wordop may either modify its input in place
+ * or return a newly palloc'd copy (the original line is then freed).
+ */
void
-readstoplist(char *in, StopList * s)
+readstoplist(const char *fname, StopList *s, char *(*wordop) (char *))
{
char **stop = NULL;
s->len = 0;
- if (in && *in)
+ if (fname && *fname)
{
- char *filename = get_tsearch_config_filename(in, "stop");
+ char *filename = get_tsearch_config_filename(fname, "stop");
FILE *hin;
- char buf[STOPBUFLEN];
+ char *line;
int reallen = 0;
- int line = 0;
if ((hin = AllocateFile(filename, "r")) == NULL)
ereport(ERROR,
@@ -85,65 +93,56 @@ readstoplist(char *in, StopList * s)
errmsg("could not open stopword file \"%s\": %m",
filename)));
- while (fgets(buf, STOPBUFLEN, hin))
+ while ((line = t_readline(hin)) != NULL)
{
- char *pbuf = buf;
+ char *pbuf = line;
- line++;
- while (*pbuf && !isspace(*pbuf))
+ /* Truncate the line at the first whitespace character */
+ while (*pbuf && !t_isspace(pbuf))
pbuf++;
*pbuf = '\0';
- if (*buf == '\0')
- continue;
-
- if (!pg_verifymbstr(buf, strlen(buf), true))
+ /* Skip empty lines */
+ if (*line == '\0')
{
- FreeFile(hin);
- ereport(ERROR,
- (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
- errmsg("invalid multibyte encoding at line %d in file \"%s\"",
- line, filename)));
+ pfree(line);
+ continue;
}
if (s->len >= reallen)
{
if (reallen == 0)
{
- reallen = 16;
+ reallen = 64;
stop = (char **) palloc(sizeof(char *) * reallen);
}
else
{
reallen *= 2;
- stop = (char **) repalloc((void *) stop, sizeof(char *) * reallen);
+ stop = (char **) repalloc((void *) stop,
+ sizeof(char *) * reallen);
}
}
-
- if (s->wordop)
- stop[s->len] = s->wordop(buf);
+ if (wordop)
+ {
+ stop[s->len] = wordop(line);
+ if (stop[s->len] != line)
+ pfree(line);
+ }
else
- stop[s->len] = pstrdup(buf);
+ stop[s->len] = line;
(s->len)++;
}
+
FreeFile(hin);
pfree(filename);
}
s->stop = stop;
-}
-static int
-comparestr(const void *a, const void *b)
-{
- return strcmp(*(char **) a, *(char **) b);
-}
-
-void
-sortstoplist(StopList * s)
-{
+ /* Sort to allow binary searching */
if (s->stop && s->len > 0)
qsort(s->stop, s->len, sizeof(char *), comparestr);
}
diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c
index aa82405484..d9bea57336 100644
--- a/src/backend/tsearch/wparser.c
+++ b/src/backend/tsearch/wparser.c
@@ -300,7 +300,7 @@ ts_headline_byid_opt(PG_FUNCTION_ARGS)
text *in = PG_GETARG_TEXT_P(1);
TSQuery query = PG_GETARG_TSQUERY(2);
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
- HeadlineText prs;
+ HeadlineParsedText prs;
List *prsoptions;
text *out;
TSConfigCacheEntry *cfg;
@@ -309,9 +309,9 @@ ts_headline_byid_opt(PG_FUNCTION_ARGS)
cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
prsobj = lookup_ts_parser_cache(cfg->prsId);
- memset(&prs, 0, sizeof(HeadlineText));
+ memset(&prs, 0, sizeof(HeadlineParsedText));
prs.lenwords = 32;
- prs.words = (HeadlineWord *) palloc(sizeof(HeadlineWord) * prs.lenwords);
+ prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
hlparsetext(cfg->cfgId, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
@@ -325,7 +325,7 @@ ts_headline_byid_opt(PG_FUNCTION_ARGS)
PointerGetDatum(prsoptions),
PointerGetDatum(query));
- out = generatHeadline(&prs);
+ out = generateHeadline(&prs);
PG_FREE_IF_COPY(in, 1);
PG_FREE_IF_COPY(query, 2);
diff --git a/src/include/tsearch/dicts/spell.h b/src/include/tsearch/dicts/spell.h
index d8a8c73839..ef15dd0466 100644
--- a/src/include/tsearch/dicts/spell.h
+++ b/src/include/tsearch/dicts/spell.h
@@ -18,12 +18,17 @@
#include "tsearch/dicts/regis.h"
#include "tsearch/ts_public.h"
+/*
+ * Max length of a flag name. Names longer than this will be truncated
+ * to the maximum.
+ */
+#define MAXFLAGLEN 16
+
struct SPNode;
typedef struct
{
- uint32
- val:8,
+ uint32 val:8,
isword:1,
compoundflag:4,
affix:19;
@@ -54,22 +59,25 @@ typedef struct spell_struct
{
union
{
- char flag[16];
+ /*
+ * flag is filled in by NIImportDictionary. After NISortDictionary,
+ * d is valid and flag is invalid.
+ */
+ char flag[MAXFLAGLEN];
struct
{
int affix;
int len;
} d;
} p;
- char word[1];
+ char word[1]; /* variable length, null-terminated */
} SPELL;
#define SPELLHDRSZ (offsetof(SPELL, word))
typedef struct aff_struct
{
- uint32
- flag:8,
+ uint32 flag:8,
type:1,
flagflags:7,
issimple:1,
@@ -85,11 +93,16 @@ typedef struct aff_struct
} AFFIX;
/*
- * affixes use deictinary flags too
+ * affixes use dictionary flags too
*/
#define FF_COMPOUNDPERMITFLAG 0x10
#define FF_COMPOUNDFORBIDFLAG 0x20
#define FF_CROSSPRODUCT 0x40
+
+/*
+ * Don't change the order of these. Initialization sorts by these,
+ * and expects prefixes to come first after sorting.
+ */
#define FF_SUFFIX 1
#define FF_PREFIX 0
@@ -97,8 +110,7 @@ struct AffixNode;
typedef struct
{
- uint32
- val:8,
+ uint32 val:8,
naff:24;
AFFIX **aff;
struct AffixNode *node;
@@ -126,9 +138,13 @@ typedef struct
int naffixes;
AFFIX *Affix;
- int nspell;
- int mspell;
+ /*
+ * Temporary array of all words in the dict file. Only used during
+ * initialization.
+ */
SPELL **Spell;
+ int nspell; /* number of valid entries in Spell array */
+ int mspell; /* allocated length of Spell array */
AffixNode *Suffix;
AffixNode *Prefix;
diff --git a/src/include/tsearch/ts_locale.h b/src/include/tsearch/ts_locale.h
index 42a677ef96..cf84f702cd 100644
--- a/src/include/tsearch/ts_locale.h
+++ b/src/include/tsearch/ts_locale.h
@@ -38,11 +38,11 @@
#ifdef TS_USE_WIDE
-size_t char2wchar(wchar_t *to, const char *from, size_t len);
+extern size_t char2wchar(wchar_t *to, const char *from, size_t len);
#ifdef WIN32
-size_t wchar2char(char *to, const wchar_t *from, size_t len);
+extern size_t wchar2char(char *to, const wchar_t *from, size_t len);
#else /* WIN32 */
/* correct wcstombs */
@@ -81,8 +81,8 @@ extern int _t_isprint(const char *ptr);
#define COPYCHAR(d,s) TOUCHAR(d) = TOUCHAR(s)
#endif
-char *lowerstr(char *str);
-char *lowerstr_with_len(char *str, int len);
-char *recode_and_lowerstr(char *str);
+extern char *lowerstr(char *str);
+extern char *lowerstr_with_len(char *str, int len);
+extern char *t_readline(FILE *fp);
#endif /* __TSLOCALE_H__ */
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h
index d8883ead0b..a69c4c9e53 100644
--- a/src/include/tsearch/ts_public.h
+++ b/src/include/tsearch/ts_public.h
@@ -71,12 +71,11 @@ typedef struct
{
int len;
char **stop;
- char *(*wordop) (char *);
} StopList;
-extern void sortstoplist(StopList * s);
-extern void readstoplist(char *in, StopList * s);
-extern bool searchstoplist(StopList * s, char *key);
+extern void readstoplist(const char *fname, StopList *s,
+ char *(*wordop) (char *));
+extern bool searchstoplist(StopList *s, char *key);
/*
* Interface with dictionaries
@@ -102,9 +101,8 @@ typedef struct
#define TSL_ADDPOS 0x01
/*
- * Struct for supporting complex dictionaries like
- * thesaurus, pointer to is an 4-th argument for
- * dictlexize method
+ * Struct for supporting complex dictionaries such as thesaurus.
+ * The 4th argument of the dictlexize method is a pointer to this struct.
*/
typedef struct
{
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index 754148de1a..382cf4b35e 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -13,6 +13,7 @@
#define _PG_TS_UTILS_H_
#include "tsearch/ts_type.h"
+#include "tsearch/ts_public.h"
/*
* Common parse definitions for tsvector and tsquery
@@ -38,7 +39,8 @@ typedef struct
extern bool gettoken_tsvector(TSVectorParseState *state);
-struct ParseQueryNode;
+struct ParseQueryNode; /* private in backend/utils/adt/tsquery.c */
+
typedef struct
{
char *buffer; /* entire string we are scanning */
@@ -46,7 +48,7 @@ typedef struct
int4 state;
int4 count;
- /* reverse polish notation in list (for temprorary usage) */
+ /* reverse polish notation in list (for temporary usage) */
struct ParseQueryNode *str;
/* number in str */
@@ -102,36 +104,12 @@ extern void parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen);
* headline framework, flow in common to generate:
* 1 parse text with hlparsetext
* 2 parser-specific function to find part
- * 3 generatHeadline to generate result text
+ * 3 generateHeadline to generate result text
*/
-typedef struct
-{
- uint32 selected:1,
- in:1,
- replace:1,
- repeated:1,
- unused:4,
- type:8,
- len:16;
- char *word;
- QueryItem *item;
-} HeadlineWord;
-
-typedef struct
-{
- HeadlineWord *words;
- int4 lenwords;
- int4 curwords;
- char *startsel;
- char *stopsel;
- int2 startsellen;
- int2 stopsellen;
-} HeadlineText;
-
-extern void hlparsetext(Oid cfgId, HeadlineText * prs, TSQuery query,
+extern void hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query,
char *buf, int4 buflen);
-extern text *generatHeadline(HeadlineText * prs);
+extern text *generateHeadline(HeadlineParsedText * prs);
/*
* token/node types for parsing