diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml
index 19f58511c8..fbadc25d18 100644
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -3114,6 +3114,21 @@ CREATE TEXT SEARCH DICTIONARY english_stem (
+
+ Dictionaries in Shared Memory
+
+
+ Some dictionaries, especially Ispell, consumes
+ a significant amount of memory, in some cases tens of megabytes. Most of
+ them store the data in text files, and building the in-memory structure is
+ both CPU and time-consuming. Instead of doing this in each backend when
+ it needs a dictionary for the first time, the compiled dictionary may be
+ stored in dynamic shared memory so that it may be reused by other backends.
+ Currently only Ispell supports loading into
+ dynamic shared memory.
+
+
+
diff --git a/src/backend/tsearch/dict_ispell.c b/src/backend/tsearch/dict_ispell.c
index 39f1e6faeb..ced52d2790 100644
--- a/src/backend/tsearch/dict_ispell.c
+++ b/src/backend/tsearch/dict_ispell.c
@@ -5,6 +5,15 @@
*
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
*
+ * By default all Ispell dictionaries are stored in DSM. But if the amount
+ * of memory exceeds max_shared_dictionaries_size, then the dictionary will be
+ * allocated in private backend memory (in dictCtx context).
+ *
+ * All necessary data are built within dispell_build() function. But
+ * structures for regular expressions are compiled on first demand and
+ * stored using AffixReg array. It is because regex_t and Regis cannot be
+ * stored in shared memory easily.
+ *
*
* IDENTIFICATION
* src/backend/tsearch/dict_ispell.c
@@ -14,8 +23,10 @@
#include "postgres.h"
#include "commands/defrem.h"
+#include "storage/dsm.h"
#include "tsearch/dicts/spell.h"
#include "tsearch/ts_locale.h"
+#include "tsearch/ts_shared.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
@@ -26,54 +37,126 @@ typedef struct
IspellDict obj;
} DictISpell;
+static void parse_dictoptions(List *dictoptions,
+ char **dictfile, char **afffile, char **stopfile);
+static void *dispell_build(List *dictoptions, Size *size);
+
Datum
dispell_init(PG_FUNCTION_ARGS)
{
DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0);
DictISpell *d;
- bool affloaded = false,
- dictloaded = false,
- stoploaded = false;
- ListCell *l;
+ void *dict_location;
+ char *stopfile;
d = (DictISpell *) palloc0(sizeof(DictISpell));
- NIStartBuild(&(d->obj));
+ parse_dictoptions(init_data->dict_options, NULL, NULL, &stopfile);
+
+ if (stopfile)
+ readstoplist(stopfile, &(d->stoplist), lowerstr);
+
+ dict_location = ts_dict_shmem_location(init_data, dispell_build);
+ Assert(dict_location);
+
+ d->obj.dict = (IspellDictData *) dict_location;
+ d->obj.reg = (AffixReg *) palloc0(d->obj.dict->nAffix *
+ sizeof(AffixReg));
+ /* Current memory context is dictionary's private memory context */
+ d->obj.dictCtx = CurrentMemoryContext;
+
+ PG_RETURN_POINTER(d);
+}
+
+Datum
+dispell_lexize(PG_FUNCTION_ARGS)
+{
+ DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0);
+ char *in = (char *) PG_GETARG_POINTER(1);
+ int32 len = PG_GETARG_INT32(2);
+ char *txt;
+ TSLexeme *res;
+ TSLexeme *ptr,
+ *cptr;
+
+ if (len <= 0)
+ PG_RETURN_POINTER(NULL);
+
+ txt = lowerstr_with_len(in, len);
+ res = NINormalizeWord(&(d->obj), txt);
+
+ if (res == NULL)
+ PG_RETURN_POINTER(NULL);
+
+ cptr = res;
+ for (ptr = cptr; ptr->lexeme; ptr++)
+ {
+ if (searchstoplist(&(d->stoplist), ptr->lexeme))
+ {
+ pfree(ptr->lexeme);
+ ptr->lexeme = NULL;
+ }
+ else
+ {
+ if (cptr != ptr)
+ memcpy(cptr, ptr, sizeof(TSLexeme));
+ cptr++;
+ }
+ }
+ cptr->lexeme = NULL;
+
+ PG_RETURN_POINTER(res);
+}
+
+static void
+parse_dictoptions(List *dictoptions, char **dictfile, char **afffile,
+ char **stopfile)
+{
+ ListCell *l;
- foreach(l, init_data->dict_options)
+ if (dictfile)
+ *dictfile = NULL;
+ if (afffile)
+ *afffile = NULL;
+ if (stopfile)
+ *stopfile = NULL;
+
+ foreach(l, dictoptions)
{
DefElem *defel = (DefElem *) lfirst(l);
if (strcmp(defel->defname, "dictfile") == 0)
{
- if (dictloaded)
+ if (!dictfile)
+ continue;
+
+ if (*dictfile)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple DictFile parameters")));
- NIImportDictionary(&(d->obj),
- get_tsearch_config_filename(defGetString(defel),
- "dict"));
- dictloaded = true;
+ *dictfile = get_tsearch_config_filename(defGetString(defel), "dict");
}
else if (strcmp(defel->defname, "afffile") == 0)
{
- if (affloaded)
+ if (!afffile)
+ continue;
+
+ if (*afffile)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple AffFile parameters")));
- NIImportAffixes(&(d->obj),
- get_tsearch_config_filename(defGetString(defel),
- "affix"));
- affloaded = true;
+ *afffile = get_tsearch_config_filename(defGetString(defel), "affix");
}
else if (strcmp(defel->defname, "stopwords") == 0)
{
- if (stoploaded)
+ if (!stopfile)
+ continue;
+
+ if (*stopfile)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple StopWords parameters")));
- readstoplist(defGetString(defel), &(d->stoplist), lowerstr);
- stoploaded = true;
+ *stopfile = defGetString(defel);
}
else
{
@@ -83,66 +166,52 @@ dispell_init(PG_FUNCTION_ARGS)
defel->defname)));
}
}
+}
- if (affloaded && dictloaded)
- {
- NISortDictionary(&(d->obj));
- NISortAffixes(&(d->obj));
- }
- else if (!affloaded)
+/*
+ * Build the dictionary.
+ *
+ * Result is palloc'ed.
+ */
+static void *
+dispell_build(List *dictoptions, Size *size)
+{
+ IspellDictBuild build;
+ char *dictfile,
+ *afffile;
+
+ parse_dictoptions(dictoptions, &dictfile, &afffile, NULL);
+
+ if (!afffile)
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing AffFile parameter")));
}
- else
+ else if (!dictfile)
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing DictFile parameter")));
}
- NIFinishBuild(&(d->obj));
+ MemSet(&build, 0, sizeof(build));
+ NIStartBuild(&build);
- PG_RETURN_POINTER(d);
-}
+ /* Read files */
+ NIImportDictionary(&build, dictfile);
+ NIImportAffixes(&build, afffile);
-Datum
-dispell_lexize(PG_FUNCTION_ARGS)
-{
- DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0);
- char *in = (char *) PG_GETARG_POINTER(1);
- int32 len = PG_GETARG_INT32(2);
- char *txt;
- TSLexeme *res;
- TSLexeme *ptr,
- *cptr;
+ /* Build persistent data to use by backends */
+ NISortDictionary(&build);
+ NISortAffixes(&build);
- if (len <= 0)
- PG_RETURN_POINTER(NULL);
+ NICopyData(&build);
- txt = lowerstr_with_len(in, len);
- res = NINormalizeWord(&(d->obj), txt);
-
- if (res == NULL)
- PG_RETURN_POINTER(NULL);
+ /* Release temporary data */
+ NIFinishBuild(&build);
- cptr = res;
- for (ptr = cptr; ptr->lexeme; ptr++)
- {
- if (searchstoplist(&(d->stoplist), ptr->lexeme))
- {
- pfree(ptr->lexeme);
- ptr->lexeme = NULL;
- }
- else
- {
- if (cptr != ptr)
- memcpy(cptr, ptr, sizeof(TSLexeme));
- cptr++;
- }
- }
- cptr->lexeme = NULL;
-
- PG_RETURN_POINTER(res);
+ /* Return the buffer and its size */
+ *size = build.dict_size;
+ return build.dict;
}
diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c
index 09297e384c..db4ef90228 100644
--- a/src/backend/tsearch/spell.c
+++ b/src/backend/tsearch/spell.c
@@ -23,33 +23,35 @@
* Compilation of a dictionary
* ---------------------------
*
- * A compiled dictionary is stored in the IspellDict structure. Compilation of
- * a dictionary is divided into the several steps:
+ * A compiled dictionary is stored in the following structures:
+ * - IspellDictBuild - stores temporary data and IspellDictData
+ * - IspellDictData - stores permanent data used within NINormalizeWord()
+ * Compilation of the dictionary is divided into the several steps:
* - NIImportDictionary() - stores each word of a .dict file in the
* temporary Spell field.
- * - NIImportAffixes() - stores affix rules of an .affix file in the
- * Affix field (not temporary) if an .affix file has the Ispell format.
+ * - NIImportAffixes() - stores affix rules of an .affix file in the temporary
+ * Affix field if an .affix file has the Ispell format.
* -> NIImportOOAffixes() - stores affix rules if an .affix file has the
* Hunspell format. The AffixData field is initialized if AF parameter
* is defined.
* - NISortDictionary() - builds a prefix tree (Trie) from the words list
- * and stores it in the Dictionary field. The words list is got from the
+ * and stores it in the DictNodes field. The words list is got from the
* Spell field. The AffixData field is initialized if AF parameter is not
* defined.
* - NISortAffixes():
* - builds a list of compound affixes from the affix list and stores it
* in the CompoundAffix.
* - builds prefix trees (Trie) from the affix list for prefixes and suffixes
- * and stores them in Suffix and Prefix fields.
+ * and stores them in SuffixNodes and PrefixNodes fields.
* The affix list is got from the Affix field.
+ * Persistent data of the dictionary is copied within NICopyData().
*
* Memory management
* -----------------
*
- * The IspellDict structure has the Spell field which is used only in compile
- * time. The Spell field stores a words list. It can take a lot of memory.
- * Therefore when a dictionary is compiled this field is cleared by
- * NIFinishBuild().
+ * The IspellDictBuild structure has the temporary data which is used only in
+ * compile time. It can take a lot of memory. Therefore after compiling the
+ * dictionary this data is cleared by NIFinishBuild().
*
* All resources which should cleared by NIFinishBuild() is initialized using
* tmpalloc() and tmpalloc0().
@@ -73,112 +75,147 @@
* after the initialization is done. During initialization,
* CurrentMemoryContext is the long-lived memory context associated
* with the dictionary cache entry. We keep the short-lived stuff
- * in the Conf->buildCxt context.
+ * in the ConfBuild->buildCxt context.
*/
-#define tmpalloc(sz) MemoryContextAlloc(Conf->buildCxt, (sz))
-#define tmpalloc0(sz) MemoryContextAllocZero(Conf->buildCxt, (sz))
+#define tmpalloc(sz) MemoryContextAlloc(ConfBuild->buildCxt, (sz))
+#define tmpalloc0(sz) MemoryContextAllocZero(ConfBuild->buildCxt, (sz))
-#define tmpstrdup(str) MemoryContextStrdup(Conf->buildCxt, (str))
+#define tmpstrdup(str) MemoryContextStrdup(ConfBuild->buildCxt, (str))
/*
* Prepare for constructing an ISpell dictionary.
*
- * The IspellDict struct is assumed to be zeroed when allocated.
+ * The IspellDictBuild struct is assumed to be zeroed when allocated.
*/
void
-NIStartBuild(IspellDict *Conf)
+NIStartBuild(IspellDictBuild *ConfBuild)
{
+ uint32 dict_size;
+
/*
* The temp context is a child of CurTransactionContext, so that it will
* go away automatically on error.
*/
- Conf->buildCxt = AllocSetContextCreate(CurTransactionContext,
- "Ispell dictionary init context",
- ALLOCSET_DEFAULT_SIZES);
+ ConfBuild->buildCxt = AllocSetContextCreate(CurTransactionContext,
+ "Ispell dictionary init context",
+ ALLOCSET_DEFAULT_SIZES);
+
+ /*
+ * Allocate buffer for the dictionary in current context not in buildCxt.
+ */
+ dict_size = MAXALIGN(IspellDictDataHdrSize);
+ ConfBuild->dict = palloc0(dict_size);
+ ConfBuild->dict_size = dict_size;
}
/*
- * Clean up when dictionary construction is complete.
+ * Copy compiled and persistent data into IspellDictData.
*/
void
-NIFinishBuild(IspellDict *Conf)
+NICopyData(IspellDictBuild *ConfBuild)
{
- /* Release no-longer-needed temp memory */
- MemoryContextDelete(Conf->buildCxt);
- /* Just for cleanliness, zero the now-dangling pointers */
- Conf->buildCxt = NULL;
- Conf->Spell = NULL;
- Conf->firstfree = NULL;
- Conf->CompoundAffixFlags = NULL;
-}
+ IspellDictData *dict;
+ uint32 size;
+ int i;
+ uint32 *offsets,
+ offset;
+ /*
+ * Calculate necessary space
+ */
+ size = ConfBuild->nAffixData * sizeof(uint32);
+ size += ConfBuild->AffixDataEnd;
-/*
- * "Compact" palloc: allocate without extra palloc overhead.
- *
- * Since we have no need to free the ispell data items individually, there's
- * not much value in the per-chunk overhead normally consumed by palloc.
- * Getting rid of it is helpful since ispell can allocate a lot of small nodes.
- *
- * We currently pre-zero all data allocated this way, even though some of it
- * doesn't need that. The cpalloc and cpalloc0 macros are just documentation
- * to indicate which allocations actually require zeroing.
- */
-#define COMPACT_ALLOC_CHUNK 8192 /* amount to get from palloc at once */
-#define COMPACT_MAX_REQ 1024 /* must be < COMPACT_ALLOC_CHUNK */
+ size += ConfBuild->nAffix * sizeof(uint32);
+ size += ConfBuild->AffixSize;
-static void *
-compact_palloc0(IspellDict *Conf, size_t size)
-{
- void *result;
+ size += ConfBuild->DictNodes.NodesEnd;
+ size += ConfBuild->PrefixNodes.NodesEnd;
+ size += ConfBuild->SuffixNodes.NodesEnd;
- /* Should only be called during init */
- Assert(Conf->buildCxt != NULL);
+ size += sizeof(CMPDAffix) * ConfBuild->nCompoundAffix;
- /* No point in this for large chunks */
- if (size > COMPACT_MAX_REQ)
- return palloc0(size);
+ /*
+ * Copy data itself
+ */
+ ConfBuild->dict_size = IspellDictDataHdrSize + size;
+ ConfBuild->dict = repalloc(ConfBuild->dict, ConfBuild->dict_size);
+
+ dict = ConfBuild->dict;
+
+ /* AffixData */
+ dict->nAffixData = ConfBuild->nAffixData;
+ dict->AffixDataStart = sizeof(uint32) * ConfBuild->nAffixData;
+ memcpy(DictAffixDataOffset(dict), ConfBuild->AffixDataOffset,
+ sizeof(uint32) * ConfBuild->nAffixData);
+ memcpy(DictAffixData(dict), ConfBuild->AffixData, ConfBuild->AffixDataEnd);
+
+ /* Affix array */
+ dict->nAffix = ConfBuild->nAffix;
+ dict->AffixOffsetStart = dict->AffixDataStart + ConfBuild->AffixDataEnd;
+ dict->AffixStart = dict->AffixOffsetStart + sizeof(uint32) * ConfBuild->nAffix;
+ offsets = (uint32 *) DictAffixOffset(dict);
+ offset = 0;
+ for (i = 0; i < ConfBuild->nAffix; i++)
+ {
+ AFFIX *affix;
+ uint32 size = AffixGetSize(ConfBuild->Affix[i]);
- /* Keep everything maxaligned */
- size = MAXALIGN(size);
+ offsets[i] = offset;
+ affix = (AFFIX *) DictAffixGet(dict, i);
+ Assert(affix);
- /* Need more space? */
- if (size > Conf->avail)
- {
- Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
- Conf->avail = COMPACT_ALLOC_CHUNK;
- }
+ memcpy(affix, ConfBuild->Affix[i], size);
- result = (void *) Conf->firstfree;
- Conf->firstfree += size;
- Conf->avail -= size;
+ offset += size;
+ }
- return result;
+ /* DictNodes prefix tree */
+ dict->DictNodesStart = dict->AffixStart + offset;
+ memcpy(DictDictNodes(dict), ConfBuild->DictNodes.Nodes,
+ ConfBuild->DictNodes.NodesEnd);
+
+ /* PrefixNodes prefix tree */
+ dict->PrefixNodesStart = dict->DictNodesStart + ConfBuild->DictNodes.NodesEnd;
+ memcpy(DictPrefixNodes(dict), ConfBuild->PrefixNodes.Nodes,
+ ConfBuild->PrefixNodes.NodesEnd);
+
+ /* SuffixNodes prefix tree */
+ dict->SuffixNodesStart = dict->PrefixNodesStart + ConfBuild->PrefixNodes.NodesEnd;
+ memcpy(DictSuffixNodes(dict), ConfBuild->SuffixNodes.Nodes,
+ ConfBuild->SuffixNodes.NodesEnd);
+
+ /* CompoundAffix array */
+ dict->CompoundAffixStart = dict->SuffixNodesStart +
+ ConfBuild->SuffixNodes.NodesEnd;
+ memcpy(DictCompoundAffix(dict), ConfBuild->CompoundAffix,
+ sizeof(CMPDAffix) * ConfBuild->nCompoundAffix);
}
-#define cpalloc(size) compact_palloc0(Conf, size)
-#define cpalloc0(size) compact_palloc0(Conf, size)
-
-static char *
-cpstrdup(IspellDict *Conf, const char *str)
+/*
+ * Clean up when dictionary construction is complete.
+ */
+void
+NIFinishBuild(IspellDictBuild *ConfBuild)
{
- char *res = cpalloc(strlen(str) + 1);
-
- strcpy(res, str);
- return res;
+ /* Release no-longer-needed temp memory */
+ MemoryContextDelete(ConfBuild->buildCxt);
+ /* Just for cleanliness, zero the now-dangling pointers */
+ ConfBuild->buildCxt = NULL;
+ ConfBuild->Spell = NULL;
+ ConfBuild->CompoundAffixFlags = NULL;
}
-
/*
* Apply lowerstr(), producing a temporary result (in the buildCxt).
*/
static char *
-lowerstr_ctx(IspellDict *Conf, const char *src)
+lowerstr_ctx(IspellDictBuild *ConfBuild, const char *src)
{
MemoryContext saveCtx;
char *dst;
- saveCtx = MemoryContextSwitchTo(Conf->buildCxt);
+ saveCtx = MemoryContextSwitchTo(ConfBuild->buildCxt);
dst = lowerstr(src);
MemoryContextSwitchTo(saveCtx);
@@ -190,7 +227,7 @@ lowerstr_ctx(IspellDict *Conf, const char *src)
#define STRNCMP(s,p) strncmp( (s), (p), strlen(p) )
#define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
-#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T )
+#define GETCHAR(A,N,T) GETWCHAR( AffixFieldRepl(A), (A)->replen, N, T )
static char *VoidString = "";
@@ -311,18 +348,189 @@ strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
static int
cmpaffix(const void *s1, const void *s2)
{
- const AFFIX *a1 = (const AFFIX *) s1;
- const AFFIX *a2 = (const AFFIX *) s2;
+ const AFFIX *a1 = *((AFFIX *const *) s1);
+ const AFFIX *a2 = *((AFFIX *const *) s2);
if (a1->type < a2->type)
return -1;
if (a1->type > a2->type)
return 1;
if (a1->type == FF_PREFIX)
- return strcmp(a1->repl, a2->repl);
+ return strcmp(AffixFieldRepl(a1), AffixFieldRepl(a2));
else
- return strbcmp((const unsigned char *) a1->repl,
- (const unsigned char *) a2->repl);
+ return strbcmp((const unsigned char *) AffixFieldRepl(a1),
+ (const unsigned char *) AffixFieldRepl(a2));
+}
+
+/*
+ * Allocate space for AffixData.
+ */
+static void
+InitAffixData(IspellDictBuild *ConfBuild, int numAffixData)
+{
+ uint32 size;
+
+ size = 8 * 1024 /* Reserve 8KB for data */;
+
+ ConfBuild->AffixData = (char *) tmpalloc(size);
+ ConfBuild->AffixDataSize = size;
+ ConfBuild->AffixDataOffset = (uint32 *) tmpalloc(numAffixData * sizeof(uint32));
+ ConfBuild->nAffixData = 0;
+ ConfBuild->mAffixData= numAffixData;
+
+ /* Save offset of the end of data */
+ ConfBuild->AffixDataEnd = 0;
+}
+
+/*
+ * Add affix set of affix flags into IspellDict struct. If IspellDict doesn't
+ * fit new affix set then resize it.
+ *
+ * ConfBuild: building structure for the current dictionary.
+ * AffixSet: set of affix flags.
+ */
+static void
+AddAffixSet(IspellDictBuild *ConfBuild, const char *AffixSet,
+ uint32 AffixSetLen)
+{
+ /*
+ * Check available space for AffixSet.
+ */
+ if (ConfBuild->AffixDataEnd + AffixSetLen + 1 /* \0 */ >=
+ ConfBuild->AffixDataSize)
+ {
+ uint32 newsize = Max(ConfBuild->AffixDataSize + 8 * 1024 /* 8KB */,
+ ConfBuild->AffixDataSize + AffixSetLen + 1);
+
+ ConfBuild->AffixData = (char *) repalloc(ConfBuild->AffixData, newsize);
+ ConfBuild->AffixDataSize = newsize;
+ }
+
+ /* Check available number of offsets */
+ if (ConfBuild->nAffixData >= ConfBuild->mAffixData)
+ {
+ ConfBuild->mAffixData *= 2;
+ ConfBuild->AffixDataOffset = (uint32 *) repalloc(ConfBuild->AffixDataOffset,
+ sizeof(uint32) * ConfBuild->mAffixData);
+ }
+
+ ConfBuild->AffixDataOffset[ConfBuild->nAffixData] = ConfBuild->AffixDataEnd;
+ StrNCpy(AffixDataGet(ConfBuild, ConfBuild->nAffixData),
+ AffixSet, AffixSetLen + 1);
+
+ /* Save offset of the end of data */
+ ConfBuild->AffixDataEnd += AffixSetLen + 1;
+ ConfBuild->nAffixData++;
+}
+
+/*
+ * Allocate space for prefix tree node.
+ *
+ * ConfBuild: building structure for the current dictionary.
+ * array: NodeArray where to allocate new node.
+ * length: number of allocated NodeData.
+ * sizeNodeData: minimum size of each NodeData.
+ * sizeNodeHeader: size of header of new node.
+ *
+ * Returns an offset of new node in NodeArray->Nodes.
+ */
+static uint32
+AllocateNode(IspellDictBuild *ConfBuild, NodeArray *array, uint32 length,
+ uint32 sizeNodeData, uint32 sizeNodeHeader)
+{
+ uint32 node_offset;
+ uint32 size;
+
+ size = sizeNodeHeader + length * sizeNodeData;
+ size = MAXALIGN(size);
+
+ if (array->NodesSize == 0)
+ {
+ array->NodesSize = size * 32; /* Reserve space for next levels of the
+ * prefix tree */
+ array->Nodes = (char *) tmpalloc(array->NodesSize);
+ array->NodesEnd = 0;
+ }
+ else if (array->NodesEnd + size >= array->NodesSize)
+ {
+ array->NodesSize = Max(array->NodesSize * 2, array->NodesSize + size);
+ array->Nodes = (char *) repalloc(array->Nodes, array->NodesSize);
+ }
+
+ node_offset = array->NodesEnd;
+ array->NodesEnd += size;
+
+ return node_offset;
+}
+
+/*
+ * Allocate space for SPNode.
+ *
+ * Returns an offset of new node in ConfBuild->DictNodes->Nodes.
+ */
+static uint32
+AllocateSPNode(IspellDictBuild *ConfBuild, uint32 length)
+{
+ uint32 offset;
+ SPNode *node;
+ SPNodeData *data;
+ uint32 i;
+
+ offset = AllocateNode(ConfBuild, &ConfBuild->DictNodes, length,
+ sizeof(SPNodeData), SPNHDRSZ);
+ node = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, offset);
+ node->length = length;
+
+ /*
+ * Initialize all SPNodeData with default values. We cannot use memset()
+ * here because not all fields have 0 as default value.
+ */
+ for (i = 0; i < length; i++)
+ {
+ data = &(node->data[i]);
+ data->val = 0;
+ data->affix = ISPELL_INVALID_INDEX;
+ data->compoundflag = 0;
+ data->isword = 0;
+ data->node_offset = ISPELL_INVALID_OFFSET;
+ }
+
+ return offset;
+}
+
+/*
+ * Allocate space for AffixNode.
+ *
+ * Returns an offset of new node in NodeArray->Nodes.
+ */
+static uint32
+AllocateAffixNode(IspellDictBuild *ConfBuild, NodeArray *array, uint32 length)
+{
+ uint32 offset;
+ AffixNode *node;
+ AffixNodeData *data;
+ uint32 i;
+
+ offset = AllocateNode(ConfBuild, array, length, sizeof(AffixNodeData),
+ ANHRDSZ);
+ node = (AffixNode *) NodeArrayGet(array, offset);
+ node->length = length;
+ node->isvoid = 0;
+
+ /*
+ * Initialize all AffixNodeData with default values. We cannot use memset()
+ * here because not all fields have 0 as default value.
+ */
+ for (i = 0; i < length; i++)
+ {
+ data = &(node->data[i]);
+ data->val = 0;
+ data->affstart = ISPELL_INVALID_INDEX;
+ data->affend = ISPELL_INVALID_INDEX;
+ data->node_offset = ISPELL_INVALID_OFFSET;
+ }
+
+ return offset;
}
/*
@@ -333,7 +541,7 @@ cmpaffix(const void *s1, const void *s2)
* - 2 characters (FM_LONG). A character may be Unicode.
* - numbers from 1 to 65000 (FM_NUM).
*
- * Depending on the flagMode an affix string can have the following format:
+ * Depending on the flagmode an affix string can have the following format:
* - FM_CHAR: ABCD
* Here we have 4 flags: A, B, C and D
* - FM_LONG: ABCDE*
@@ -341,13 +549,13 @@ cmpaffix(const void *s1, const void *s2)
* - FM_NUM: 200,205,50
* Here we have 3 flags: 200, 205 and 50
*
- * Conf: current dictionary.
+ * flagmode: flag mode of the dictionary
* sflagset: the set of affix flags. Returns a reference to the start of a next
* affix flag.
* sflag: returns an affix flag from sflagset.
*/
static void
-getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
+getNextFlagFromString(FlagMode flagmode, char **sflagset, char *sflag)
{
int32 s;
char *next,
@@ -356,11 +564,11 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
bool stop = false;
bool met_comma = false;
- maxstep = (Conf->flagMode == FM_LONG) ? 2 : 1;
+ maxstep = (flagmode == FM_LONG) ? 2 : 1;
while (**sflagset)
{
- switch (Conf->flagMode)
+ switch (flagmode)
{
case FM_LONG:
case FM_CHAR:
@@ -422,15 +630,15 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
stop = true;
break;
default:
- elog(ERROR, "unrecognized type of Conf->flagMode: %d",
- Conf->flagMode);
+ elog(ERROR, "unrecognized type of flagmode: %d",
+ flagmode);
}
if (stop)
break;
}
- if (Conf->flagMode == FM_LONG && maxstep > 0)
+ if (flagmode == FM_LONG && maxstep > 0)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid affix flag \"%s\" with \"long\" flag value",
@@ -440,31 +648,28 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
}
/*
- * Checks if the affix set Conf->AffixData[affix] contains affixflag.
- * Conf->AffixData[affix] does not contain affixflag if this flag is not used
- * actually by the .dict file.
+ * Checks if the affix set from AffixData contains affixflag. Affix set does
+ * not contain affixflag if this flag is not used actually by the .dict file.
*
- * Conf: current dictionary.
- * affix: index of the Conf->AffixData array.
+ * flagmode: flag mode of the dictionary.
+ * sflagset: the set of affix flags.
* affixflag: the affix flag.
*
- * Returns true if the string Conf->AffixData[affix] contains affixflag,
- * otherwise returns false.
+ * Returns true if the affix set string contains affixflag, otherwise returns
+ * false.
*/
static bool
-IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag)
+IsAffixFlagInUse(FlagMode flagmode, char *sflagset, const char *affixflag)
{
- char *flagcur;
+ char *flagcur = sflagset;
char flag[BUFSIZ];
if (*affixflag == 0)
return true;
- flagcur = Conf->AffixData[affix];
-
while (*flagcur)
{
- getNextFlagFromString(Conf, &flagcur, flag);
+ getNextFlagFromString(flagmode, &flagcur, flag);
/* Compare first affix flag in flagcur with affixflag */
if (strcmp(flag, affixflag) == 0)
return true;
@@ -477,31 +682,33 @@ IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag)
/*
* Adds the new word into the temporary array Spell.
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary.
* word: new word.
* flag: set of affix flags. Single flag can be get by getNextFlagFromString().
*/
static void
-NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
+NIAddSpell(IspellDictBuild *ConfBuild, const char *word, const char *flag)
{
- if (Conf->nspell >= Conf->mspell)
+ if (ConfBuild->nSpell >= ConfBuild->mSpell)
{
- if (Conf->mspell)
+ if (ConfBuild->mSpell)
{
- Conf->mspell *= 2;
- Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
+ ConfBuild->mSpell *= 2;
+ ConfBuild->Spell = (SPELL **) repalloc(ConfBuild->Spell,
+ ConfBuild->mSpell * sizeof(SPELL *));
}
else
{
- Conf->mspell = 1024 * 20;
- Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *));
+ ConfBuild->mSpell = 1024 * 20;
+ ConfBuild->Spell = (SPELL **) tmpalloc(ConfBuild->mSpell * sizeof(SPELL *));
}
}
- Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
- strcpy(Conf->Spell[Conf->nspell]->word, word);
- Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0')
+ ConfBuild->Spell[ConfBuild->nSpell] =
+ (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
+ strcpy(ConfBuild->Spell[ConfBuild->nSpell]->word, word);
+ ConfBuild->Spell[ConfBuild->nSpell]->p.flag = (*flag != '\0')
? tmpstrdup(flag) : VoidString;
- Conf->nspell++;
+ ConfBuild->nSpell++;
}
/*
@@ -509,11 +716,11 @@ NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
*
* Note caller must already have applied get_tsearch_config_filename.
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary.
* filename: path to the .dict file.
*/
void
-NIImportDictionary(IspellDict *Conf, const char *filename)
+NIImportDictionary(IspellDictBuild *ConfBuild, const char *filename)
{
tsearch_readline_state trst;
char *line;
@@ -564,9 +771,9 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
}
s += pg_mblen(s);
}
- pstr = lowerstr_ctx(Conf, line);
+ pstr = lowerstr_ctx(ConfBuild, line);
- NIAddSpell(Conf, pstr, flag);
+ NIAddSpell(ConfBuild, pstr, flag);
pfree(pstr);
pfree(line);
@@ -590,7 +797,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
* SFX M 0 's .
* is presented here.
*
- * Conf: current dictionary.
+ * dict: current dictionary.
* word: basic form of word.
* affixflag: affix flag, by which a basic form of word was generated.
* flag: compound flag used to compare with StopMiddle->compoundflag.
@@ -598,9 +805,9 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
* Returns 1 if the word was found in the prefix tree, else returns 0.
*/
static int
-FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag)
+FindWord(IspellDictData *dict, const char *word, const char *affixflag, int flag)
{
- SPNode *node = Conf->Dictionary;
+ SPNode *node = (SPNode *) DictDictNodes(dict);
SPNodeData *StopLow,
*StopHigh,
*StopMiddle;
@@ -636,10 +843,14 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag)
* Check if this affix rule is presented in the affix set
* with index StopMiddle->affix.
*/
- if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag))
+ if (IsAffixFlagInUse(dict->flagMode,
+ DictAffixDataGet(dict, StopMiddle->affix),
+ affixflag))
return 1;
}
- node = StopMiddle->node;
+ /* Retreive SPNode by the offset */
+ node = (SPNode *) DictNodeGet(DictDictNodes(dict),
+ StopMiddle->node_offset);
ptr++;
break;
}
@@ -657,7 +868,8 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag)
/*
* Adds a new affix rule to the Affix field.
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary, is used to allocate
+ * temporary data.
* flag: affix flag ('\' in the below example).
* flagflags: set of flags from the flagval field for this affix rule. This set
* is listed after '/' character in the added string (repl).
@@ -673,26 +885,54 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag)
* type: FF_SUFFIX or FF_PREFIX.
*/
static void
-NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
- const char *find, const char *repl, int type)
+NIAddAffix(IspellDictBuild *ConfBuild, const char *flag, char flagflags,
+ const char *mask, const char *find, const char *repl, int type)
{
AFFIX *Affix;
+ uint32 size;
+ uint32 flaglen = strlen(flag),
+ findlen = strlen(find),
+ repllen = strlen(repl),
+ masklen = strlen(mask);
+
+ /* Sanity checks */
+ if (flaglen > AF_FLAG_MAXSIZE)
+ ereport(ERROR,
+ (errcode(ERRCODE_CONFIG_FILE_ERROR),
+ errmsg("affix flag \"%s\" too long", flag)));
+ if (findlen > AF_REPL_MAXSIZE)
+ ereport(ERROR,
+ (errcode(ERRCODE_CONFIG_FILE_ERROR),
+ errmsg("affix find field \"%s\" too long", find)));
+ if (repllen > AF_REPL_MAXSIZE)
+ ereport(ERROR,
+ (errcode(ERRCODE_CONFIG_FILE_ERROR),
+ errmsg("affix repl field \"%s\" too long", repl)));
+ if (masklen > AF_REPL_MAXSIZE)
+ ereport(ERROR,
+ (errcode(ERRCODE_CONFIG_FILE_ERROR),
+ errmsg("affix mask field \"%s\" too long", repl)));
- if (Conf->naffixes >= Conf->maffixes)
+ if (ConfBuild->nAffix >= ConfBuild->mAffix)
{
- if (Conf->maffixes)
+ if (ConfBuild->mAffix)
{
- Conf->maffixes *= 2;
- Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
+ ConfBuild->mAffix *= 2;
+ ConfBuild->Affix = (AFFIX **) repalloc(ConfBuild->Affix,
+ ConfBuild->mAffix * sizeof(AFFIX *));
}
else
{
- Conf->maffixes = 16;
- Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX));
+ ConfBuild->mAffix = 255;
+ ConfBuild->Affix = (AFFIX **) tmpalloc(ConfBuild->mAffix * sizeof(AFFIX *));
}
}
- Affix = Conf->Affix + Conf->naffixes;
+ size = AFFIXHDRSZ + flaglen + 1 /* \0 */ + findlen + 1 /* \0 */ +
+ repllen + 1 /* \0 */ + masklen + 1 /* \0 */;
+
+ Affix = (AFFIX *) tmpalloc(size);
+ ConfBuild->Affix[ConfBuild->nAffix] = Affix;
/* This affix rule can be applied for words with any ending */
if (strcmp(mask, ".") == 0 || *mask == '\0')
@@ -705,42 +945,12 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
{
Affix->issimple = 0;
Affix->isregis = 1;
- RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX),
- *mask ? mask : VoidString);
}
/* This affix rule will use regex_t to search word ending */
else
{
- int masklen;
- int wmasklen;
- int err;
- pg_wchar *wmask;
- char *tmask;
-
Affix->issimple = 0;
Affix->isregis = 0;
- tmask = (char *) tmpalloc(strlen(mask) + 3);
- if (type == FF_SUFFIX)
- sprintf(tmask, "%s$", mask);
- else
- sprintf(tmask, "^%s", mask);
-
- masklen = strlen(tmask);
- wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
- wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
-
- err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen,
- REG_ADVANCED | REG_NOSUB,
- DEFAULT_COLLATION_OID);
- if (err)
- {
- char errstr[100];
-
- pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr));
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
- errmsg("invalid regular expression: %s", errstr)));
- }
}
Affix->flagflags = flagflags;
@@ -749,15 +959,22 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0)
Affix->flagflags |= FF_COMPOUNDFLAG;
}
- Affix->flag = cpstrdup(Conf, flag);
+
Affix->type = type;
- Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
- if ((Affix->replen = strlen(repl)) > 0)
- Affix->repl = cpstrdup(Conf, repl);
- else
- Affix->repl = VoidString;
- Conf->naffixes++;
+ Affix->replen = repllen;
+ StrNCpy(AffixFieldRepl(Affix), repl, repllen + 1);
+
+ Affix->findlen = findlen;
+ StrNCpy(AffixFieldFind(Affix), find, findlen + 1);
+
+ Affix->masklen = masklen;
+ StrNCpy(AffixFieldMask(Affix), mask, masklen + 1);
+
+ StrNCpy(AffixFieldFlag(Affix), flag, flaglen + 1);
+
+ ConfBuild->nAffix++;
+ ConfBuild->AffixSize += size;
}
/* Parsing states for parse_affentry() and friends */
@@ -1021,10 +1238,10 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
* Sets a Hunspell options depending on flag type.
*/
static void
-setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry,
+setCompoundAffixFlagValue(IspellDictBuild *ConfBuild, CompoundAffixFlag *entry,
char *s, uint32 val)
{
- if (Conf->flagMode == FM_NUM)
+ if (ConfBuild->dict->flagMode == FM_NUM)
{
char *next;
int i;
@@ -1044,19 +1261,19 @@ setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry,
else
entry->flag.s = tmpstrdup(s);
- entry->flagMode = Conf->flagMode;
+ entry->flagMode = ConfBuild->dict->flagMode;
entry->value = val;
}
/*
* Sets up a correspondence for the affix parameter with the affix flag.
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary.
* s: affix flag in string.
* val: affix parameter.
*/
static void
-addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
+addCompoundAffixFlagValue(IspellDictBuild *ConfBuild, char *s, uint32 val)
{
CompoundAffixFlag *newValue;
char sbuf[BUFSIZ];
@@ -1083,29 +1300,29 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
*sflag = '\0';
/* Resize array or allocate memory for array CompoundAffixFlag */
- if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag)
+ if (ConfBuild->nCompoundAffixFlag >= ConfBuild->mCompoundAffixFlag)
{
- if (Conf->mCompoundAffixFlag)
+ if (ConfBuild->mCompoundAffixFlag)
{
- Conf->mCompoundAffixFlag *= 2;
- Conf->CompoundAffixFlags = (CompoundAffixFlag *)
- repalloc((void *) Conf->CompoundAffixFlags,
- Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
+ ConfBuild->mCompoundAffixFlag *= 2;
+ ConfBuild->CompoundAffixFlags = (CompoundAffixFlag *)
+ repalloc((void *) ConfBuild->CompoundAffixFlags,
+ ConfBuild->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
}
else
{
- Conf->mCompoundAffixFlag = 10;
- Conf->CompoundAffixFlags = (CompoundAffixFlag *)
- tmpalloc(Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
+ ConfBuild->mCompoundAffixFlag = 10;
+ ConfBuild->CompoundAffixFlags = (CompoundAffixFlag *)
+ tmpalloc(ConfBuild->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
}
}
- newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag;
+ newValue = ConfBuild->CompoundAffixFlags + ConfBuild->nCompoundAffixFlag;
- setCompoundAffixFlagValue(Conf, newValue, sbuf, val);
+ setCompoundAffixFlagValue(ConfBuild, newValue, sbuf, val);
- Conf->usecompound = true;
- Conf->nCompoundAffixFlag++;
+ ConfBuild->dict->usecompound = true;
+ ConfBuild->nCompoundAffixFlag++;
}
/*
@@ -1113,7 +1330,7 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
* flags s.
*/
static int
-getCompoundAffixFlagValue(IspellDict *Conf, char *s)
+getCompoundAffixFlagValue(IspellDictBuild *ConfBuild, char *s)
{
uint32 flag = 0;
CompoundAffixFlag *found,
@@ -1121,18 +1338,18 @@ getCompoundAffixFlagValue(IspellDict *Conf, char *s)
char sflag[BUFSIZ];
char *flagcur;
- if (Conf->nCompoundAffixFlag == 0)
+ if (ConfBuild->nCompoundAffixFlag == 0)
return 0;
flagcur = s;
while (*flagcur)
{
- getNextFlagFromString(Conf, &flagcur, sflag);
- setCompoundAffixFlagValue(Conf, &key, sflag, 0);
+ getNextFlagFromString(ConfBuild->dict->flagMode, &flagcur, sflag);
+ setCompoundAffixFlagValue(ConfBuild, &key, sflag, 0);
found = (CompoundAffixFlag *)
- bsearch(&key, (void *) Conf->CompoundAffixFlags,
- Conf->nCompoundAffixFlag, sizeof(CompoundAffixFlag),
+ bsearch(&key, (void *) ConfBuild->CompoundAffixFlags,
+ ConfBuild->nCompoundAffixFlag, sizeof(CompoundAffixFlag),
cmpcmdflag);
if (found != NULL)
flag |= found->value;
@@ -1144,14 +1361,13 @@ getCompoundAffixFlagValue(IspellDict *Conf, char *s)
/*
* Returns a flag set using the s parameter.
*
- * If Conf->useFlagAliases is true then the s parameter is index of the
- * Conf->AffixData array and function returns its entry.
- * Else function returns the s parameter.
+ * If useFlagAliases is true then the s parameter is index of the AffixData
+ * array and function returns its entry. Else function returns the s parameter.
*/
static char *
-getAffixFlagSet(IspellDict *Conf, char *s)
+getAffixFlagSet(IspellDictBuild *ConfBuild, char *s)
{
- if (Conf->useFlagAliases && *s != '\0')
+ if (ConfBuild->dict->useFlagAliases && *s != '\0')
{
int curaffix;
char *end;
@@ -1162,13 +1378,13 @@ getAffixFlagSet(IspellDict *Conf, char *s)
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid affix alias \"%s\"", s)));
- if (curaffix > 0 && curaffix <= Conf->nAffixData)
+ if (curaffix > 0 && curaffix <= ConfBuild->nAffixData)
/*
* Do not subtract 1 from curaffix because empty string was added
* in NIImportOOAffixes
*/
- return Conf->AffixData[curaffix];
+ return AffixDataGet(ConfBuild, curaffix);
else
return VoidString;
}
@@ -1179,11 +1395,11 @@ getAffixFlagSet(IspellDict *Conf, char *s)
/*
* Import an affix file that follows MySpell or Hunspell format.
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary.
* filename: path to the .affix file.
*/
static void
-NIImportOOAffixes(IspellDict *Conf, const char *filename)
+NIImportOOAffixes(IspellDictBuild *ConfBuild, const char *filename)
{
char type[BUFSIZ],
*ptype = NULL;
@@ -1203,9 +1419,9 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
char *recoded;
/* read file to find any flag */
- Conf->usecompound = false;
- Conf->useFlagAliases = false;
- Conf->flagMode = FM_CHAR;
+ ConfBuild->dict->usecompound = false;
+ ConfBuild->dict->useFlagAliases = false;
+ ConfBuild->dict->flagMode = FM_CHAR;
if (!tsearch_readline_begin(&trst, filename))
ereport(ERROR,
@@ -1222,30 +1438,36 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
}
if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
- addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
+ addCompoundAffixFlagValue(ConfBuild,
+ recoded + strlen("COMPOUNDFLAG"),
FF_COMPOUNDFLAG);
else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
- addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
+ addCompoundAffixFlagValue(ConfBuild,
+ recoded + strlen("COMPOUNDBEGIN"),
FF_COMPOUNDBEGIN);
else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
- addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
+ addCompoundAffixFlagValue(ConfBuild,
+ recoded + strlen("COMPOUNDLAST"),
FF_COMPOUNDLAST);
/* COMPOUNDLAST and COMPOUNDEND are synonyms */
else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
- addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
+ addCompoundAffixFlagValue(ConfBuild,
+ recoded + strlen("COMPOUNDEND"),
FF_COMPOUNDLAST);
else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
- addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
+ addCompoundAffixFlagValue(ConfBuild,
+ recoded + strlen("COMPOUNDMIDDLE"),
FF_COMPOUNDMIDDLE);
else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
- addCompoundAffixFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
+ addCompoundAffixFlagValue(ConfBuild,
+ recoded + strlen("ONLYINCOMPOUND"),
FF_COMPOUNDONLY);
else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
- addCompoundAffixFlagValue(Conf,
+ addCompoundAffixFlagValue(ConfBuild,
recoded + strlen("COMPOUNDPERMITFLAG"),
FF_COMPOUNDPERMITFLAG);
else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
- addCompoundAffixFlagValue(Conf,
+ addCompoundAffixFlagValue(ConfBuild,
recoded + strlen("COMPOUNDFORBIDFLAG"),
FF_COMPOUNDFORBIDFLAG);
else if (STRNCMP(recoded, "FLAG") == 0)
@@ -1258,9 +1480,9 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
if (*s)
{
if (STRNCMP(s, "long") == 0)
- Conf->flagMode = FM_LONG;
+ ConfBuild->dict->flagMode = FM_LONG;
else if (STRNCMP(s, "num") == 0)
- Conf->flagMode = FM_NUM;
+ ConfBuild->dict->flagMode = FM_NUM;
else if (STRNCMP(s, "default") != 0)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
@@ -1274,8 +1496,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
}
tsearch_readline_end(&trst);
- if (Conf->nCompoundAffixFlag > 1)
- qsort((void *) Conf->CompoundAffixFlags, Conf->nCompoundAffixFlag,
+ if (ConfBuild->nCompoundAffixFlag > 1)
+ qsort((void *) ConfBuild->CompoundAffixFlags, ConfBuild->nCompoundAffixFlag,
sizeof(CompoundAffixFlag), cmpcmdflag);
if (!tsearch_readline_begin(&trst, filename))
@@ -1295,15 +1517,15 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
if (ptype)
pfree(ptype);
- ptype = lowerstr_ctx(Conf, type);
+ ptype = lowerstr_ctx(ConfBuild, type);
/* First try to parse AF parameter (alias compression) */
if (STRNCMP(ptype, "af") == 0)
{
/* First line is the number of aliases */
- if (!Conf->useFlagAliases)
+ if (!ConfBuild->dict->useFlagAliases)
{
- Conf->useFlagAliases = true;
+ ConfBuild->dict->useFlagAliases = true;
naffix = atoi(sflag);
if (naffix <= 0)
ereport(ERROR,
@@ -1313,11 +1535,10 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
/* Also reserve place for empty flag set */
naffix++;
- Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
- Conf->lenAffixData = Conf->nAffixData = naffix;
+ InitAffixData(ConfBuild, naffix);
/* Add empty flag set into AffixData */
- Conf->AffixData[curaffix] = VoidString;
+ AddAffixSet(ConfBuild, VoidString, 0);
curaffix++;
}
/* Other lines are aliases */
@@ -1325,7 +1546,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
{
if (curaffix < naffix)
{
- Conf->AffixData[curaffix] = cpstrdup(Conf, sflag);
+ AddAffixSet(ConfBuild, sflag, strlen(sflag));
curaffix++;
}
else
@@ -1343,8 +1564,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
sflaglen = strlen(sflag);
if (sflaglen == 0
- || (sflaglen > 1 && Conf->flagMode == FM_CHAR)
- || (sflaglen > 2 && Conf->flagMode == FM_LONG))
+ || (sflaglen > 1 && ConfBuild->dict->flagMode == FM_CHAR)
+ || (sflaglen > 2 && ConfBuild->dict->flagMode == FM_LONG))
goto nextline;
/*--------
@@ -1372,21 +1593,21 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
/* Get flags after '/' (flags are case sensitive) */
if ((ptr = strchr(repl, '/')) != NULL)
- aflg |= getCompoundAffixFlagValue(Conf,
- getAffixFlagSet(Conf,
+ aflg |= getCompoundAffixFlagValue(ConfBuild,
+ getAffixFlagSet(ConfBuild,
ptr + 1));
/* Get lowercased version of string before '/' */
- prepl = lowerstr_ctx(Conf, repl);
+ prepl = lowerstr_ctx(ConfBuild, repl);
if ((ptr = strchr(prepl, '/')) != NULL)
*ptr = '\0';
- pfind = lowerstr_ctx(Conf, find);
- pmask = lowerstr_ctx(Conf, mask);
+ pfind = lowerstr_ctx(ConfBuild, find);
+ pmask = lowerstr_ctx(ConfBuild, mask);
if (t_iseq(find, '0'))
*pfind = '\0';
if (t_iseq(repl, '0'))
*prepl = '\0';
- NIAddAffix(Conf, sflag, flagflags | aflg, pmask, pfind, prepl,
+ NIAddAffix(ConfBuild, sflag, flagflags | aflg, pmask, pfind, prepl,
isSuffix ? FF_SUFFIX : FF_PREFIX);
pfree(prepl);
pfree(pfind);
@@ -1412,7 +1633,7 @@ nextline:
* work to NIImportOOAffixes(), which will re-read the whole file.
*/
void
-NIImportAffixes(IspellDict *Conf, const char *filename)
+NIImportAffixes(IspellDictBuild *ConfBuild, const char *filename)
{
char *pstr = NULL;
char flag[BUFSIZ];
@@ -1433,9 +1654,9 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
errmsg("could not open affix file \"%s\": %m",
filename)));
- Conf->usecompound = false;
- Conf->useFlagAliases = false;
- Conf->flagMode = FM_CHAR;
+ ConfBuild->dict->usecompound = false;
+ ConfBuild->dict->useFlagAliases = false;
+ ConfBuild->dict->flagMode = FM_CHAR;
while ((recoded = tsearch_readline(&trst)) != NULL)
{
@@ -1457,10 +1678,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
s += pg_mblen(s);
if (*s && pg_mblen(s) == 1)
- {
- addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
- Conf->usecompound = true;
- }
+ addCompoundAffixFlagValue(ConfBuild, s, FF_COMPOUNDFLAG);
+
oldformat = true;
goto nextline;
}
@@ -1533,7 +1752,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
if (!parse_affentry(pstr, mask, find, repl))
goto nextline;
- NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
+ NIAddAffix(ConfBuild, flag, flagflags, mask, find, repl,
+ suffixes ? FF_SUFFIX : FF_PREFIX);
nextline:
pfree(recoded);
@@ -1552,53 +1772,48 @@ isnewformat:
errmsg("affix file contains both old-style and new-style commands")));
tsearch_readline_end(&trst);
- NIImportOOAffixes(Conf, filename);
+ NIImportOOAffixes(ConfBuild, filename);
}
/*
* Merges two affix flag sets and stores a new affix flag set into
- * Conf->AffixData.
+ * ConfBuild->AffixData.
*
* Returns index of a new affix flag set.
*/
static int
-MergeAffix(IspellDict *Conf, int a1, int a2)
+MergeAffix(IspellDictBuild *ConfBuild, int a1, int a2)
{
- char **ptr;
+ char *ptr;
+ uint32 len;
/* Do not merge affix flags if one of affix flags is empty */
- if (*Conf->AffixData[a1] == '\0')
+ if (*AffixDataGet(ConfBuild, a1) == '\0')
return a2;
- else if (*Conf->AffixData[a2] == '\0')
+ else if (*AffixDataGet(ConfBuild, a2) == '\0')
return a1;
- while (Conf->nAffixData + 1 >= Conf->lenAffixData)
- {
- Conf->lenAffixData *= 2;
- Conf->AffixData = (char **) repalloc(Conf->AffixData,
- sizeof(char *) * Conf->lenAffixData);
- }
-
- ptr = Conf->AffixData + Conf->nAffixData;
- if (Conf->flagMode == FM_NUM)
+ if (ConfBuild->dict->flagMode == FM_NUM)
{
- *ptr = cpalloc(strlen(Conf->AffixData[a1]) +
- strlen(Conf->AffixData[a2]) +
- 1 /* comma */ + 1 /* \0 */ );
- sprintf(*ptr, "%s,%s", Conf->AffixData[a1], Conf->AffixData[a2]);
+ len = strlen(AffixDataGet(ConfBuild, a1)) + 1 /* comma */ +
+ strlen(AffixDataGet(ConfBuild, a2));
+ ptr = tmpalloc(len + 1 /* \0 */);
+ sprintf(ptr, "%s,%s", AffixDataGet(ConfBuild, a1),
+ AffixDataGet(ConfBuild, a2));
}
else
{
- *ptr = cpalloc(strlen(Conf->AffixData[a1]) +
- strlen(Conf->AffixData[a2]) +
- 1 /* \0 */ );
- sprintf(*ptr, "%s%s", Conf->AffixData[a1], Conf->AffixData[a2]);
+ len = strlen(AffixDataGet(ConfBuild, a1)) +
+ strlen(AffixDataGet(ConfBuild, a2));
+ ptr = tmpalloc(len + 1 /* \0 */ );
+ sprintf(ptr, "%s%s", AffixDataGet(ConfBuild, a1),
+ AffixDataGet(ConfBuild, a2));
}
- ptr++;
- *ptr = NULL;
- Conf->nAffixData++;
- return Conf->nAffixData - 1;
+ AddAffixSet(ConfBuild, ptr, len);
+ pfree(ptr);
+
+ return ConfBuild->nAffixData - 1;
}
/*
@@ -1606,66 +1821,87 @@ MergeAffix(IspellDict *Conf, int a1, int a2)
* flags with the given index.
*/
static uint32
-makeCompoundFlags(IspellDict *Conf, int affix)
+makeCompoundFlags(IspellDictBuild *ConfBuild, int affix)
{
- char *str = Conf->AffixData[affix];
+ char *str = AffixDataGet(ConfBuild, affix);
- return (getCompoundAffixFlagValue(Conf, str) & FF_COMPOUNDFLAGMASK);
+ return (getCompoundAffixFlagValue(ConfBuild, str) & FF_COMPOUNDFLAGMASK);
}
/*
* Makes a prefix tree for the given level.
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary.
* low: lower index of the Conf->Spell array.
* high: upper index of the Conf->Spell array.
* level: current prefix tree level.
+ *
+ * Returns an offset of SPNode in DictNodes.
*/
-static SPNode *
-mkSPNode(IspellDict *Conf, int low, int high, int level)
+static uint32
+mkSPNode(IspellDictBuild *ConfBuild, int low, int high, int level)
{
int i;
int nchar = 0;
char lastchar = '\0';
+ uint32 rs_offset,
+ new_offset;
SPNode *rs;
SPNodeData *data;
+ int data_index = 0;
int lownew = low;
for (i = low; i < high; i++)
- if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
+ if (ConfBuild->Spell[i]->p.d.len > level &&
+ lastchar != ConfBuild->Spell[i]->word[level])
{
nchar++;
- lastchar = Conf->Spell[i]->word[level];
+ lastchar = ConfBuild->Spell[i]->word[level];
}
if (!nchar)
- return NULL;
+ return ISPELL_INVALID_OFFSET;
- rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
- rs->length = nchar;
+ rs_offset = AllocateSPNode(ConfBuild, nchar);
+ rs = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, rs_offset);
data = rs->data;
lastchar = '\0';
for (i = low; i < high; i++)
- if (Conf->Spell[i]->p.d.len > level)
+ if (ConfBuild->Spell[i]->p.d.len > level)
{
- if (lastchar != Conf->Spell[i]->word[level])
+ if (lastchar != ConfBuild->Spell[i]->word[level])
{
if (lastchar)
{
/* Next level of the prefix tree */
- data->node = mkSPNode(Conf, lownew, i, level + 1);
+ new_offset = mkSPNode(ConfBuild, lownew, i, level + 1);
+
+ /*
+ * ConfBuild->DictNodes can be repalloc'ed within
+ * mkSPNode(), so reinitialize pointers.
+ */
+ rs = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, rs_offset);
+
+ /* First save offset of the new node */
+ data = &(rs->data[data_index]);
+ data->node_offset = new_offset;
+
+ /* Work with next node */
+ data_index++;
+ Assert(data_index < nchar);
+ data = &(rs->data[data_index]);
+
lownew = i;
- data++;
}
- lastchar = Conf->Spell[i]->word[level];
+ lastchar = ConfBuild->Spell[i]->word[level];
}
- data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
- if (Conf->Spell[i]->p.d.len == level + 1)
+ data->val = ((uint8 *) (ConfBuild->Spell[i]->word))[level];
+ if (ConfBuild->Spell[i]->p.d.len == level + 1)
{
bool clearCompoundOnly = false;
- if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
+ if (data->isword && data->affix != ConfBuild->Spell[i]->p.d.affix)
{
/*
* MergeAffix called a few times. If one of word is
@@ -1674,15 +1910,17 @@ mkSPNode(IspellDict *Conf, int low, int high, int level)
*/
clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
- & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
+ & makeCompoundFlags(ConfBuild,
+ ConfBuild->Spell[i]->p.d.affix))
? false : true;
- data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
+ data->affix = MergeAffix(ConfBuild, data->affix,
+ ConfBuild->Spell[i]->p.d.affix);
}
else
- data->affix = Conf->Spell[i]->p.d.affix;
+ data->affix = ConfBuild->Spell[i]->p.d.affix;
data->isword = 1;
- data->compoundflag = makeCompoundFlags(Conf, data->affix);
+ data->compoundflag = makeCompoundFlags(ConfBuild, data->affix);
if ((data->compoundflag & FF_COMPOUNDONLY) &&
(data->compoundflag & FF_COMPOUNDFLAG) == 0)
@@ -1694,9 +1932,19 @@ mkSPNode(IspellDict *Conf, int low, int high, int level)
}
/* Next level of the prefix tree */
- data->node = mkSPNode(Conf, lownew, high, level + 1);
+ new_offset = mkSPNode(ConfBuild, lownew, high, level + 1);
+
+ /*
+ * ConfBuild->DictNodes can be repalloc'ed within mkSPNode(), so
+ * reinitialize pointers.
+ */
+ rs = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, rs_offset);
+
+ /* Save offset of the new node */
+ data = &(rs->data[data_index]);
+ data->node_offset = new_offset;
- return rs;
+ return rs_offset;
}
/*
@@ -1704,7 +1952,7 @@ mkSPNode(IspellDict *Conf, int low, int high, int level)
* and affixes.
*/
void
-NISortDictionary(IspellDict *Conf)
+NISortDictionary(IspellDictBuild *ConfBuild)
{
int i;
int naffix = 0;
@@ -1713,81 +1961,81 @@ NISortDictionary(IspellDict *Conf)
/* compress affixes */
/*
- * If we use flag aliases then we need to use Conf->AffixData filled in
+ * If we use flag aliases then we need to use ConfBuild->AffixData filled in
* the NIImportOOAffixes().
*/
- if (Conf->useFlagAliases)
+ if (ConfBuild->dict->useFlagAliases)
{
- for (i = 0; i < Conf->nspell; i++)
+ for (i = 0; i < ConfBuild->nSpell; i++)
{
char *end;
- if (*Conf->Spell[i]->p.flag != '\0')
+ if (*ConfBuild->Spell[i]->p.flag != '\0')
{
- curaffix = strtol(Conf->Spell[i]->p.flag, &end, 10);
- if (Conf->Spell[i]->p.flag == end || errno == ERANGE)
+ curaffix = strtol(ConfBuild->Spell[i]->p.flag, &end, 10);
+ if (ConfBuild->Spell[i]->p.flag == end || errno == ERANGE)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid affix alias \"%s\"",
- Conf->Spell[i]->p.flag)));
+ ConfBuild->Spell[i]->p.flag)));
}
else
{
/*
- * If Conf->Spell[i]->p.flag is empty, then get empty value of
- * Conf->AffixData (0 index).
+ * If ConfBuild->Spell[i]->p.flag is empty, then get empty
+ * value of ConfBuild->AffixData (0 index).
*/
curaffix = 0;
}
- Conf->Spell[i]->p.d.affix = curaffix;
- Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
+ ConfBuild->Spell[i]->p.d.affix = curaffix;
+ ConfBuild->Spell[i]->p.d.len = strlen(ConfBuild->Spell[i]->word);
}
}
- /* Otherwise fill Conf->AffixData here */
+ /* Otherwise fill ConfBuild->AffixData here */
else
{
/* Count the number of different flags used in the dictionary */
- qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *),
+ qsort((void *) ConfBuild->Spell, ConfBuild->nSpell, sizeof(SPELL *),
cmpspellaffix);
naffix = 0;
- for (i = 0; i < Conf->nspell; i++)
+ for (i = 0; i < ConfBuild->nSpell; i++)
{
if (i == 0
- || strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag))
+ || strcmp(ConfBuild->Spell[i]->p.flag,
+ ConfBuild->Spell[i - 1]->p.flag))
naffix++;
}
/*
- * Fill in Conf->AffixData with the affixes that were used in the
- * dictionary. Replace textual flag-field of Conf->Spell entries with
- * indexes into Conf->AffixData array.
+ * Fill in AffixData with the affixes that were used in the
+ * dictionary. Replace textual flag-field of ConfBuild->Spell entries
+ * with indexes into ConfBuild->AffixData array.
*/
- Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
+ InitAffixData(ConfBuild, naffix);
curaffix = -1;
- for (i = 0; i < Conf->nspell; i++)
+ for (i = 0; i < ConfBuild->nSpell; i++)
{
if (i == 0
- || strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix]))
+ || strcmp(ConfBuild->Spell[i]->p.flag,
+ AffixDataGet(ConfBuild, curaffix)))
{
curaffix++;
Assert(curaffix < naffix);
- Conf->AffixData[curaffix] = cpstrdup(Conf,
- Conf->Spell[i]->p.flag);
+ AddAffixSet(ConfBuild, ConfBuild->Spell[i]->p.flag,
+ strlen(ConfBuild->Spell[i]->p.flag));
}
- Conf->Spell[i]->p.d.affix = curaffix;
- Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
+ ConfBuild->Spell[i]->p.d.affix = curaffix;
+ ConfBuild->Spell[i]->p.d.len = strlen(ConfBuild->Spell[i]->word);
}
-
- Conf->lenAffixData = Conf->nAffixData = naffix;
}
/* Start build a prefix tree */
- qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
- Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
+ qsort((void *) ConfBuild->Spell, ConfBuild->nSpell, sizeof(SPELL *), cmpspell);
+ mkSPNode(ConfBuild, 0, ConfBuild->nSpell, 0);
}
/*
@@ -1795,83 +2043,104 @@ NISortDictionary(IspellDict *Conf)
* rule. Affixes with empty replace string do not include in the prefix tree.
* This affixes are included by mkVoidAffix().
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary.
* low: lower index of the Conf->Affix array.
* high: upper index of the Conf->Affix array.
* level: current prefix tree level.
* type: FF_SUFFIX or FF_PREFIX.
+ *
+ * Returns an offset in nodes array.
*/
-static AffixNode *
-mkANode(IspellDict *Conf, int low, int high, int level, int type)
+static uint32
+mkANode(IspellDictBuild *ConfBuild, int low, int high, int level, int type)
{
int i;
int nchar = 0;
uint8 lastchar = '\0';
+ NodeArray *array;
+ uint32 rs_offset,
+ new_offset;
AffixNode *rs;
AffixNodeData *data;
+ int data_index = 0;
int lownew = low;
- int naff;
- AFFIX **aff;
for (i = low; i < high; i++)
- if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
+ if (ConfBuild->Affix[i]->replen > level &&
+ lastchar != GETCHAR(ConfBuild->Affix[i], level, type))
{
nchar++;
- lastchar = GETCHAR(Conf->Affix + i, level, type);
+ lastchar = GETCHAR(ConfBuild->Affix[i], level, type);
}
if (!nchar)
- return NULL;
+ return ISPELL_INVALID_OFFSET;
- aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
- naff = 0;
+ if (type == FF_SUFFIX)
+ array = &ConfBuild->SuffixNodes;
+ else
+ array = &ConfBuild->PrefixNodes;
- rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
- rs->length = nchar;
- data = rs->data;
+ rs_offset = AllocateAffixNode(ConfBuild, array, nchar);
+ rs = (AffixNode *) NodeArrayGet(array, rs_offset);
+ data = (AffixNodeData *) rs->data;
lastchar = '\0';
for (i = low; i < high; i++)
- if (Conf->Affix[i].replen > level)
+ if (ConfBuild->Affix[i]->replen > level)
{
- if (lastchar != GETCHAR(Conf->Affix + i, level, type))
+ if (lastchar != GETCHAR(ConfBuild->Affix[i], level, type))
{
if (lastchar)
{
/* Next level of the prefix tree */
- data->node = mkANode(Conf, lownew, i, level + 1, type);
- if (naff)
- {
- data->naff = naff;
- data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
- memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
- naff = 0;
- }
- data++;
+ new_offset = mkANode(ConfBuild, lownew, i, level + 1, type);
+
+ /*
+ * array can be repalloc'ed within mkANode(), so
+ * reinitialize pointers.
+ */
+ rs = (AffixNode *) NodeArrayGet(array, rs_offset);
+
+ /* First save offset of the new node */
+ data = &(rs->data[data_index]);
+ data->node_offset = new_offset;
+
+ /* Handle next data node */
+ data_index++;
+ Assert(data_index < nchar);
+ data = &(rs->data[data_index]);
+
lownew = i;
}
- lastchar = GETCHAR(Conf->Affix + i, level, type);
+ lastchar = GETCHAR(ConfBuild->Affix[i], level, type);
}
- data->val = GETCHAR(Conf->Affix + i, level, type);
- if (Conf->Affix[i].replen == level + 1)
+ data->val = GETCHAR(ConfBuild->Affix[i], level, type);
+ if (ConfBuild->Affix[i]->replen == level + 1)
{ /* affix stopped */
- aff[naff++] = Conf->Affix + i;
+ if (data->affstart == ISPELL_INVALID_INDEX)
+ {
+ data->affstart = i;
+ data->affend = i;
+ }
+ else
+ data->affend = i;
}
}
/* Next level of the prefix tree */
- data->node = mkANode(Conf, lownew, high, level + 1, type);
- if (naff)
- {
- data->naff = naff;
- data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
- memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
- naff = 0;
- }
+ new_offset = mkANode(ConfBuild, lownew, high, level + 1, type);
+
+ /*
+ * array can be repalloc'ed within mkANode(), so reinitialize pointers.
+ */
+ rs = (AffixNode *) NodeArrayGet(array, rs_offset);
- pfree(aff);
+ /* Save offset of the new node */
+ data = &(rs->data[data_index]);
+ data->node_offset = new_offset;
- return rs;
+ return rs_offset;
}
/*
@@ -1879,139 +2148,156 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
* for affixes which have empty replace string ("repl" field).
*/
static void
-mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
+mkVoidAffix(IspellDictBuild *ConfBuild, bool issuffix, int startsuffix)
{
- int i,
- cnt = 0;
+ int i;
int start = (issuffix) ? startsuffix : 0;
- int end = (issuffix) ? Conf->naffixes : startsuffix;
- AffixNode *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData));
-
- Affix->length = 1;
- Affix->isvoid = 1;
+ int end = (issuffix) ? ConfBuild->nAffix : startsuffix;
+ uint32 node_offset;
+ NodeArray *array;
+ AffixNode *Affix;
+ AffixNodeData *AffixData;
if (issuffix)
- {
- Affix->data->node = Conf->Suffix;
- Conf->Suffix = Affix;
- }
+ array = &ConfBuild->SuffixNodes;
else
- {
- Affix->data->node = Conf->Prefix;
- Conf->Prefix = Affix;
- }
+ array = &ConfBuild->PrefixNodes;
- /* Count affixes with empty replace string */
- for (i = start; i < end; i++)
- if (Conf->Affix[i].replen == 0)
- cnt++;
-
- /* There is not affixes with empty replace string */
- if (cnt == 0)
- return;
+ node_offset = AllocateAffixNode(ConfBuild, array, 1);
+ Affix = (AffixNode *) NodeArrayGet(array, node_offset);
- Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt);
- Affix->data->naff = (uint32) cnt;
+ Affix->isvoid = 1;
+ AffixData = (AffixNodeData *) Affix->data;
- cnt = 0;
for (i = start; i < end; i++)
- if (Conf->Affix[i].replen == 0)
+ if (ConfBuild->Affix[i]->replen == 0)
{
- Affix->data->aff[cnt] = Conf->Affix + i;
- cnt++;
+ if (AffixData->affstart == ISPELL_INVALID_INDEX)
+ {
+ AffixData->affstart = i;
+ AffixData->affend = i;
+ }
+ else
+ AffixData->affend = i;
}
}
/*
- * Checks if the affixflag is used by dictionary. Conf->AffixData does not
+ * Checks if the affixflag is used by dictionary. AffixData does not
* contain affixflag if this flag is not used actually by the .dict file.
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary.
* affixflag: affix flag.
*
- * Returns true if the Conf->AffixData array contains affixflag, otherwise
+ * Returns true if the ConfBuild->AffixData array contains affixflag, otherwise
* returns false.
*/
static bool
-isAffixInUse(IspellDict *Conf, char *affixflag)
+isAffixInUse(IspellDictBuild *ConfBuild, char *affixflag)
{
int i;
- for (i = 0; i < Conf->nAffixData; i++)
- if (IsAffixFlagInUse(Conf, i, affixflag))
+ for (i = 0; i < ConfBuild->nAffixData; i++)
+ if (IsAffixFlagInUse(ConfBuild->dict->flagMode,
+ AffixDataGet(ConfBuild, i), affixflag))
return true;
return false;
}
/*
- * Builds Conf->Prefix and Conf->Suffix trees from the imported affixes.
+ * Builds Prefix and Suffix trees from the imported affixes.
*/
void
-NISortAffixes(IspellDict *Conf)
+NISortAffixes(IspellDictBuild *ConfBuild)
{
AFFIX *Affix;
+ AffixNode *voidPrefix,
+ *voidSuffix;
size_t i;
CMPDAffix *ptr;
- int firstsuffix = Conf->naffixes;
+ int firstsuffix = ConfBuild->nAffix;
+ uint32 prefix_offset,
+ suffix_offset;
- if (Conf->naffixes == 0)
+ if (ConfBuild->nAffix == 0)
return;
/* Store compound affixes in the Conf->CompoundAffix array */
- if (Conf->naffixes > 1)
- qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
- Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes);
- ptr->affix = NULL;
-
- for (i = 0; i < Conf->naffixes; i++)
+ if (ConfBuild->nAffix > 1)
+ qsort((void *) ConfBuild->Affix, ConfBuild->nAffix,
+ sizeof(AFFIX *), cmpaffix);
+ ConfBuild->nCompoundAffix = ConfBuild->nAffix;
+ ConfBuild->CompoundAffix = ptr =
+ (CMPDAffix *) tmpalloc(sizeof(CMPDAffix) * ConfBuild->nCompoundAffix);
+ ptr->affix = ISPELL_INVALID_INDEX;
+
+ for (i = 0; i < ConfBuild->nAffix; i++)
{
- Affix = &(((AFFIX *) Conf->Affix)[i]);
+ Affix = ConfBuild->Affix[i];
if (Affix->type == FF_SUFFIX && i < firstsuffix)
firstsuffix = i;
if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
- isAffixInUse(Conf, Affix->flag))
+ isAffixInUse(ConfBuild, AffixFieldFlag(Affix)))
{
bool issuffix = (Affix->type == FF_SUFFIX);
- if (ptr == Conf->CompoundAffix ||
+ if (ptr == ConfBuild->CompoundAffix ||
issuffix != (ptr - 1)->issuffix ||
- strbncmp((const unsigned char *) (ptr - 1)->affix,
- (const unsigned char *) Affix->repl,
+ strbncmp((const unsigned char *) AffixFieldRepl(ConfBuild->Affix[(ptr - 1)->affix]),
+ (const unsigned char *) AffixFieldRepl(Affix),
(ptr - 1)->len))
{
/* leave only unique and minimals suffixes */
- ptr->affix = Affix->repl;
+ ptr->affix = i;
ptr->len = Affix->replen;
ptr->issuffix = issuffix;
ptr++;
}
}
}
- ptr->affix = NULL;
- Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));
+ ptr->affix = ISPELL_INVALID_INDEX;
+ ConfBuild->nCompoundAffix = ptr - ConfBuild->CompoundAffix + 1;
+ ConfBuild->CompoundAffix = (CMPDAffix *) repalloc(ConfBuild->CompoundAffix,
+ sizeof(CMPDAffix) * (ConfBuild->nCompoundAffix));
/* Start build a prefix tree */
- Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
- Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
- mkVoidAffix(Conf, true, firstsuffix);
- mkVoidAffix(Conf, false, firstsuffix);
+ mkVoidAffix(ConfBuild, true, firstsuffix);
+ mkVoidAffix(ConfBuild, false, firstsuffix);
+
+ prefix_offset = mkANode(ConfBuild, 0, firstsuffix, 0, FF_PREFIX);
+ suffix_offset = mkANode(ConfBuild, firstsuffix, ConfBuild->nAffix, 0,
+ FF_SUFFIX);
+
+ /* Adjust offsets of new nodes for nodes of void affixes */
+ voidPrefix = (AffixNode *) NodeArrayGet(&ConfBuild->PrefixNodes, 0);
+ voidPrefix->data[0].node_offset = prefix_offset;
+
+ voidSuffix = (AffixNode *) NodeArrayGet(&ConfBuild->SuffixNodes, 0);
+ voidSuffix->data[0].node_offset = suffix_offset;
}
static AffixNodeData *
-FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
+FindAffixes(IspellDictData *dict, AffixNode *node, const char *word, int wrdlen,
+ int *level, int type)
{
+ AffixNode *node_start;
AffixNodeData *StopLow,
*StopHigh,
*StopMiddle;
uint8 symbol;
+ if (type == FF_PREFIX)
+ node_start = (AffixNode *) DictPrefixNodes(dict);
+ else
+ node_start = (AffixNode *) DictSuffixNodes(dict);
+
if (node->isvoid)
{ /* search void affixes */
- if (node->data->naff)
+ if (node->data->affstart != ISPELL_INVALID_INDEX)
return node->data;
- node = node->data->node;
+ node = (AffixNode *) DictNodeGet(node_start, node->data->node_offset);
}
while (node && *level < wrdlen)
@@ -2026,9 +2312,10 @@ FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
if (StopMiddle->val == symbol)
{
(*level)++;
- if (StopMiddle->naff)
+ if (StopMiddle->affstart != ISPELL_INVALID_INDEX)
return StopMiddle;
- node = StopMiddle->node;
+ node = (AffixNode *) DictNodeGet(node_start,
+ StopMiddle->node_offset);
break;
}
else if (StopMiddle->val < symbol)
@@ -2042,8 +2329,67 @@ FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
return NULL;
}
+/*
+ * Compile regular expression on first use and store it within reg.
+ */
+static void
+CompileAffixReg(AffixReg *reg, bool isregis, int type,
+ const char *mask, int masklen, MemoryContext dictCtx)
+{
+ MemoryContext oldcontext;
+
+ Assert(dictCtx);
+
+ /*
+ * Switch to memory context of the dictionary, so compiled expression can be
+ * used in other queries.
+ */
+ oldcontext = MemoryContextSwitchTo(dictCtx);
+
+ if (isregis)
+ RS_compile(®->r.regis, (type == FF_SUFFIX), mask);
+ else
+ {
+ int wmasklen;
+ int err;
+ pg_wchar *wmask;
+ char *tmask;
+
+ tmask = (char *) palloc(masklen + 3);
+ if (type == FF_SUFFIX)
+ sprintf(tmask, "%s$", mask);
+ else
+ sprintf(tmask, "^%s", mask);
+
+ masklen = strlen(tmask);
+ wmask = (pg_wchar *) palloc((masklen + 1) * sizeof(pg_wchar));
+ wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
+
+ err = pg_regcomp(&(reg->r.regex), wmask, wmasklen,
+ REG_ADVANCED | REG_NOSUB,
+ DEFAULT_COLLATION_OID);
+ if (err)
+ {
+ char errstr[100];
+
+ pg_regerror(err, &(reg->r.regex), errstr, sizeof(errstr));
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+ errmsg("invalid regular expression: %s", errstr)));
+ }
+
+ pfree(wmask);
+ pfree(tmask);
+ }
+
+ reg->iscompiled = true;
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
static char *
-CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen)
+CheckAffix(const char *word, size_t len, AFFIX *Affix, AffixReg *reg,
+ int flagflags, char *newword, int *baselen, MemoryContext dictCtx)
{
/*
* Check compound allow flags
@@ -2083,7 +2429,7 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww
if (Affix->type == FF_SUFFIX)
{
strcpy(newword, word);
- strcpy(newword + len - Affix->replen, Affix->find);
+ strcpy(newword + len - Affix->replen, AffixFieldFind(Affix));
if (baselen) /* store length of non-changed part of word */
*baselen = len - Affix->replen;
}
@@ -2093,9 +2439,9 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww
* if prefix is an all non-changed part's length then all word
* contains only prefix and suffix, so out
*/
- if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
+ if (baselen && *baselen + Affix->findlen <= Affix->replen)
return NULL;
- strcpy(newword, Affix->find);
+ strcpy(newword, AffixFieldFind(Affix));
strcat(newword, word + Affix->replen);
}
@@ -2106,7 +2452,12 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww
return newword;
else if (Affix->isregis)
{
- if (RS_execute(&(Affix->reg.regis), newword))
+ /* Compile the regular expression on first demand */
+ if (!reg->iscompiled)
+ CompileAffixReg(reg, Affix->isregis, Affix->type,
+ AffixFieldMask(Affix), Affix->masklen, dictCtx);
+
+ if (RS_execute(&(reg->r.regis), newword))
return newword;
}
else
@@ -2116,12 +2467,17 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww
size_t data_len;
int newword_len;
+ /* Compile the regular expression on first demand */
+ if (!reg->iscompiled)
+ CompileAffixReg(reg, Affix->isregis, Affix->type,
+ AffixFieldMask(Affix), Affix->masklen, dictCtx);
+
/* Convert data string to wide characters */
newword_len = strlen(newword);
data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar));
data_len = pg_mb2wchar_with_len(newword, data, newword_len);
- if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0)))
+ if (!(err = pg_regexec(&(reg->r.regex), data, data_len, 0, NULL, 0, NULL, 0)))
{
pfree(data);
return newword;
@@ -2160,7 +2516,7 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag)
char **cur;
char newword[2 * MAXNORMLEN] = "";
char pnewword[2 * MAXNORMLEN] = "";
- AffixNode *snode = Conf->Suffix,
+ AffixNode *snode = (AffixNode *) DictSuffixNodes(Conf->dict),
*pnode;
int i,
j;
@@ -2172,7 +2528,7 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag)
/* Check that the word itself is normal form */
- if (FindWord(Conf, word, VoidString, flag))
+ if (FindWord(Conf->dict, word, VoidString, flag))
{
*cur = pstrdup(word);
cur++;
@@ -2180,23 +2536,29 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag)
}
/* Find all other NORMAL forms of the 'word' (check only prefix) */
- pnode = Conf->Prefix;
+ pnode = (AffixNode *) DictPrefixNodes(Conf->dict);
plevel = 0;
while (pnode)
{
- prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
+ prefix = FindAffixes(Conf->dict, pnode, word, wrdlen, &plevel, FF_PREFIX);
if (!prefix)
break;
- for (j = 0; j < prefix->naff; j++)
+ for (j = prefix->affstart; j <= prefix->affend; j++)
{
- if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
+ AFFIX *affix = (AFFIX *) DictAffixGet(Conf->dict, j);
+ AffixReg *reg = &(Conf->reg[j]);
+
+ if (affix &&
+ CheckAffix(word, wrdlen, affix, reg, flag, newword, NULL,
+ Conf->dictCtx))
{
/* prefix success */
- if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
+ if (FindWord(Conf->dict, newword, AffixFieldFlag(affix), flag))
cur += addToResult(forms, cur, newword);
}
}
- pnode = prefix->node;
+ pnode = (AffixNode *) DictNodeGet(DictPrefixNodes(Conf->dict),
+ prefix->node_offset);
}
/*
@@ -2208,45 +2570,59 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag)
int baselen = 0;
/* find possible suffix */
- suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
+ suffix = FindAffixes(Conf->dict, snode, word, wrdlen, &slevel,
+ FF_SUFFIX);
if (!suffix)
break;
/* foreach suffix check affix */
- for (i = 0; i < suffix->naff; i++)
+ for (i = suffix->affstart; i <= suffix->affend; i++)
{
- if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
+ AFFIX *sufentry = (AFFIX *) DictAffixGet(Conf->dict, i);
+ AffixReg *sufreg = &(Conf->reg[i]);
+
+ if (sufentry &&
+ CheckAffix(word, wrdlen, sufentry, sufreg, flag, newword, &baselen,
+ Conf->dictCtx))
{
/* suffix success */
- if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
+ if (FindWord(Conf->dict, newword, AffixFieldFlag(sufentry), flag))
cur += addToResult(forms, cur, newword);
/* now we will look changed word with prefixes */
- pnode = Conf->Prefix;
+ pnode = (AffixNode *) DictPrefixNodes(Conf->dict);
plevel = 0;
swrdlen = strlen(newword);
while (pnode)
{
- prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
+ prefix = FindAffixes(Conf->dict, pnode, newword, swrdlen,
+ &plevel, FF_PREFIX);
if (!prefix)
break;
- for (j = 0; j < prefix->naff; j++)
+ for (j = prefix->affstart; j <= prefix->affend; j++)
{
- if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
+ AFFIX *prefentry = (AFFIX *) DictAffixGet(Conf->dict, j);
+ AffixReg *prefreg = &(Conf->reg[j]);
+
+ if (prefentry &&
+ CheckAffix(newword, swrdlen, prefentry, prefreg,
+ flag, pnewword, &baselen, Conf->dictCtx))
{
/* prefix success */
- char *ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
- VoidString : prefix->aff[j]->flag;
+ char *ff = (prefentry->flagflags & sufentry->flagflags & FF_CROSSPRODUCT) ?
+ VoidString : AffixFieldFlag(prefentry);
- if (FindWord(Conf, pnewword, ff, flag))
+ if (FindWord(Conf->dict, pnewword, ff, flag))
cur += addToResult(forms, cur, pnewword);
}
}
- pnode = prefix->node;
+ pnode = (AffixNode *) DictNodeGet(DictPrefixNodes(Conf->dict),
+ prefix->node_offset);
}
}
}
- snode = suffix->node;
+ snode = (AffixNode *) DictNodeGet(DictSuffixNodes(Conf->dict),
+ suffix->node_offset);
}
if (cur == forms)
@@ -2266,7 +2642,8 @@ typedef struct SplitVar
} SplitVar;
static int
-CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace)
+CheckCompoundAffixes(IspellDictData *dict, CMPDAffix **ptr,
+ char *word, int len, bool CheckInPlace)
{
bool issuffix;
@@ -2276,9 +2653,12 @@ CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace)
if (CheckInPlace)
{
- while ((*ptr)->affix)
+ while ((*ptr)->affix != ISPELL_INVALID_INDEX)
{
- if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
+ AFFIX *affix = (AFFIX *) DictAffixGet(dict, (*ptr)->affix);
+
+ if (len > (*ptr)->len &&
+ strncmp(AffixFieldRepl(affix), word, (*ptr)->len) == 0)
{
len = (*ptr)->len;
issuffix = (*ptr)->issuffix;
@@ -2292,9 +2672,12 @@ CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace)
{
char *affbegin;
- while ((*ptr)->affix)
+ while ((*ptr)->affix != ISPELL_INVALID_INDEX)
{
- if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
+ AFFIX *affix = (AFFIX *) DictAffixGet(dict, (*ptr)->affix);
+
+ if (len > (*ptr)->len &&
+ (affbegin = strstr(word, AffixFieldRepl(affix))) != NULL)
{
len = (*ptr)->len + (affbegin - word);
issuffix = (*ptr)->issuffix;
@@ -2346,13 +2729,14 @@ AddStem(SplitVar *v, char *word)
}
static SplitVar *
-SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos)
+SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig,
+ char *word, int wordlen, int startpos, int minpos)
{
SplitVar *var = NULL;
SPNodeData *StopLow,
*StopHigh,
*StopMiddle = NULL;
- SPNode *node = (snode) ? snode : Conf->Dictionary;
+ SPNode *node = (snode) ? snode : (SPNode *) DictDictNodes(Conf->dict);
int level = (snode) ? minpos : startpos; /* recursive
* minpos==level */
int lenaff;
@@ -2367,8 +2751,11 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int
while (level < wordlen)
{
/* find word with epenthetic or/and compound affix */
- caff = Conf->CompoundAffix;
- while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0)
+ caff = (CMPDAffix *) DictCompoundAffix(Conf->dict);
+ while (level > startpos &&
+ (lenaff = CheckCompoundAffixes(Conf->dict, &caff,
+ word + level, wordlen - level,
+ (node) ? true : false)) >= 0)
{
/*
* there is one of compound affixes, so check word for existings
@@ -2415,7 +2802,8 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int
while (ptr->next)
ptr = ptr->next;
- ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);
+ ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen,
+ startpos + lenaff, startpos + lenaff);
pfree(new->stem);
pfree(new);
@@ -2474,13 +2862,14 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int
/* we can find next word */
level++;
AddStem(var, pnstrdup(word + startpos, level - startpos));
- node = Conf->Dictionary;
+ node = (SPNode *) DictDictNodes(Conf->dict);
startpos = level;
continue;
}
}
}
- node = StopMiddle->node;
+ node = (SPNode *) DictNodeGet(DictDictNodes(Conf->dict),
+ StopMiddle->node_offset);
}
else
node = NULL;
@@ -2530,7 +2919,7 @@ NINormalizeWord(IspellDict *Conf, char *word)
pfree(res);
}
- if (Conf->usecompound)
+ if (Conf->dict->usecompound)
{
int wordlen = strlen(word);
SplitVar *ptr,
diff --git a/src/include/tsearch/dicts/spell.h b/src/include/tsearch/dicts/spell.h
index 210f97dda9..b40cf379eb 100644
--- a/src/include/tsearch/dicts/spell.h
+++ b/src/include/tsearch/dicts/spell.h
@@ -18,21 +18,23 @@
#include "tsearch/dicts/regis.h"
#include "tsearch/ts_public.h"
+#define ISPELL_INVALID_INDEX (0x7FFFF)
+#define ISPELL_INVALID_OFFSET (0xFFFFFFFF)
+
/*
* SPNode and SPNodeData are used to represent prefix tree (Trie) to store
* a words list.
*/
-struct SPNode;
-
typedef struct
{
uint32 val:8,
isword:1,
/* Stores compound flags listed below */
compoundflag:4,
- /* Reference to an entry of the AffixData field */
+ /* Index of an entry of the AffixData field */
affix:19;
- struct SPNode *node;
+ /* Offset to a node of the DictNodes field */
+ uint32 node_offset;
} SPNodeData;
/*
@@ -86,21 +88,55 @@ typedef struct spell_struct
*/
typedef struct aff_struct
{
- char *flag;
/* FF_SUFFIX or FF_PREFIX */
- uint32 type:1,
+ uint16 type:1,
flagflags:7,
issimple:1,
isregis:1,
- replen:14;
- char *find;
- char *repl;
+ flaglen:2;
+
+ /* 8 bytes could be too mach for repl, find and mask, but who knows */
+ uint8 replen;
+ uint8 findlen;
+ uint8 masklen;
+
+ /*
+ * fields stores the following data (each ends with \0):
+ * - repl
+ * - find
+ * - mask
+ * - flag - one character (if FM_CHAR),
+ * two characters (if FM_LONG),
+ * number, >= 0 and < 65536 (if FM_NUM).
+ */
+ char fields[FLEXIBLE_ARRAY_MEMBER];
+} AFFIX;
+
+#define AF_FLAG_MAXSIZE 5 /* strlen(65536) */
+#define AF_REPL_MAXSIZE 255 /* 8 bytes */
+
+#define AFFIXHDRSZ (offsetof(AFFIX, fields))
+
+#define AffixFieldRepl(af) ((af)->fields)
+#define AffixFieldFind(af) ((af)->fields + (af)->replen + 1)
+#define AffixFieldMask(af) (AffixFieldFind(af) + (af)->findlen + 1)
+#define AffixFieldFlag(af) (AffixFieldMask(af) + (af)->masklen + 1)
+#define AffixGetSize(af) (AFFIXHDRSZ + (af)->replen + 1 + (af)->findlen + 1 \
+ + (af)->masklen + 1 + strlen(AffixFieldFlag(af)) + 1)
+
+/*
+ * Stores compiled regular expression of affix. AffixReg uses mask field of
+ * AFFIX as a regular expression.
+ */
+typedef struct AffixReg
+{
+ bool iscompiled;
union
{
regex_t regex;
Regis regis;
- } reg;
-} AFFIX;
+ } r;
+} AffixReg;
/*
* affixes use dictionary flags too
@@ -120,14 +156,13 @@ typedef struct aff_struct
* AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store
* an affix list.
*/
-struct AffixNode;
-
typedef struct
{
- uint32 val:8,
- naff:24;
- AFFIX **aff;
- struct AffixNode *node;
+ uint8 val;
+ uint32 affstart;
+ uint32 affend;
+ /* Offset to a node of the PrefixNodes or SuffixNodes field */
+ uint32 node_offset;
} AffixNodeData;
typedef struct AffixNode
@@ -139,9 +174,19 @@ typedef struct AffixNode
#define ANHRDSZ (offsetof(AffixNode, data))
+typedef struct NodeArray
+{
+ char *Nodes;
+ uint32 NodesSize; /* allocated size of Nodes */
+ uint32 NodesEnd; /* end of data in Nodes */
+} NodeArray;
+
+#define NodeArrayGet(na, of) (((of) == ISPELL_INVALID_OFFSET) ? NULL : (na)->Nodes + (of))
+
typedef struct
{
- char *affix;
+ /* Index of an affix of the Affix field */
+ uint32 affix;
int len;
bool issuffix;
} CMPDAffix;
@@ -176,30 +221,71 @@ typedef struct CompoundAffixFlag
#define FLAGNUM_MAXSIZE (1 << 16)
-typedef struct
+typedef struct IspellDictData
{
- int maffixes;
- int naffixes;
- AFFIX *Affix;
-
- AffixNode *Suffix;
- AffixNode *Prefix;
+ FlagMode flagMode;
+ bool usecompound;
- SPNode *Dictionary;
- /* Array of sets of affixes */
- char **AffixData;
- int lenAffixData;
- int nAffixData;
bool useFlagAliases;
- CMPDAffix *CompoundAffix;
+ uint32 nAffixData;
+ uint32 AffixDataStart;
- bool usecompound;
- FlagMode flagMode;
+ uint32 AffixOffsetStart;
+ uint32 AffixStart;
+ uint32 nAffix;
+
+ uint32 DictNodesStart;
+ uint32 PrefixNodesStart;
+ uint32 SuffixNodesStart;
+
+ uint32 CompoundAffixStart;
/*
- * All follow fields are actually needed only for initialization
+ * data stores:
+ * - AffixData - array of affix sets
+ * - Affix - sorted array of affixes
+ * - DictNodes - prefix tree of a word list
+ * - PrefixNodes - prefix tree of a prefix list
+ * - SuffixNodes - prefix tree of a suffix list
+ * - CompoundAffix - array of compound affixes
*/
+ char data[FLEXIBLE_ARRAY_MEMBER];
+} IspellDictData;
+
+#define IspellDictDataHdrSize (offsetof(IspellDictData, data))
+
+#define DictAffixDataOffset(d) ((d)->data)
+#define DictAffixData(d) ((d)->data + (d)->AffixDataStart)
+#define DictAffixDataGet(d, i) (((i) == ISPELL_INVALID_INDEX) ? NULL : \
+ DictAffixData(d) + ((uint32 *) DictAffixDataOffset(d))[i])
+
+#define DictAffixOffset(d) ((d)->data + (d)->AffixOffsetStart)
+#define DictAffix(d) ((d)->data + (d)->AffixStart)
+#define DictAffixGet(d, i) (((i) == ISPELL_INVALID_INDEX) ? NULL : \
+ DictAffix(d) + ((uint32 *) DictAffixOffset(d))[i])
+
+#define DictDictNodes(d) ((d)->data + (d)->DictNodesStart)
+#define DictPrefixNodes(d) ((d)->data + (d)->PrefixNodesStart)
+#define DictSuffixNodes(d) ((d)->data + (d)->SuffixNodesStart)
+#define DictNodeGet(node_start, of) (((of) == ISPELL_INVALID_OFFSET) ? NULL : \
+ (char *) (node_start) + (of))
+
+#define DictCompoundAffix(d) ((d)->data + (d)->CompoundAffixStart)
+
+/*
+ * IspellDictBuild is used to initialize IspellDictData struct. This is a
+ * temprorary structure which is setup by NIStartBuild() and released by
+ * NIFinishBuild().
+ */
+typedef struct IspellDictBuild
+{
+ MemoryContext buildCxt; /* temp context for construction */
+
+ IspellDictData *dict;
+ uint32 dict_size;
+
+ /* Temporary data */
/* Array of Hunspell options in affix file */
CompoundAffixFlag *CompoundAffixFlags;
@@ -208,29 +294,73 @@ typedef struct
/* allocated length of CompoundAffixFlags array */
int mCompoundAffixFlag;
- /*
- * Remaining fields are only used during dictionary construction; they are
- * set up by NIStartBuild and cleared by NIFinishBuild.
- */
- MemoryContext buildCxt; /* temp context for construction */
-
- /* Temporary array of all words in the dict file */
+ /* Array of all words in the dict file */
SPELL **Spell;
- int nspell; /* number of valid entries in Spell array */
- int mspell; /* allocated length of Spell array */
+ int nSpell; /* number of valid entries in Spell array */
+ int mSpell; /* allocated length of Spell array */
+
+ /* Array of all affixes in the aff file */
+ AFFIX **Affix;
+ int nAffix; /* number of valid entries in Affix array */
+ int mAffix; /* allocated length of Affix array */
+ uint32 AffixSize;
+
+ /* Data for IspellDictData */
+
+ /* Array of sets of affixes */
+ uint32 *AffixDataOffset;
+ int nAffixData; /* number of affix sets */
+ int mAffixData; /* allocated number of affix sets */
+ char *AffixData;
+ uint32 AffixDataSize; /* allocated size of AffixData */
+ uint32 AffixDataEnd; /* end of data in AffixData */
+
+ /* Prefix tree which stores a word list */
+ NodeArray DictNodes;
+
+ /* Prefix tree which stores a prefix list */
+ NodeArray PrefixNodes;
+
+ /* Prefix tree which stores a suffix list */
+ NodeArray SuffixNodes;
- /* These are used to allocate "compact" data without palloc overhead */
- char *firstfree; /* first free address (always maxaligned) */
- size_t avail; /* free space remaining at firstfree */
+ /* Array of compound affixes */
+ CMPDAffix *CompoundAffix;
+ int nCompoundAffix; /* number of entries of CompoundAffix */
+} IspellDictBuild;
+
+#define AffixDataGet(d, i) ((d)->AffixData + (d)->AffixDataOffset[i])
+
+/*
+ * IspellDict is used within NINormalizeWord.
+ */
+typedef struct IspellDict
+{
+ /*
+ * Pointer to a DSM location of IspellDictData. Should be retreived per
+ * every dispell_lexize() call.
+ */
+ IspellDictData *dict;
+ /*
+ * Array of regular expression of affixes. Each regular expression is
+ * compiled only on demand.
+ */
+ AffixReg *reg;
+ /*
+ * Memory context for compiling regular expressions.
+ */
+ MemoryContext dictCtx;
} IspellDict;
extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);
-extern void NIStartBuild(IspellDict *Conf);
-extern void NIImportAffixes(IspellDict *Conf, const char *filename);
-extern void NIImportDictionary(IspellDict *Conf, const char *filename);
-extern void NISortDictionary(IspellDict *Conf);
-extern void NISortAffixes(IspellDict *Conf);
-extern void NIFinishBuild(IspellDict *Conf);
+extern void NIStartBuild(IspellDictBuild *ConfBuild);
+extern void NIImportAffixes(IspellDictBuild *ConfBuild, const char *filename);
+extern void NIImportDictionary(IspellDictBuild *ConfBuild,
+ const char *filename);
+extern void NISortDictionary(IspellDictBuild *ConfBuild);
+extern void NISortAffixes(IspellDictBuild *ConfBuild);
+extern void NICopyData(IspellDictBuild *ConfBuild);
+extern void NIFinishBuild(IspellDictBuild *ConfBuild);
#endif