From d79f5b85e36eba026368322757864fc773085c25 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 2 Dec 2024 11:34:17 +0100 Subject: [PATCH v2 2/4] Remove t_isspace() --- contrib/dict_xsyn/dict_xsyn.c | 4 +-- contrib/ltree/ltxtquery_io.c | 2 +- contrib/pg_trgm/trgm.h | 6 ---- contrib/unaccent/unaccent.c | 2 +- src/backend/tsearch/dict_synonym.c | 4 +-- src/backend/tsearch/dict_thesaurus.c | 10 +++--- src/backend/tsearch/spell.c | 42 ++++++++++++------------- src/backend/tsearch/ts_locale.c | 15 --------- src/backend/tsearch/ts_utils.c | 2 +- src/backend/utils/adt/tsquery.c | 10 +++--- src/backend/utils/adt/tsvector_parser.c | 6 ++-- src/include/tsearch/ts_locale.h | 1 - 12 files changed, 41 insertions(+), 63 deletions(-) diff --git a/contrib/dict_xsyn/dict_xsyn.c b/contrib/dict_xsyn/dict_xsyn.c index 3635ed1df84..f8c0a5bf5c5 100644 --- a/contrib/dict_xsyn/dict_xsyn.c +++ b/contrib/dict_xsyn/dict_xsyn.c @@ -48,14 +48,14 @@ find_word(char *in, char **end) char *start; *end = NULL; - while (*in && t_isspace(in)) + while (*in && isspace((unsigned char) *in)) in += pg_mblen(in); if (!*in || *in == '#') return NULL; start = in; - while (*in && !t_isspace(in)) + while (*in && !isspace((unsigned char) *in)) in += pg_mblen(in); *end = in; diff --git a/contrib/ltree/ltxtquery_io.c b/contrib/ltree/ltxtquery_io.c index 121fc55e469..7b8fba17ff2 100644 --- a/contrib/ltree/ltxtquery_io.c +++ b/contrib/ltree/ltxtquery_io.c @@ -88,7 +88,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint *lenval = charlen; *flag = 0; } - else if (!t_isspace(state->buf)) + else if (!isspace((unsigned char) *state->buf)) ereturn(state->escontext, ERR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("operand syntax error"))); diff --git a/contrib/pg_trgm/trgm.h b/contrib/pg_trgm/trgm.h index afb0adb222b..10827563694 100644 --- a/contrib/pg_trgm/trgm.h +++ b/contrib/pg_trgm/trgm.h @@ -15,7 +15,6 @@ */ #define LPADDING 2 #define RPADDING 1 -#define KEEPONLYALNUM /* * Caution: IGNORECASE macro means that trigrams are case-insensitive. * If this macro is disabled, the ~* and ~~* operators must be removed from @@ -51,13 +50,8 @@ typedef char trgm[3]; *(((char*)(a))+2) = *(((char*)(b))+2); \ } while(0) -#ifdef KEEPONLYALNUM #define ISWORDCHR(c) (t_isalnum(c)) #define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') ) -#else -#define ISWORDCHR(c) (!t_isspace(c)) -#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) ) -#endif #define ISPRINTABLETRGM(t) ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) ) #define ISESCAPECHAR(x) (*(x) == '\\') /* Wildcard escape character */ diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c index 0217696aac1..fcc25dc7139 100644 --- a/contrib/unaccent/unaccent.c +++ b/contrib/unaccent/unaccent.c @@ -155,7 +155,7 @@ initTrie(const char *filename) { ptrlen = pg_mblen(ptr); /* ignore whitespace, but end src or trg */ - if (t_isspace(ptr)) + if (isspace((unsigned char) *ptr)) { if (state == 1) state = 2; diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c index 77cd511ee51..77c0d7a3593 100644 --- a/src/backend/tsearch/dict_synonym.c +++ b/src/backend/tsearch/dict_synonym.c @@ -47,7 +47,7 @@ findwrd(char *in, char **end, uint16 *flags) char *lastchar; /* Skip leading spaces */ - while (*in && t_isspace(in)) + while (*in && isspace((unsigned char) *in)) in += pg_mblen(in); /* Return NULL on empty lines */ @@ -60,7 +60,7 @@ findwrd(char *in, char **end, uint16 *flags) lastchar = start = in; /* Find end of word */ - while (*in && !t_isspace(in)) + while (*in && !isspace((unsigned char) *in)) { lastchar = in; in += pg_mblen(in); diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c index 6b159f9f569..f1449b5607f 100644 --- a/src/backend/tsearch/dict_thesaurus.c +++ b/src/backend/tsearch/dict_thesaurus.c @@ -190,7 +190,7 @@ thesaurusRead(const char *filename, DictThesaurus *d) ptr = line; /* is it a comment? */ - while (*ptr && t_isspace(ptr)) + while (*ptr && isspace((unsigned char) *ptr)) ptr += pg_mblen(ptr); if (t_iseq(ptr, '#') || *ptr == '\0' || @@ -212,7 +212,7 @@ thesaurusRead(const char *filename, DictThesaurus *d) errmsg("unexpected delimiter"))); state = TR_WAITSUBS; } - else if (!t_isspace(ptr)) + else if (!isspace((unsigned char) *ptr)) { beginwrd = ptr; state = TR_INLEX; @@ -225,7 +225,7 @@ thesaurusRead(const char *filename, DictThesaurus *d) newLexeme(d, beginwrd, ptr, idsubst, posinsubst++); state = TR_WAITSUBS; } - else if (t_isspace(ptr)) + else if (isspace((unsigned char) *ptr)) { newLexeme(d, beginwrd, ptr, idsubst, posinsubst++); state = TR_WAITLEX; @@ -245,7 +245,7 @@ thesaurusRead(const char *filename, DictThesaurus *d) state = TR_INSUBS; beginwrd = ptr + pg_mblen(ptr); } - else if (!t_isspace(ptr)) + else if (!isspace((unsigned char) *ptr)) { useasis = false; beginwrd = ptr; @@ -254,7 +254,7 @@ thesaurusRead(const char *filename, DictThesaurus *d) } else if (state == TR_INSUBS) { - if (t_isspace(ptr)) + if (isspace((unsigned char) *ptr)) { if (ptr == beginwrd) ereport(ERROR, diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c index 7800f794e84..b41afbd7322 100644 --- a/src/backend/tsearch/spell.c +++ b/src/backend/tsearch/spell.c @@ -408,7 +408,7 @@ getNextFlagFromString(IspellDict *Conf, const char **sflagset, char *sflag) *sflagset))); met_comma = true; } - else if (!t_isspace(*sflagset)) + else if (!isspace((unsigned char) **sflagset)) { ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), @@ -542,7 +542,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename) while (*s) { /* we allow only single encoded flags for faster works */ - if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s)) + if (pg_mblen(s) == 1 && t_isprint(s) && !isspace((unsigned char) *s)) s++; else { @@ -558,7 +558,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename) s = line; while (*s) { - if (t_isspace(s)) + if (isspace((unsigned char) *s)) { *s = '\0'; break; @@ -799,7 +799,7 @@ get_nextfield(char **str, char *next) { if (t_iseq(*str, '#')) return false; - else if (!t_isspace(*str)) + else if (!isspace((unsigned char) **str)) { int clen = pg_mblen(*str); @@ -814,7 +814,7 @@ get_nextfield(char **str, char *next) } else /* state == PAE_INMASK */ { - if (t_isspace(*str)) + if (isspace((unsigned char) **str)) { *next = '\0'; return true; @@ -925,7 +925,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl) { if (t_iseq(str, '#')) return false; - else if (!t_isspace(str)) + else if (!isspace((unsigned char) *str)) { COPYCHAR(pmask, str); pmask += pg_mblen(str); @@ -939,7 +939,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl) *pmask = '\0'; state = PAE_WAIT_FIND; } - else if (!t_isspace(str)) + else if (!isspace((unsigned char) *str)) { COPYCHAR(pmask, str); pmask += pg_mblen(str); @@ -957,7 +957,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl) prepl += pg_mblen(str); state = PAE_INREPL; } - else if (!t_isspace(str)) + else if (!isspace((unsigned char) *str)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("syntax error"))); @@ -974,7 +974,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl) COPYCHAR(pfind, str); pfind += pg_mblen(str); } - else if (!t_isspace(str)) + else if (!isspace((unsigned char) *str)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("syntax error"))); @@ -991,7 +991,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl) prepl += pg_mblen(str); state = PAE_INREPL; } - else if (!t_isspace(str)) + else if (!isspace((unsigned char) *str)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("syntax error"))); @@ -1008,7 +1008,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl) COPYCHAR(prepl, str); prepl += pg_mblen(str); } - else if (!t_isspace(str)) + else if (!isspace((unsigned char) *str)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("syntax error"))); @@ -1070,7 +1070,7 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) char *sflag; int clen; - while (*s && t_isspace(s)) + while (*s && isspace((unsigned char) *s)) s += pg_mblen(s); if (!*s) @@ -1080,7 +1080,7 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) /* Get flag without \n */ sflag = sbuf; - while (*s && !t_isspace(s) && *s != '\n') + while (*s && !isspace((unsigned char) *s) && *s != '\n') { clen = pg_mblen(s); COPYCHAR(sflag, s); @@ -1225,7 +1225,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) while ((recoded = tsearch_readline(&trst)) != NULL) { - if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#')) + if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#')) { pfree(recoded); continue; @@ -1262,7 +1262,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) { char *s = recoded + strlen("FLAG"); - while (*s && t_isspace(s)) + while (*s && isspace((unsigned char) *s)) s += pg_mblen(s); if (*s) @@ -1298,7 +1298,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) { int fields_read; - if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#')) + if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#')) goto nextline; fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask); @@ -1461,9 +1461,9 @@ NIImportAffixes(IspellDict *Conf, const char *filename) s = findchar2(recoded, 'l', 'L'); if (s) { - while (*s && !t_isspace(s)) + while (*s && !isspace((unsigned char) *s)) s += pg_mblen(s); - while (*s && t_isspace(s)) + while (*s && isspace((unsigned char) *s)) s += pg_mblen(s); if (*s && pg_mblen(s) == 1) @@ -1494,7 +1494,7 @@ NIImportAffixes(IspellDict *Conf, const char *filename) s = recoded + 4; /* we need non-lowercased string */ flagflags = 0; - while (*s && t_isspace(s)) + while (*s && isspace((unsigned char) *s)) s += pg_mblen(s); if (*s == '*') @@ -1523,7 +1523,7 @@ NIImportAffixes(IspellDict *Conf, const char *filename) s++; if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' || - t_isspace(s)) + isspace((unsigned char) *s)) { oldformat = true; goto nextline; @@ -1750,7 +1750,7 @@ NISortDictionary(IspellDict *Conf) (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix alias \"%s\"", Conf->Spell[i]->p.flag))); - if (*end != '\0' && !isdigit((unsigned char) *end) && !t_isspace(end)) + if (*end != '\0' && !isdigit((unsigned char) *end) && !isspace((unsigned char) *end)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix alias \"%s\"", diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c index 7247b8cbe8a..70a39f48814 100644 --- a/src/backend/tsearch/ts_locale.c +++ b/src/backend/tsearch/ts_locale.c @@ -31,21 +31,6 @@ static void tsearch_readline_callback(void *arg); */ #define WC_BUF_LEN 3 -int -t_isspace(const char *ptr) -{ - int clen = pg_mblen(ptr); - wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - - if (clen == 1 || database_ctype_is_c) - return isspace(TOUCHAR(ptr)); - - char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); - - return iswspace((wint_t) character[0]); -} - int t_isalpha(const char *ptr) { diff --git a/src/backend/tsearch/ts_utils.c b/src/backend/tsearch/ts_utils.c index 81967d29e9a..f20e61d4c8c 100644 --- a/src/backend/tsearch/ts_utils.c +++ b/src/backend/tsearch/ts_utils.c @@ -88,7 +88,7 @@ readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *)) char *pbuf = line; /* Trim trailing space */ - while (*pbuf && !t_isspace(pbuf)) + while (*pbuf && !isspace((unsigned char) *pbuf)) pbuf += pg_mblen(pbuf); *pbuf = '\0'; diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c index 219ab543f62..0366c2a2acd 100644 --- a/src/backend/utils/adt/tsquery.c +++ b/src/backend/utils/adt/tsquery.c @@ -274,7 +274,7 @@ parse_or_operator(TSQueryParserState pstate) * So we still treat OR literal as operation with possibly incorrect * operand and will not search it as lexeme */ - if (!t_isspace(ptr)) + if (!isspace((unsigned char) *ptr)) break; } @@ -315,7 +315,7 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator, /* generic syntax error message is fine */ return PT_ERR; } - else if (!t_isspace(state->buf)) + else if (!isspace((unsigned char) *state->buf)) { /* * We rely on the tsvector parser to parse the value for @@ -383,7 +383,7 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator, { return (state->count) ? PT_ERR : PT_END; } - else if (!t_isspace(state->buf)) + else if (!isspace((unsigned char) *state->buf)) { return PT_ERR; } @@ -444,7 +444,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator, state->state = WAITOPERAND; continue; } - else if (!t_isspace(state->buf)) + else if (!isspace((unsigned char) *state->buf)) { /* * We rely on the tsvector parser to parse the value for @@ -492,7 +492,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator, state->buf++; continue; } - else if (!t_isspace(state->buf)) + else if (!isspace((unsigned char) *state->buf)) { /* insert implicit AND between operands */ state->state = WAITOPERAND; diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c index 9e33de0bde7..750a1e8e8d9 100644 --- a/src/backend/utils/adt/tsvector_parser.c +++ b/src/backend/utils/adt/tsvector_parser.c @@ -206,7 +206,7 @@ gettoken_tsvector(TSVectorParseState state, else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) || (state->is_web && t_iseq(state->prsbuf, '"'))) PRSSYNTAXERROR; - else if (!t_isspace(state->prsbuf)) + else if (!isspace((unsigned char) *state->prsbuf)) { COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); @@ -236,7 +236,7 @@ gettoken_tsvector(TSVectorParseState state, statecode = WAITNEXTCHAR; oldstate = WAITENDWORD; } - else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' || + else if (isspace((unsigned char) *state->prsbuf) || *(state->prsbuf) == '\0' || (state->oprisdelim && ISOPERATOR(state->prsbuf)) || (state->is_web && t_iseq(state->prsbuf, '"'))) { @@ -372,7 +372,7 @@ gettoken_tsvector(TSVectorParseState state, PRSSYNTAXERROR; WEP_SETWEIGHT(pos[npos - 1], 0); } - else if (t_isspace(state->prsbuf) || + else if (isspace((unsigned char) *state->prsbuf) || *(state->prsbuf) == '\0') RETURN_TOKEN; else if (!isdigit((unsigned char) *state->prsbuf)) diff --git a/src/include/tsearch/ts_locale.h b/src/include/tsearch/ts_locale.h index 8ef380791fe..9606bb30983 100644 --- a/src/include/tsearch/ts_locale.h +++ b/src/include/tsearch/ts_locale.h @@ -39,7 +39,6 @@ typedef struct #define COPYCHAR(d,s) memcpy(d, s, pg_mblen(s)) -extern int t_isspace(const char *ptr); extern int t_isalpha(const char *ptr); extern int t_isalnum(const char *ptr); extern int t_isprint(const char *ptr); -- 2.47.1