*** /tmp/DQoMjJ_regexp.c 2014-01-28 19:59:37.470271459 +0100 --- src/backend/utils/adt/regexp.c 2014-01-28 19:44:47.298288383 +0100 *************** *** 113,118 **** --- 113,119 ---- bool ignore_degenerate); static void cleanup_regexp_matches(regexp_matches_ctx *matchctx); static ArrayType *build_regexp_matches_result(regexp_matches_ctx *matchctx); + static ArrayType *build_regexp_matches_positions_result(regexp_matches_ctx *matchctx); static Datum build_regexp_split_result(regexp_matches_ctx *splitctx); *************** *** 833,838 **** --- 834,898 ---- return regexp_matches(fcinfo); } + + /* + * regexp_matches_positions() + * Return a table of matched locations of a pattern within a string. + */ + Datum + regexp_matches_positions(PG_FUNCTION_ARGS) + { + FuncCallContext *funcctx; + regexp_matches_ctx *matchctx; + + if (SRF_IS_FIRSTCALL()) + { + text *pattern = PG_GETARG_TEXT_PP(1); + text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2); + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* be sure to copy the input string into the multi-call ctx */ + matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern, + flags, + PG_GET_COLLATION(), + false, true, false); + + /* Pre-create workspace that build_regexp_matches_positions_result needs */ + matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns); + matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns); + + MemoryContextSwitchTo(oldcontext); + funcctx->user_fctx = (void *) matchctx; + } + + funcctx = SRF_PERCALL_SETUP(); + matchctx = (regexp_matches_ctx *) funcctx->user_fctx; + + if (matchctx->next_match < matchctx->nmatches) + { + ArrayType *result_ary; + + result_ary = build_regexp_matches_positions_result(matchctx); + matchctx->next_match++; + SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary)); + } + + /* release space in multi-call ctx to avoid intraquery memory leak */ + cleanup_regexp_matches(matchctx); + + SRF_RETURN_DONE(funcctx); + } + + /* This is separate to keep the opr_sanity regression test from complaining */ + Datum + regexp_matches_positions_no_flags(PG_FUNCTION_ARGS) + { + return regexp_matches_positions(fcinfo); + } + /* * setup_regexp_matches --- do the initial matching for regexp_matches() * or regexp_split() *************** *** 1035,1040 **** --- 1095,1140 ---- } /* + * build_regexp_matches_positions_result - build output array for current match + */ + static ArrayType * + build_regexp_matches_positions_result(regexp_matches_ctx *matchctx) + { + Datum *elems = matchctx->elems; + bool *nulls = matchctx->nulls; + int dims[1]; + int lbs[1]; + int loc; + int i; + + /* Extract matching substrings from the original string */ + loc = matchctx->next_match * matchctx->npatterns * 2; + for (i = 0; i < matchctx->npatterns; i++) + { + int so = matchctx->match_locs[loc++]; + int eo = matchctx->match_locs[loc++]; + + if (so < 0 || eo < 0) + { + elems[i] = (Datum) 0; + nulls[i] = true; + } + else + { + elems[i] = Int32GetDatum(so)+1; + nulls[i] = false; + } + } + + /* And form an array */ + dims[0] = matchctx->npatterns; + lbs[0] = 1; + /* XXX: this hardcodes assumptions about the int4 type */ + return construct_md_array(elems, nulls, 1, dims, lbs, + INT4OID, 4, true, 'i'); + } + + /* * regexp_split_to_table() * Split the string at matches of the pattern, returning the * split-out substrings as a table. *** /tmp/xPfd4G_pg_proc.h 2014-01-28 19:59:37.478271459 +0100 --- src/include/catalog/pg_proc.h 2014-01-28 19:44:47.298288383 +0100 *************** *** 1899,1904 **** --- 1899,1908 ---- DESCR("find all match groups for regexp"); DATA(insert OID = 2764 ( regexp_matches PGNSP PGUID 12 1 10 0 0 f f f f t t i 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ regexp_matches _null_ _null_ _null_ )); DESCR("find all match groups for regexp"); + DATA(insert OID = 7769 ( regexp_matches_positions PGNSP PGUID 12 1 1 0 0 f f f f t t i 2 0 1009 "25 25" _null_ _null_ _null_ _null_ regexp_matches_positions_no_flags _null_ _null_ _null_ )); + DESCR("find all match positions for regexp"); + DATA(insert OID = 7770 ( regexp_matches_positions PGNSP PGUID 12 1 10 0 0 f f f f t t i 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ regexp_matches_positions _null_ _null_ _null_ )); + DESCR("find all match positions for regexp"); DATA(insert OID = 2088 ( split_part PGNSP PGUID 12 1 0 0 0 f f f f t f i 3 0 25 "25 25 23" _null_ _null_ _null_ _null_ split_text _null_ _null_ _null_ )); DESCR("split string by field_sep and return field_num"); DATA(insert OID = 2765 ( regexp_split_to_table PGNSP PGUID 12 1 1000 0 0 f f f f t t i 2 0 25 "25 25" _null_ _null_ _null_ _null_ regexp_split_to_table_no_flags _null_ _null_ _null_ )); *** /tmp/lXOnOH_builtins.h 2014-01-28 19:59:37.490271458 +0100 --- src/include/utils/builtins.h 2014-01-28 19:44:47.302288383 +0100 *************** *** 587,592 **** --- 587,594 ---- extern Datum similar_escape(PG_FUNCTION_ARGS); extern Datum regexp_matches(PG_FUNCTION_ARGS); extern Datum regexp_matches_no_flags(PG_FUNCTION_ARGS); + extern Datum regexp_matches_positions(PG_FUNCTION_ARGS); + extern Datum regexp_matches_no_flags_positions(PG_FUNCTION_ARGS); extern Datum regexp_split_to_table(PG_FUNCTION_ARGS); extern Datum regexp_split_to_table_no_flags(PG_FUNCTION_ARGS); extern Datum regexp_split_to_array(PG_FUNCTION_ARGS); *** /tmp/LiDRpL_strings.out 2014-01-28 19:59:37.498271458 +0100 --- src/test/regress/expected/strings.out 2014-01-28 19:44:47.302288383 +0100 *************** *** 505,510 **** --- 505,517 ---- ERROR: invalid regular expression: parentheses () not balanced SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$); ERROR: invalid regular expression: invalid repetition count(s) + -- return all match positions from regexp + SELECT regexp_matches_positions('foobarbequebaz', $re$(bar)(beque)$re$); + regexp_matches_positions + -------------------------- + {4,7} + (1 row) + -- split string on regexp SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumps over the lazy dog', $re$\s+$re$) AS foo; foo | length *** /tmp/h7moDJ_strings.sql 2014-01-28 19:59:37.506271458 +0100 --- src/test/regress/sql/strings.sql 2014-01-28 19:44:47.302288383 +0100 *************** *** 170,175 **** --- 170,178 ---- SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$); SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$); + -- return all match positions from regexp + SELECT regexp_matches_positions('foobarbequebaz', $re$(bar)(beque)$re$); + -- split string on regexp SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumps over the lazy dog', $re$\s+$re$) AS foo; SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', $re$\s+$re$);