From owner-svn-src-user@FreeBSD.ORG Tue Feb 14 14:55:28 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 79AB51065670; Tue, 14 Feb 2012 14:55:28 +0000 (UTC) (envelope-from gabor@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 679458FC16; Tue, 14 Feb 2012 14:55:28 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q1EEtRF4003459; Tue, 14 Feb 2012 14:55:28 GMT (envelope-from gabor@svn.freebsd.org) Received: (from gabor@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q1EEtR5K003457; Tue, 14 Feb 2012 14:55:27 GMT (envelope-from gabor@svn.freebsd.org) Message-Id: <201202141455.q1EEtR5K003457@svn.freebsd.org> From: Gabor Kovesdan Date: Tue, 14 Feb 2012 14:55:27 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r231683 - user/gabor/tre-integration/contrib/tre/lib X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 14 Feb 2012 14:55:28 -0000 Author: gabor Date: Tue Feb 14 14:55:27 2012 New Revision: 231683 URL: http://svn.freebsd.org/changeset/base/231683 Log: - Separate different matching logics for better later reuse and readability Modified: user/gabor/tre-integration/contrib/tre/lib/regexec.c Modified: user/gabor/tre-integration/contrib/tre/lib/regexec.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/regexec.c Tue Feb 14 14:24:37 2012 (r231682) +++ user/gabor/tre-integration/contrib/tre/lib/regexec.c Tue Feb 14 14:55:27 2012 (r231683) @@ -62,6 +62,14 @@ __weak_reference(tre_regawexec, regawexe __weak_reference(tre_regawnexec, regawnexec); #endif +static int tre_match_heur(const tre_tnfa_t *tnfa, heur_t *heur, + const void *string, size_t len, + tre_str_type_t type, size_t nmatch, + regmatch_t pmatch[], int eflags); +static int tre_match_nfa(const tre_tnfa_t *tnfa, const void *string, + size_t len, tre_str_type_t type, size_t nmatch, + regmatch_t pmatch[], int eflags); + /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match endpoint values. */ void @@ -154,16 +162,27 @@ tre_match(const tre_tnfa_t *tnfa, const tre_str_type_t type, size_t nmatch, regmatch_t pmatch[], int eflags, fastmatch_t *shortcut, heur_t *heur) { - reg_errcode_t status; - int *tags = NULL, eo; - /* Check if we can cheat with a faster algorithm. */ if ((shortcut != NULL) && (type != STR_USER)) - { - DPRINT("tre_match: using tre_match_fast() instead of the full NFA\n"); - return tre_match_fast(shortcut, string, len, type, nmatch, - pmatch, eflags); - } + tre_match_fast(shortcut, string, len, type, nmatch, + pmatch, eflags); + else if ((heur != NULL) && (type != STR_USER)) + return tre_match_heur(tnfa, heur, string, len, type, nmatch, + pmatch, eflags); + + return tre_match_nfa(tnfa, string, len, type, nmatch, + pmatch, eflags); +} + +static int +tre_match_heur(const tre_tnfa_t *tnfa, heur_t *heur, const void *string, + size_t len, tre_str_type_t type, size_t nmatch, + regmatch_t pmatch[], int eflags) +{ + int ret; + size_t st = 0, i = 1, n; + const char *data_byte = string; + const tre_char_t *data_wide = string; #define FIX_OFFSETS(adj) \ if (ret == REG_NOMATCH) \ @@ -183,144 +202,145 @@ tre_match(const tre_tnfa_t *tnfa, const string = (type == STR_WIDE) ? (void *)&data_wide[off] : \ (void *)&data_byte[off]; - /* Check if we have a heuristic to speed up the search. */ - if ((heur != NULL) && (type != STR_USER)) + /* + * REG_NEWLINE: looking for the longest fragment and then + * isolate the line and run the automaton. + */ + if (heur->type == HEUR_LONGEST) { - int ret; - size_t st = 0, i = 1, n; - const char *data_byte = string; - const tre_char_t *data_wide = string; - - /* - * REG_NEWLINE: looking for the longest fragment and then - * isolate the line and run the automaton. - */ - if (heur->type == HEUR_LONGEST) + while (st < len) { - while (st < len) - { - size_t eo, so; + size_t eo, so; - SEEK_TO(st); + SEEK_TO(st); - /* Match for heuristic */ - ret = tre_match_fast(heur->heurs[0], string, len - st, type, nmatch, - pmatch, eflags); - if (ret != REG_OK) - return ret; - - /* - * If we do not know the length of the possibly matching part, - * look for newlines. - */ - if (heur->tlen == -1) - { - for (so = st + pmatch[0].rm_so - 1; ; so--) - { - if ((type == STR_WIDE) ? (data_wide[so] == TRE_CHAR('\n')) : - (data_byte[so] == '\n')) + /* Match for heuristic */ + ret = tre_match_fast(heur->heurs[0], string, len - st, type, nmatch, + pmatch, eflags); + if (ret != REG_OK) + return ret; + + /* + * If we do not know the length of the possibly matching part, + * look for newlines. + */ + if (heur->tlen == -1) + { + for (so = st + pmatch[0].rm_so - 1; ; so--) + { + if ((type == STR_WIDE) ? (data_wide[so] == TRE_CHAR('\n')) : + (data_byte[so] == '\n')) break; if (so == 0) break; - } + } - for (eo = st + pmatch[0].rm_eo; st + eo < len; eo++) - { - if ((type == STR_WIDE) ? (data_wide[eo] == TRE_CHAR('\n')) : - (data_byte[eo] == '\n')) + for (eo = st + pmatch[0].rm_eo; st + eo < len; eo++) + { + if ((type == STR_WIDE) ? (data_wide[eo] == TRE_CHAR('\n')) : + (data_byte[eo] == '\n')) break; - } - } + } + } - /* - * If we know the possible match length, just check the narrowest - * context that we can, without looking for explicit newlines. - */ - else - { - size_t rem = heur->tlen - (pmatch[0].rm_eo - pmatch[0].rm_so); - so = st + pmatch[0].rm_so <= rem ? 0 : st + pmatch[0].rm_so - rem; - eo = st + pmatch[0].rm_eo + rem >= len ? len : st + pmatch[0].rm_eo + rem; - } - - SEEK_TO(so); - ret = tre_match(tnfa, string, eo - so, type, nmatch, pmatch, eflags, NULL, NULL); - FIX_OFFSETS(st = eo); + /* + * If we know the possible match length, just check the narrowest + * context that we can, without looking for explicit newlines. + */ + else + { + size_t rem = heur->tlen - (pmatch[0].rm_eo - pmatch[0].rm_so); + + so = st + pmatch[0].rm_so <= rem ? 0 : st + pmatch[0].rm_so - rem; + eo = st + pmatch[0].rm_eo + rem >= len ? len : st + pmatch[0].rm_eo + rem; + } - } - return REG_NOMATCH; + SEEK_TO(so); + ret = tre_match_nfa(tnfa, string, eo - so, type, nmatch, pmatch, eflags); + FIX_OFFSETS(st = eo); } + return REG_NOMATCH; + } - /* - * General case when REG_NEWLINE is not set. Look for prefix, - * intermediate and suffix heuristics is available, to determine - * the context where the automaton will be invoked. The start - * of the context is st and the relative end offset from st is - * stored in n. - */ - else + /* + * General case when REG_NEWLINE is not set. Look for prefix, + * intermediate and suffix heuristics is available, to determine + * the context where the automaton will be invoked. The start + * of the context is st and the relative end offset from st is + * stored in n. + */ + else + { + while (st < len) { - while (st < len) - { - SEEK_TO(st); + SEEK_TO(st); - /* Prefix heuristic */ - ret = tre_match_fast(heur->heurs[0], string, len - st, - type, nmatch, pmatch, eflags); - if (ret != REG_OK) - return ret; - st += pmatch[0].rm_so; - n = pmatch[0].rm_eo - pmatch[0].rm_so; - - /* Intermediate heuristics (if any) */ - while (!(heur->heurs[i] == NULL) && - ((heur->heurs[i + 1] != NULL) || - ((heur->heurs[i + 1] == NULL) && (heur->type == HEUR_PREFIX_ARRAY)))) - { - SEEK_TO(st + n); - if (len <= st + n) - return REG_NOMATCH; - ret = tre_match_fast(heur->heurs[i], string, len - st - n, - type, nmatch, pmatch, eflags); - if (ret != REG_OK) - return ret; - n += pmatch[0].rm_eo; - i++; - } - - /* Suffix heuristic available */ - if ((heur->type == HEUR_ARRAY) && heur->heurs[i] != NULL) - { - SEEK_TO(st + n); - if (len <= st + n) - return REG_NOMATCH; - ret = tre_match_fast(heur->heurs[i], string, len - st - n, - type, nmatch, pmatch, eflags); - if (ret != REG_OK) - return ret; - n += pmatch[0].rm_eo; - - SEEK_TO(st); - ret = tre_match(tnfa, string, n, type, nmatch, pmatch, - eflags, NULL, NULL); - FIX_OFFSETS(st += n); - } - /* Suffix heuristic not available */ - else - { - size_t l = (heur->tlen == -1) ? len - st : heur->tlen; + /* Prefix heuristic */ + ret = tre_match_fast(heur->heurs[0], string, len - st, + type, nmatch, pmatch, eflags); + if (ret != REG_OK) + return ret; + st += pmatch[0].rm_so; + n = pmatch[0].rm_eo - pmatch[0].rm_so; + + /* Intermediate heuristics (if any) */ + while (!(heur->heurs[i] == NULL) && + ((heur->heurs[i + 1] != NULL) || + ((heur->heurs[i + 1] == NULL) && (heur->type == HEUR_PREFIX_ARRAY)))) + { + SEEK_TO(st + n); + if (len <= st + n) + return REG_NOMATCH; + ret = tre_match_fast(heur->heurs[i], string, len - st - n, + type, nmatch, pmatch, eflags); + if (ret != REG_OK) + return ret; + n += pmatch[0].rm_eo; + i++; + } + + /* Suffix heuristic available */ + if ((heur->type == HEUR_ARRAY) && heur->heurs[i] != NULL) + { + SEEK_TO(st + n); + if (len <= st + n) + return REG_NOMATCH; + ret = tre_match_fast(heur->heurs[i], string, len - st - n, + type, nmatch, pmatch, eflags); + if (ret != REG_OK) + return ret; + n += pmatch[0].rm_eo; + + SEEK_TO(st); + ret = tre_match_nfa(tnfa, string, n, type, nmatch, pmatch, + eflags); + FIX_OFFSETS(st += n); + } - if (l > len - st) - return REG_NOMATCH; - SEEK_TO(st); - ret = tre_match(tnfa, string, l, type, nmatch, - pmatch, eflags, NULL, NULL); - FIX_OFFSETS(st += n); - } + /* Suffix heuristic not available */ + else + { + size_t l = (heur->tlen == -1) ? len - st : heur->tlen; + + if (l > len - st) + return REG_NOMATCH; + SEEK_TO(st); + ret = tre_match_nfa(tnfa, string, l, type, nmatch, + pmatch, eflags); + FIX_OFFSETS(st += n); } - return REG_NOMATCH; - } + } + return REG_NOMATCH; } +} + +static int +tre_match_nfa(const tre_tnfa_t *tnfa, const void *string, size_t len, + tre_str_type_t type, size_t nmatch, regmatch_t pmatch[], + int eflags) +{ + reg_errcode_t status; + int eo, *tags = NULL; if (tnfa->num_tags > 0 && nmatch > 0) {