From: Pekka Riikonen Date: Tue, 1 Jan 2008 17:45:40 +0000 (+0000) Subject: Added SILC_REGEX_NOTBOL and SILC_REGEX_NOTEOL flags. X-Git-Tag: 1.2.beta1~42 X-Git-Url: http://git.silcnet.org/gitweb/?p=crypto.git;a=commitdiff_plain;h=89429cd0dc7e29b8284d5b6066f94719509ec885 Added SILC_REGEX_NOTBOL and SILC_REGEX_NOTEOL flags. --- diff --git a/CHANGES.RUNTIME b/CHANGES.RUNTIME index f97fb9dd..6901253d 100644 --- a/CHANGES.RUNTIME +++ b/CHANGES.RUNTIME @@ -1,3 +1,8 @@ +Tue Jan 1 19:40:15 EET 2008 Pekka Riikonen + + * Added SILC_REGEX_NOTBOL and SILC_REGEX_NOTEOL flags to + lib/silcutil/silcregex.[ch]. + Tue Jan 1 18:00:47 EET 2008 Pekka Riikonen * Added silc_regex and silc_regex_buffer, routines that provide diff --git a/lib/silcutil/silcregex.c b/lib/silcutil/silcregex.c index e422ded5..1412f20d 100644 --- a/lib/silcutil/silcregex.c +++ b/lib/silcutil/silcregex.c @@ -25,8 +25,8 @@ Emacs-specific code and syntax table code is almost directly borrowed from GNU regexp. - The SILC Regex API by Pekka Riikonen, under the same license as the original - code. + The SILC Regex API and modifications by Pekka Riikonen, under the same + license as the original code. */ @@ -34,7 +34,7 @@ /* Modified for use in SILC Runtime Toolkit. I think we have disabled many features we could use, for the sake of simple API, which we may want to - extend later. */ + extend later. But, we've added RE_NOTBOL and RE_NOTEOL. 
*/ #define RE_NREGS 128 /* number of registers available */ @@ -47,6 +47,8 @@ #define RE_CONTEXT_INDEP_OPS 32 /* ^$?*+ are special in all contexts */ #define RE_ANSI_HEX 64 /* ansi sequences (\n etc) and \xhh */ #define RE_NO_GNU_EXTENSIONS 128 /* no gnu extensions */ +#define RE_NOTBOL 256 /* bol fails to match */ +#define RE_NOTEOL 512 /* eol fails to match */ /* definitions for some common regexp styles */ #define RE_SYNTAX_AWK (RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS) @@ -74,22 +76,22 @@ SilcResult re_compile_pattern(char *regex, int regex_size, SilcRegex compiled); translation table, or NULL if it is not used. */ int re_match(SilcRegex compiled, char *string, int size, int pos, - regexp_registers_t regs); + regexp_registers_t regs, unsigned int flags); /* This tries to match the regexp against the string. This returns the length of the matched portion, or -1 if the pattern could not be matched and -2 if an error (such as failure stack overflow) is encountered. */ int re_match_2(SilcRegex compiled, char *string1, int size1, - char *string2, int size2, int pos, regexp_registers_t regs, - int mstop); + char *string2, int size2, int pos, regexp_registers_t regs, + int mstop, unsigned int flags); /* This tries to match the regexp to the concatenation of string1 and string2. This returns the length of the matched portion, or -1 if the pattern could not be matched and -2 if an error (such as failure stack overflow) is encountered. */ int re_search(SilcRegex compiled, char *string, int size, int startpos, - int range, regexp_registers_t regs); + int range, regexp_registers_t regs, unsigned int flags); /* This rearches for a substring matching the regexp. This returns the first index at which a match is found. 
range specifies at how many positions to try matching; positive values indicate searching forwards, and negative @@ -99,7 +101,7 @@ int re_search(SilcRegex compiled, char *string, int size, int startpos, int re_search_2(SilcRegex compiled, char *string1, int size1, char *string2, int size2, int startpos, int range, - regexp_registers_t regs, int mstop); + regexp_registers_t regs, int mstop, unsigned int flags); /* This is like re_search, but search from the concatenation of string1 and string2. */ @@ -973,11 +975,12 @@ SilcRegex bufp; #define INITIAL_FAILURES 128 /* initial # failure points to allocate */ #define MAX_FAILURES 4100 /* max # of failure points before failing */ -int re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop) +int re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop, flags) SilcRegex bufp; char *string1, *string2; int size1, size2, pos, mstop; regexp_registers_t regs; +unsigned int flags; { struct failure_point { char *text, *partend, *code; } *failure_stack_start, *failure_sp, *failure_stack_end, @@ -1101,15 +1104,21 @@ regexp_registers_t regs; silc_free((char *)failure_stack_start); return match_end - pos; case Cbol: - if (text == string1 || text[-1] == '\n') /* text[-1] always valid */ + if (text == string1 || text[-1] == '\n') { /* text[-1] always valid */ + if (flags & RE_NOTBOL) + goto fail; break; + } goto fail; case Ceol: if (text == string2 + size2 || (text == string1 + size1 ? 
(size2 == 0 || *string2 == '\n') : - *text == '\n')) + *text == '\n')) { + if (flags & RE_NOTEOL) + goto fail; break; + } goto fail; case Cset: NEXTCHAR(ch); @@ -1436,21 +1445,24 @@ regexp_registers_t regs; #undef NEXTCHAR #undef PUSH_FAILURE -int re_match(bufp, string, size, pos, regs) +int re_match(bufp, string, size, pos, regs, flags) SilcRegex bufp; char *string; int size, pos; regexp_registers_t regs; +unsigned int flags; { - return re_match_2(bufp, string, size, (char *)NULL, 0, pos, regs, size); + return re_match_2(bufp, string, size, (char *)NULL, 0, pos, regs, size, + flags); } int re_search_2(bufp, string1, size1, string2, size2, pos, range, regs, - mstop) + mstop, flags) SilcRegex bufp; char *string1, *string2; int size1, size2, pos, range, mstop; regexp_registers_t regs; +unsigned int flags; { char *fastmap, *translate, *text, *partstart, *partend; int dir, ret; @@ -1550,7 +1562,8 @@ regexp_registers_t regs; continue; } assert(pos >= 0 && pos <= size1 + size2); - ret = re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop); + ret = re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop, + flags); if (ret >= 0) return pos; if (ret == -2) @@ -1559,14 +1572,15 @@ regexp_registers_t regs; return -1; } -int re_search(bufp, string, size, startpos, range, regs) +int re_search(bufp, string, size, startpos, range, regs, flags) SilcRegex bufp; char *string; int size, startpos, range; regexp_registers_t regs; +unsigned int flags; { return re_search_2(bufp, string, size, (char *)NULL, 0, - startpos, range, regs, size); + startpos, range, regs, size, flags); } /****************************** SILC Regex API ******************************/ @@ -1605,6 +1619,7 @@ SilcBool silc_regex_match(SilcRegex regexp, const char *string, SilcRegexMatch match, SilcRegexFlags flags) { struct re_registers regs; + unsigned int f = 0; int ret, i; if (!regexp || !string) { @@ -1621,9 +1636,15 @@ SilcBool silc_regex_match(SilcRegex regexp, const char *string, if 
(num_match > RE_NREGS) num_match = RE_NREGS; + /* Set flags */ + if (flags & SILC_REGEX_NOTBOL) + f |= RE_NOTBOL; + if (flags & SILC_REGEX_NOTEOL) + f |= RE_NOTEOL; + /* Search */ ret = re_search(regexp, (char *)string, string_len, 0, string_len, - num_match ? &regs : NULL); + num_match ? &regs : NULL, f); if (ret < 0) { if (ret == -2) silc_set_errno(SILC_ERR); diff --git a/lib/silcutil/silcregex.h b/lib/silcutil/silcregex.h index 1527a2c5..2adddb37 100644 --- a/lib/silcutil/silcregex.h +++ b/lib/silcutil/silcregex.h @@ -112,7 +112,17 @@ typedef struct SilcRegexMatchObject { * SOURCE */ typedef enum { - SILC_REGEX_FLAG_DEFAULT = 0, + SILC_REGEX_DEFAULT = 0x00000000, + + /* The following flags can be used with silc_regex_match */ + + /* The beginning-of-line (^) always fails to match. This can be useful + when beginning of a string should not be interpreted as the beginning + of line. */ + SILC_REGEX_NOTBOL = 0x00010000, + + /* The end-of-line ($) always fails to match. */ + SILC_REGEX_NOTEOL = 0x00020000, } SilcRegexFlags; /***/ diff --git a/lib/silcutil/tests/test_silcregex.c b/lib/silcutil/tests/test_silcregex.c index ba98bbe0..87b7a5f4 100644 --- a/lib/silcutil/tests/test_silcregex.c +++ b/lib/silcutil/tests/test_silcregex.c @@ -24,7 +24,7 @@ int main(int argc, char **argv) string = "Hello World"; SILC_LOG_DEBUG(("Match %s", string)); - if (!silc_regex_match(&reg, string, num_match, match, 0)) + if (!silc_regex_match(&reg, string, strlen(string), num_match, match, 0)) goto err; for (i = 0; i < num_match; i++) { if (match[i].start != -1) { @@ -46,17 +46,17 @@ int main(int argc, char **argv) string = "foo"; SILC_LOG_DEBUG(("Match %s", string)); - if (!silc_regex_match(&reg, string, 0, NULL, 0)) + if (!silc_regex_match(&reg, string, strlen(string), 0, NULL, 0)) goto err; string = "foo20"; SILC_LOG_DEBUG(("Match %s", string)); - if (!silc_regex_match(&reg, string, 0, NULL, 0)) + if (!silc_regex_match(&reg, string, strlen(string), 0, NULL, 0)) goto err; string = "foo20, bar, foo100, foo"; 
SILC_LOG_DEBUG(("Match all substrings in %s", string)); - while (silc_regex_match(&reg, string, 1, match, 0)) { + while (silc_regex_match(&reg, string, strlen(string), 1, match, 0)) { SILC_LOG_DEBUG(("Match start %d", match[0].start)); sub = silc_memdup(string + match[0].start, match[0].end - match[0].start); SILC_LOG_DEBUG(("Match substring '%s'", sub)); @@ -66,7 +66,7 @@ int main(int argc, char **argv) string = "foo20, bar, foo100, Foo, foo0"; SILC_LOG_DEBUG(("Match all substrings at once in %s", string)); - if (!silc_regex_match(&reg, string, num_match, match, 0)) + if (!silc_regex_match(&reg, string, strlen(string), num_match, match, 0)) goto err; for (i = 0; i < num_match; i++) { @@ -88,7 +88,7 @@ int main(int argc, char **argv) string = "http://silcnet.org:443/foobar/pelle.html"; SILC_LOG_DEBUG(("Parse URI")); - if (!silc_regex_match(&reg, string, num_match, match, 0)) + if (!silc_regex_match(&reg, string, strlen(string), num_match, match, 0)) goto err; for (i = 0; i < num_match; i++) { @@ -103,7 +103,7 @@ int main(int argc, char **argv) string = "http://silcnet.org/"; SILC_LOG_DEBUG(("Parse URI")); - if (!silc_regex_match(&reg, string, num_match, match, 0)) + if (!silc_regex_match(&reg, string, strlen(string), num_match, match, 0)) goto err; for (i = 0; i < num_match; i++) { @@ -125,7 +125,7 @@ int main(int argc, char **argv) string = "ab"; SILC_LOG_DEBUG(("Match all substrings at once in %s", string)); - if (!silc_regex_match(&reg, string, num_match, match, 0)) + if (!silc_regex_match(&reg, string, strlen(string), num_match, match, 0)) goto err; for (i = 0; i < num_match; i++) { @@ -140,6 +140,38 @@ int main(int argc, char **argv) silc_regex_free(&reg); + regex = "^a"; + SILC_LOG_DEBUG(("Regex %s", regex)); + if (!silc_regex_compile(&reg, regex, 0)) + goto err; + + string = "a"; + SILC_LOG_DEBUG(("Test NOTBOL flag", string)); + if (silc_regex_match(&reg, string, strlen(string), 0, NULL, + SILC_REGEX_NOTBOL)) + goto err; + if (silc_errno != SILC_ERR_NOT_FOUND) + goto err; + SILC_LOG_DEBUG(("Did 
not match (OK)")); + + silc_regex_free(&reg); + + regex = "a$"; + SILC_LOG_DEBUG(("Regex %s", regex)); + if (!silc_regex_compile(&reg, regex, 0)) + goto err; + + string = "a"; + SILC_LOG_DEBUG(("Test NOTEOL flag", string)); + if (silc_regex_match(&reg, string, strlen(string), 0, NULL, + SILC_REGEX_NOTEOL)) + goto err; + if (silc_errno != SILC_ERR_NOT_FOUND) + goto err; + SILC_LOG_DEBUG(("Did not match (OK)")); + + silc_regex_free(&reg); + success = TRUE; err: