X-Git-Url: http://git.silcnet.org/gitweb/?p=silc.git;a=blobdiff_plain;f=lib%2Fsilcutil%2Fsilcregex.h;h=4047088f8a6767ac10899b2ed6c7f4522c1b8aae;hp=7fac2ac26a27b1b42ba12aaecc2665105591f6e1;hb=10c399ae1a586f5c82b96da137bd88f8e015b659;hpb=f449c0a95327fd4e60a5b9d3c9d50fae0f8d835a diff --git a/lib/silcutil/silcregex.h b/lib/silcutil/silcregex.h index 7fac2ac2..4047088f 100644 --- a/lib/silcutil/silcregex.h +++ b/lib/silcutil/silcregex.h @@ -1,25 +1,19 @@ /* - regexpr.h + silcregex.h - Author: Tatu Ylonen + Author: Pekka Riikonen - Copyright (c) 1991 Tatu Ylonen, Espoo, Finland + Copyright (C) 2007 - 2008 Pekka Riikonen - Permission to use, copy, modify, distribute, and sell this software - and its documentation is hereby granted without fee, provided that the - above copyright notice appears in all source code copies, the name of - Tatu Ylonen is not used to advertise products containing this software - or a derivation thereof, and all modified versions are clearly marked - as such. + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. - This software is provided "as is" without express or implied warranty. - - Created: Thu Sep 26 17:15:36 1991 ylo - Last modified: Fri Jan 3 12:05:45 1992 ylo - - The SILC Regex API by Pekka Riikonen, under the same license as the original - code. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. */ @@ -30,6 +24,57 @@ * SILC regular expression interface provides Unix and POSIX compliant * regular expression compilation and matching. * + * The interface also provides many convenience functions to make the use + * of regular expressions easier. 
In particular, silc_regex offers a very + * simple way to match strings against regular expressions and get the + * exact match or matches in return. The silc_subst provides a simple and + * familiar way to match and substitute strings (Sed syntax). + * + * The regex syntax follows POSIX regex syntax: + * + * Expressions: + * ^ Match start of line/string + * '^a' matches 'ab' but not 'ba' + * $ Match end of line/string + * 'a$' matches 'ba' but not 'ab' + * . Match any single character (except new line (\n)) + * '.a' matches 'ba' but not 'a' + * + Preceding item is matched one or more times + * 'a+b' matches 'aaab' but not 'b' + * * Preceding item is matched zero or more times + * 'a*b' matches 'ab', 'aab' and 'b' + * ? Preceding item is matched zero or one time + * 'ca?b' matches 'cb' and 'cab' but not 'caab' + * | Joins two expressions and matches either of them (OR) + * 'foo|bar' matches 'foo' or 'bar' + * {n} Preceding item is matched exactly n times (n can be 0-255) + * 'a{2}' matches 'aa' but not 'aaa' + * {n,} Preceding item is matched n or more times + * 'a{2,}' matches 'aa' and 'aaaa' but not 'a' + * {n,m} Preceding item is matched at least n times and at most m times + * 'a{2,4}' matches 'aa', 'aaa' and 'aaaa' but not 'aaaaa' + * [ ] Match any single character in the character list inside [ ] + * '[0123]' matches only '0', '1', '2' or '3' + * [ - ] Match any single character in the specified range + * '[0-5]' matches digits 0-5. 
+ * [^ ] Match any character not in the character list or range + * '[^09]' matches any other character except '0' and '9' + * ( ) Subexpression, grouping + * + * Escaping (C-language style, '\' is written as '\\'): + * \\ Treats the following character as a literal ('\\{' is '{') + * \\\\ Matches literal \ + * \a Matches bell (BEL) + * \t Matches horizontal tab (HT) + * \n Matches new line (LF) + * \v Matches vertical tab (VT) + * \f Matches form feed (FF) + * \r Matches carriage return (CR) + * \\< Match null string at the start of a word + * \\> Match null string at the end of a word + * \\b Match null string at the edge of a word + * \\B Match null string when not at the edge of a word + * * EXAMPLE * * SilcRegexStruct reg; @@ -64,11 +109,13 @@ * ***/ typedef struct SilcRegexObject { - char *buffer; /* compiled pattern */ - int allocated; /* allocated size of compiled pattern */ - int used; /* actual length of compiled pattern */ + SilcStack rstack; /* Stack for fast allocations */ + unsigned char *buffer; /* compiled pattern */ char *fastmap; /* fastmap[ch] is true if ch can start pattern */ char *translate; /* translation to apply during comp/match */ + int allocated; /* allocated size of compiled pattern */ + int used; /* actual length of compiled pattern */ + int num_registers; /* number of registers used */ char fastmap_accurate; /* true if fastmap is valid */ char can_be_null; /* true if can match empty string */ char uses_registers; /* registers used and need to be initialized */ @@ -108,7 +155,17 @@ typedef struct SilcRegexMatchObject { * SOURCE */ typedef enum { - SILC_REGEX_FLAG_DEFAULT = 0, + SILC_REGEX_DEFAULT = 0x00000000, + + /* The following flags can be used with silc_regex_match */ + + /* The beginning-of-line (^) always fails to match. This can be useful + when the beginning of a string should not be interpreted as the beginning + of a line. */ + SILC_REGEX_NOTBOL = 0x00010000, + + /* The end-of-line ($) always fails to match. 
*/ + SILC_REGEX_NOTEOL = 0x00020000, } SilcRegexFlags; /***/ @@ -138,8 +195,8 @@ SilcBool silc_regex_compile(SilcRegex regexp, const char *regex, * SYNOPSIS * * SilcBool silc_regex_match(SilcRegex regexp, const char *string, - * SilcUInt32 num_match, SilcRegexMatch match, - * SilcRegexFlags flags); + * SilcUInt32 string_len, SilcUInt32 num_match, + * SilcRegexMatch match, SilcRegexFlags flags); * * DESCRIPTION * @@ -164,13 +221,13 @@ SilcBool silc_regex_compile(SilcRegex regexp, const char *regex, * EXAMPLE * * // Find first match (check if string matches) - * if (!silc_regex_match(&reg, "foo20", 0, NULL, 0)) + * if (!silc_regex_match(&reg, "foo20", 5, 0, NULL, 0)) * no_match; * * // Find multiple matches, one by one * SilcRegexMatchStruct match; * - * while (silc_regex_match(&reg, string, 1, &match, 0)) { + * while (silc_regex_match(&reg, string, len, 1, &match, 0)) { * match_string = silc_memdup(string + match.start, * match.end - match.start); * string += match.end; @@ -181,12 +238,12 @@ SilcBool silc_regex_compile(SilcRegex regexp, const char *regex, * SilcRegexMatchStruct match[7]; * * silc_regex_compile(&reg, "^(([^:]+)://)?([^:/]+)(:([0-9]+))?(/.*)", 0); - * silc_regex_match(&reg, "http://example.com/page.html", 7, match, 0); + * silc_regex_match(&reg, "http://example.com/page.html", len, 7, match, 0); * ***/ SilcBool silc_regex_match(SilcRegex regexp, const char *string, - SilcUInt32 num_match, SilcRegexMatch match, - SilcRegexFlags flags); + SilcUInt32 string_len, SilcUInt32 num_match, + SilcRegexMatch match, SilcRegexFlags flags); /****f* silcutil/SilcRegexAPI/silc_regex_free * @@ -203,4 +260,120 @@ SilcBool silc_regex_match(SilcRegex regexp, const char *string, ***/ void silc_regex_free(SilcRegex regexp); +/****f* silcutil/SilcRegexAPI/silc_regex + * + * SYNOPSIS + * + * SilcBool silc_regex(const char *string, const char *regex, + * SilcBuffer match, ...); + * + * DESCRIPTION + * + * Matches the `string' to the regular expression `regex'. 
Returns TRUE + * if the `string' matches the regular expression or FALSE if it does not + * match. When it does not match, silc_errno is also set to SILC_ERR_NOT_FOUND. + * + * The first (whole) match is returned to the `match' buffer if it is non-NULL. + * The variable arguments are buffers where multiple matches are + * returned in case of a grouped (parenthesized) regular expression. The caller + * needs to know how many pointers to provide in order to get all matches. + * If a particular group is optional, a buffer pointer still must be given + * as an argument for it; however, if it did not match, the returned buffer + * length is 0 and the data pointer is NULL. + * + * If `match' is non-NULL the variable argument list must be terminated with + * NULL. The data in the `match' and in any other buffer is from `string' + * and must not be freed by the caller. + * + * EXAMPLE + * + * // Simple match + * if (!silc_regex("foobar", "foo.", NULL)) + * no_match; + * + * // Get the pointer to the first match + * if (!silc_regex("foobar", ".bar", &match, NULL)) + * no_match; + * + * // Group match + * SilcBufferStruct match, sub1, sub2; + * + * if (!silc_regex("Hello World", "(H..).(o..)", &match, &sub1, &sub2, NULL)) + * no_match; + * + ***/ +SilcBool silc_regex(const char *string, const char *regex, + SilcBuffer match, ...); + +/****f* silcutil/SilcRegexAPI/silc_regex_buffer + * + * SYNOPSIS + * + * SilcBool silc_regex_buffer(SilcBuffer buffer, const char *regex, + * SilcBuffer match, ...); + * + * DESCRIPTION + * + * Same as silc_regex but the string to match is in `buffer'. Returns + * TRUE if the string matches and FALSE if it doesn't. See examples and + * other information in silc_regex. The `buffer' and `match' may be the + * same buffer. 
+ * + ***/ +SilcBool silc_regex_buffer(SilcBuffer buffer, const char *regex, + SilcBuffer match, ...); + +/****f* silcutil/SilcRegexAPI/silc_subst + * + * SYNOPSIS + * + * SilcBool silc_subst(SilcBuffer buffer, const char *subst); + * + * DESCRIPTION + * + * Regular expression matching and substitution in `buffer' according + * to the substitution expression `subst'. This function provides + * a Sed (Stream Editor) style substitution interface. The `subst' may + * be in one of the following formats: + * + * 's/REGEXP/REPLACEMENT/FLAGS' + * + * Matches regular expression REGEXP in each line in the buffer and + * substitutes the match with REPLACEMENT. + * + * 'ADDRs/REGEXP/REPLACEMENT/FLAGS' + * + * Selects lines in the buffer matching the address ADDR and matches the + * regular expression REGEXP in the line and substitutes the match with + * REPLACEMENT. + * + * The ADDR may be in one of the following formats: + * + * /REGEXP/ Matches only lines matching the regular expression + * NUMBER Matches only the specified line number (1-n) + * $ Matches only the last line + * + * The FLAGS may be in one of the following formats: + * + * no FLAGS Finds first match in the line and replaces that + * g Finds and replaces all matches in the line + * + * An '!' may precede the 's'. In that case the ADDR is not matched. + * + * Returns TRUE if the match and replacement were done, or FALSE in case + * of error and sets silc_errno. + * + * If you need to match and/or replace '/' characters, they must be + * escaped with '\' (C-style escaping for '\' is '\\'). + * + * If you need more versatile ways to modify the buffer you may consider + * using the SILC_STR_REGEX in SILC Buffer Format API directly. This + * function only provides basic matching and substitution. + * + ***/ +SilcBool silc_subst(SilcBuffer buffer, const char *subst); + +/* Backwards support */ +#define silc_string_regex_match(regex, string) silc_regex(string, regex, NULL) + #endif /* SILCREGEX_H */