5 Author: Pekka Riikonen <priikone@silcnet.org>
7 Copyright (C) 2007 Pekka Riikonen
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; version 2 of the License.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
20 /****h* silcutil/SILC Regular Expression Interface
24 * SILC regular expression interface provides Unix and POSIX compliant
25 * regular expression compilation and matching. The syntax is compliant
26 * with Unix and POSIX regular expression syntax.
28 * The interface also provides many convenience functions to make the use
29 * of regular expressions easier.
33 * SilcRegexStruct reg;
35 * // Compile regular expression
36 * if (!silc_regex_compile(&reg, "foo[0-9]*", 0))
39 * // Match string against the compiled regex
40 * if (!silc_regex_match(&reg, "foo20", 5, 0, NULL, 0))
43 * // Free the compiled regular expression
44 * silc_regex_free(&reg);
51 /****s* silcutil/SilcRegexAPI/SilcRegex
55 * typedef struct { ... } *SilcRegex, SilcRegexStruct;
59 * The regular expression context. This context is given as argument
60 * to all silc_regex_* functions. It is usually statically allocated
61 * but can be dynamically allocated by silc_malloc.
64 typedef struct SilcRegexObject {
65 SilcStack rstack; /* Stack for fast allocations */
66 unsigned char *buffer; /* compiled pattern */
67 char *fastmap; /* fastmap[ch] is true if ch can start pattern */
68 char *translate; /* translation to apply during comp/match */
69 int allocated; /* allocated size of compiled pattern */
70 int used; /* actual length of compiled pattern */
71 int num_registers; /* number of registers used */
72 char fastmap_accurate; /* true if fastmap is valid */
73 char can_be_null; /* true if can match empty string */
74 char uses_registers; /* registers used and need to be initialized */
75 char anchor; /* anchor: 0=none 1=begline 2=begbuf */
76 } *SilcRegex, SilcRegexStruct;
78 /****s* silcutil/SilcRegexAPI/SilcRegexMatch
82 * typedef struct { ... } *SilcRegexMatch, SilcRegexMatchStruct;
86 * The regular expression match context that provides information on the
87 * found match. It provides the start offset and end offset of the match.
/* Describes where a match was found: the start and end offsets of the
   matched region. */
typedef struct SilcRegexMatchObject {
  /* Start offset of region */
  int start;

  /* End offset of region */
  int end;
} *SilcRegexMatch, SilcRegexMatchStruct;
98 /****d* silcutil/SilcRegexAPI/SilcRegexFlags
102 * typedef enum { ... } SilcRegexFlags;
106 * Regular expression feature flags.
/* Regular expression feature flags. */
typedef enum {
  /* No flags set (value 0). */
  SILC_REGEX_DEFAULT   = 0x00000000,

  /* The following flags can be used with silc_regex_match */

  /* The beginning-of-line (^) always fails to match.  This can be useful
     when the beginning of a string should not be interpreted as the
     beginning of a line. */
  SILC_REGEX_NOTBOL    = 0x00010000,

  /* The end-of-line ($) always fails to match. */
  SILC_REGEX_NOTEOL    = 0x00020000,
} SilcRegexFlags;
125 /****f* silcutil/SilcRegexAPI/silc_regex_compile
129 * SilcBool silc_regex_compile(SilcRegex regexp, const char *regex,
130 * SilcRegexFlags flags);
134 * Compiles the regular expression string `regex'. The `regexp' is a
135 * pre-allocated regular expression context. The `flags' define
136 * various feature flags. This function must be called before the
137 * silc_regex_match can be used to find matches.
139 * Returns TRUE after the compilation is completed. Returns FALSE on
140 * error and sets silc_errno.
143 SilcBool silc_regex_compile(SilcRegex regexp, const char *regex,
144 SilcRegexFlags flags);
146 /****f* silcutil/SilcRegexAPI/silc_regex_match
150 * SilcBool silc_regex_match(SilcRegex regexp, const char *string,
151 * SilcUInt32 string_len, SilcUInt32 num_match,
152 * SilcRegexMatch match, SilcRegexFlags flags);
156 * Finds one or more matches from the `string' using the pre-compiled
157 * regular expression `regexp'. It must be compiled by calling the
158 * silc_regex_compile before calling this function. The `flags' defines
159 * various feature flags.
161 * If only one match is needed the `num_match' may be set to 0 and the
162 * `match' is set to NULL. If multiple matches (substrings) are needed the
163 * `num_match' defines the size of the `match' array, where each of the
164 * matches (with parenthesized regular expression) will be stored. The
165 * `match' provides information on where the match was found in `string',
166 * providing the start offset and end offset of the match. Unused entries
167 * in the array will have -1 as the offset values.
169 * Returns TRUE if the string matched the regular expression or FALSE
170 * if it did not match or error occurred. The silc_errno will indicate
171 * the error. The silc_errno is set to SILC_ERR_NOT_FOUND if the regular
172 * expression did not match.
176 * // Find first match (check if string matches)
177 * if (!silc_regex_match(&reg, "foo20", 5, 0, NULL, 0))
180 * // Find multiple matches, one by one
181 * SilcRegexMatchStruct match;
183 * while (silc_regex_match(&reg, string, len, 1, &match, 0)) {
184 * match_string = silc_memdup(string + match.start,
185 * match.end - match.start);
186 * string += match.end; len -= match.end;
189 * // Parse URI into its components, available in the match[] array
190 * SilcRegexStruct reg;
191 * SilcRegexMatchStruct match[7];
193 * silc_regex_compile(&reg, "^(([^:]+)://)?([^:/]+)(:([0-9]+))?(/.*)", 0);
194 * silc_regex_match(&reg, "http://example.com/page.html", len, 7, match, 0);
197 SilcBool silc_regex_match(SilcRegex regexp, const char *string,
198 SilcUInt32 string_len, SilcUInt32 num_match,
199 SilcRegexMatch match, SilcRegexFlags flags);
201 /****f* silcutil/SilcRegexAPI/silc_regex_free
205 * void silc_regex_free(SilcRegex regexp);
209 * Frees the compiled regular expression context `regexp'. This must
210 * be called whenever silc_regex_compile has been called on `regexp',
211 * even if `regexp' itself is statically allocated.
214 void silc_regex_free(SilcRegex regexp);
216 /****f* silcutil/SilcRegexAPI/silc_regex
220 * SilcBool silc_regex(const char *string, const char *regex,
221 * SilcBuffer match, ...);
225 * Matches the `string' to the regular expression `regex'. Returns TRUE
226 * if the `string' matches the regular expression. Returns FALSE if it
227 * does not match, in which case silc_errno is set to SILC_ERR_NOT_FOUND.
229 * The first (whole) match is returned to `match' buffer if it is non-NULL.
230 * The variable argument list are buffers where multiple matches are
231 * returned in case of group (parenthesized) regular expression. The caller
232 * needs to know how many pointers to provide, in order to get all matches.
233 * If `match' is non-NULL the variable argument list must be ended with
234 * NULL. The data in the `match' and in any other buffer is from `string'
235 * and must not be freed by the caller.
240 * if (!silc_regex("foobar", "foo.", NULL))
243 * // Get the pointer to the first match
244 * if (!silc_regex("foobar", ".bar", &match, NULL))
248 * SilcBufferStruct match, sub1, sub2;
250 * if (!silc_regex("Hello World", "(H..).(o..)", &match, &sub1, &sub2, NULL))
254 SilcBool silc_regex(const char *string, const char *regex,
255 SilcBuffer match, ...);
257 /****f* silcutil/SilcRegexAPI/silc_regex_buffer
261 * SilcBool silc_regex_buffer(SilcBuffer buffer, const char *regex,
262 * SilcBuffer match, ...);
266 * Same as silc_regex but the string to match is in `buffer'. Returns
267 * TRUE if the string matches and FALSE if it doesn't. See examples and
268 * other information in silc_regex. The `buffer' and `match' may be the same buffer.
272 SilcBool silc_regex_buffer(SilcBuffer buffer, const char *regex,
273 SilcBuffer match, ...);
/* Backwards support: the older name took the pattern first and the string
   second; it forwards to silc_regex with no match buffers requested. */
#define silc_string_regex_match(pattern, subject) \
  silc_regex(subject, pattern, NULL)
278 #endif /* SILCREGEX_H */