5 Author: Tatu Ylonen <ylo@ngs.fi>
7 Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
9 Permission to use, copy, modify, distribute, and sell this software
10 and its documentation is hereby granted without fee, provided that the
11 above copyright notice appears in all source code copies, the name of
12 Tatu Ylonen is not used to advertise products containing this software
13 or a derivation thereof, and all modified versions are clearly marked
16 This software is provided "as is" without express or implied warranty.
18 Created: Thu Sep 26 17:15:36 1991 ylo
19 Last modified: Fri Jan 3 12:05:45 1992 ylo
21 The SILC Regex API by Pekka Riikonen, under the same license as the original
26 /****h* silcutil/SILC Regular Expression Interface
30 * SILC regular expression interface provides Unix and POSIX compliant
31 * regular expression compilation and matching. The syntax is compliant
32 * with Unix and POSIX regular expression syntax.
34 * The interface also provides many convenience functions to make the use
35 * of regular expressions easier.
39 * SilcRegexStruct reg;
41 * // Compile regular expression
42 * if (!silc_regex_compile(&reg, "foo[0-9]*", 0))
45 * // Match string against the compiled regex
46 * if (!silc_regex_match(&reg, "foo20", 5, 0, NULL, 0))
49 * // Free the compiled regular expression
50 * silc_regex_free(&reg);
57 /****s* silcutil/SilcRegexAPI/SilcRegex
61 * typedef struct { ... } *SilcRegex, SilcRegexStruct;
65 * The regular expression context. This context is given as argument
66 * to all silc_regex_* functions. It is usually statically allocated
67 * but can be dynamically allocated by silc_malloc.
70 typedef struct SilcRegexObject {
71 char *buffer; /* Buffer holding the compiled pattern */
72 int allocated; /* Allocated size of the compiled pattern buffer */
73 int used; /* Length of the compiled pattern actually in use */
74 char *fastmap; /* Lookup by character: fastmap[ch] is true if ch can start a match of the pattern */
75 char *translate; /* Translation applied during both compilation and matching */
76 char fastmap_accurate; /* True if the contents of fastmap are valid */
77 char can_be_null; /* True if the pattern can match the empty string */
78 char uses_registers; /* True if registers are used and must be initialized */
79 char anchor; /* Anchor type: 0 = none, 1 = beginning of line, 2 = beginning of buffer */
80 } *SilcRegex, SilcRegexStruct;
82 /****s* silcutil/SilcRegexAPI/SilcRegexMatch
86 * typedef struct { ... } *SilcRegexMatch, SilcRegexMatchStruct;
90 * The regular expression match context that provides information on the
91 * found match. It provides the start offset and end offset of the match.
96 typedef struct SilcRegexMatchObject {
97 int start; /* Start offset of the matched region in the searched string; -1 if the entry is unused */
98 int end; /* End offset of the matched region (end - start is its length); -1 if the entry is unused */
99 } *SilcRegexMatch, SilcRegexMatchStruct;
102 /****d* silcutil/SilcRegexAPI/SilcRegexFlags
106 * typedef enum { ... } SilcRegexFlags;
110 * Regular expression feature flags.
115 SILC_REGEX_DEFAULT = 0x00000000,
117 /* The following flags can be used with silc_regex_match */
119 /* The beginning-of-line (^) always fails to match. This can be useful
120 when the beginning of a string should not be interpreted as the beginning of a line. */
122 SILC_REGEX_NOTBOL = 0x00010000,
124 /* The end-of-line ($) always fails to match. */
125 SILC_REGEX_NOTEOL = 0x00020000,
129 /****f* silcutil/SilcRegexAPI/silc_regex_compile
133 * SilcBool silc_regex_compile(SilcRegex regexp, const char *regex,
134 * SilcRegexFlags flags);
138 * Compiles the regular expression string `regex'. The `regexp' is a
139 * pre-allocated regular expression context. The `flags' define
140 * various feature flags. This function must be called before the
141 * silc_regex_match can be used to find matches.
143 * Returns TRUE after the compilation is completed. Returns FALSE on
144 * error and sets silc_errno.
147 SilcBool silc_regex_compile(SilcRegex regexp, const char *regex,
148 SilcRegexFlags flags);
150 /****f* silcutil/SilcRegexAPI/silc_regex_match
154 * SilcBool silc_regex_match(SilcRegex regexp, const char *string,
155 * SilcUInt32 string_len, SilcUInt32 num_match,
156 * SilcRegexMatch match, SilcRegexFlags flags);
160 * Finds one or more matches from the `string' using the pre-compiled
161 * regular expression `regexp'. It must be compiled by calling the
162 * silc_regex_compile before calling this function. The `flags' defines
163 * various feature flags.
165 * If only one match is needed the `num_match' may be set to 0 and the
166 * `match' is set to NULL. If multiple matches (substrings) are needed the
167 * `num_match' defines the size of the `match' array, where each of the
168 * matches (with parenthesized regular expression) will be stored. The
169 * `match' provides information on where the match was found in `string',
170 * providing the start offset and end offset of the match. Unused entries
171 * in the array will have -1 as the offset values.
173 * Returns TRUE if the string matched the regular expression or FALSE
174 * if it did not match or error occurred. The silc_errno will indicate
175 * the error. The silc_errno is set to SILC_ERR_NOT_FOUND if the regular
176 * expression did not match.
180 * // Find first match (check if string matches)
181 * if (!silc_regex_match(&reg, "foo20", 5, 0, NULL, 0))
184 * // Find multiple matches, one by one
185 * SilcRegexMatchStruct match;
187 * while (silc_regex_match(&reg, string, len, 1, &match, 0)) {
188 * match_string = silc_memdup(string + match.start,
189 * match.end - match.start);
190 * string += match.end;
193 * // Parse URI into its components, available in the match[] array
194 * SilcRegexStruct reg;
195 * SilcRegexMatchStruct match[7];
197 * silc_regex_compile(&reg, "^(([^:]+)://)?([^:/]+)(:([0-9]+))?(/.*)", 0);
198 * silc_regex_match(&reg, "http://example.com/page.html", len, 7, match, 0);
201 SilcBool silc_regex_match(SilcRegex regexp, const char *string,
202 SilcUInt32 string_len, SilcUInt32 num_match,
203 SilcRegexMatch match, SilcRegexFlags flags);
205 /****f* silcutil/SilcRegexAPI/silc_regex_free
209 * void silc_regex_free(SilcRegex regexp);
213 * Frees the compiled regular expression context `regexp'. This must
214 * be called whenever silc_regex_compile has been called on `regexp',
215 * even if `regexp' itself is statically allocated.
218 void silc_regex_free(SilcRegex regexp);
220 /****f* silcutil/SilcRegexAPI/silc_regex
224 * SilcBool silc_regex(const char *string, const char *regex,
225 * SilcBuffer match, ...);
229 * Matches the `string' to the regular expression `regex'. Returns TRUE
230 * if the `string' matches the regular expression or FALSE if it does not
231 * match, in which case silc_errno is set to SILC_ERR_NOT_FOUND.
233 * The first (whole) match is returned to `match' buffer if it is non-NULL.
234 * The variable argument list are buffers where multiple matches are
235 * returned in case of group (parenthesized) regular expression. The caller
236 * needs to know how many pointers to provide, in order to get all matches.
237 * If `match' is non-NULL the variable argument list must be ended with
238 * NULL. The data in the `match' and in any other buffer is from `string'
239 * and must not be freed by the caller.
244 * if (!silc_regex("foobar", "foo.", NULL))
247 * // Get the pointer to the first match
248 * if (!silc_regex("foobar", ".bar", &match, NULL))
252 * SilcBufferStruct match, sub1, sub2;
254 * if (!silc_regex("Hello World", "(H..).(o..)", &match, &sub1, &sub2, NULL))
258 SilcBool silc_regex(const char *string, const char *regex,
259 SilcBuffer match, ...);
261 /****f* silcutil/SilcRegexAPI/silc_regex_buffer
265 * SilcBool silc_regex_buffer(SilcBuffer buffer, const char *regex,
266 * SilcBuffer match, ...);
270 * Same as silc_regex but the string to match is in `buffer'. Returns
271 * TRUE if the string matches and FALSE if it doesn't. See examples and
272 * other information in silc_regex. The `buffer' and `match' may be the same buffer.
276 SilcBool silc_regex_buffer(SilcBuffer buffer, const char *regex,
277 SilcBuffer match, ...);
279 /* Backwards support: legacy name taking (regex, string); forwards to silc_regex with the arguments swapped and a NULL match */
280 #define silc_string_regex_match(regex, string) silc_regex(string, regex, NULL)
282 #endif /* SILCREGEX_H */