From 96ae4adfa8757be98b0587941e26d0733e1fb22e Mon Sep 17 00:00:00 2001 From: Pekka Riikonen Date: Sun, 30 Dec 2007 23:35:50 +0000 Subject: [PATCH] Added SILC regular expression API. Added new regex error values. Removed the old lib/contrib/regexp.[ch]. --- CHANGES.RUNTIME | 4 + TODO | 38 +- configure.ad | 8 - includes/silc.h.in | 9 +- lib/contrib/Makefile.ad | 8 +- lib/contrib/regexpr.h | 139 ------ lib/silcutil/Makefile.ad | 6 +- lib/silcutil/silcerrno.c | 22 +- lib/silcutil/silcerrno.h | 7 + .../regexpr.c => silcutil/silcregex.c} | 440 ++++++++---------- lib/silcutil/silcregex.h | 8 +- lib/silcutil/silcschedule.h | 5 - lib/silcutil/silcstrutil.c | 30 +- lib/silcutil/tests/Makefile.am | 4 +- lib/silcutil/tests/test_silcregex.c | 129 +++++ 15 files changed, 392 insertions(+), 465 deletions(-) delete mode 100644 lib/contrib/regexpr.h rename lib/{contrib/regexpr.c => silcutil/silcregex.c} (83%) create mode 100644 lib/silcutil/tests/test_silcregex.c diff --git a/CHANGES.RUNTIME b/CHANGES.RUNTIME index 33f5b70d..d129dfea 100644 --- a/CHANGES.RUNTIME +++ b/CHANGES.RUNTIME @@ -1,3 +1,7 @@ +Mon Dec 31 01:30:17 EET 2007 Pekka Riikonen + + * Added SILC regular expression API to lib/silcutil/silcregex.[ch]. + Sun Dec 30 14:35:33 EET 2007 Pekka Riikonen * Implemented asynchronous events to SILC Scheduler. Added diff --git a/TODO b/TODO index 4c61e988..569d4dd8 100644 --- a/TODO +++ b/TODO @@ -93,20 +93,32 @@ lib/silcclient, The Client Library Runtime library, lib/silcutil/ ============================== + o silc_malloc et. al. to respect --with-alignment. + o Fix universal time decoding (doesn't accept all formats) in silctime.c. + o Add directory opening/traversing functions + + o regex from /lib/contrib to lib/silcutil, define SILC Regex API. (***DONE) + + o Additional scheduler changes: optimize silc_schedule_wakeup. Wakeup + only if the scheduler is actually waiting something. If it is + delivering tasks wakeup is not needed. 
+ + o silc_stringprep to non-allocating version. + + o Add builtin SOCKS and HTTP Proxy support, well the SOCKS at least. + SILC currently supports SOCKS4 and SOCKS5 but it needs to be compiled + in separately. + o Add functions to manipulate environment variables. (***DONE) o Add functions to loading shared/dynamic object symbols (replaces the SIM library (lib/silcsim) and introduces generic library). Add this to lib/silcutil/silcdll.[ch]. (***TESTING NEEDED WIN32, TODO Symbian) - o Add directory opening/traversing functions - o silc_getopt routines (***DONE) - o regex from /lib/contrib to lib/silcutil. - o The SILC Event signals. Asynchronous events that can be created, connected to and signalled. Either own event routines or glued into SilcSchedule. (***DONE) @@ -127,23 +139,10 @@ Runtime library, lib/silcutil/ from any of the schedulers. (***DONE) - o Additional scheduler changes: optimize silc_schedule_wakeup. Wakeup - only if the scheduler is actually waiting something. If it is - delivering tasks wakeup is not needed. - - o Structured log messages to Log API. Allows machine readable log - messages. Would allow sending of any kind of data in a log message. - o Base64 to an own API (***DONE) o Timer API (***DONE) - o Add builtin SOCKS and HTTP Proxy support, well the SOCKS at least. - SILC currently supports SOCKS4 and SOCKS5 but it needs to be compiled - in separately. - - o silc_stringprep to non-allocating version. - o silc_hash_table_replace -> silc_hash_table_set. Retain support for silc_hash_table_replace as macro. (***DONE) @@ -165,8 +164,6 @@ Runtime library, lib/silcutil/ o silc_stack_alloc shouldn't require multiple by 8 size argument, it should figure it out itself. (***DONE) - o silc_malloc et. al. to respect --with-alignment. - o Add '%@' format to silc_snprintf functions. (***DONE) @@ -179,6 +176,9 @@ Runtime library, lib/silcutil/ o Generic SilcResult that includes all possible status and error conditions and generic errno API. 
(***DONE) + (o Structured log messages to Log API. Allows machine readable log + messages. Would allow sending of any kind of data in a log message.) maybe + (o Change some stream routines (like socket stream API) to accept ANY stream and use silc_stream_get_root to get the socket stream from the given stream. This will make various stream APIs more easier to use diff --git a/configure.ad b/configure.ad index bd4251f4..7467c0c3 100644 --- a/configure.ad +++ b/configure.ad @@ -283,14 +283,6 @@ AC_CHECK_FUNCS(setenv getenv putenv unsetenv clearenv) # lib/contrib conditionals # -AC_CHECK_HEADER(regex.h, - [ - AC_DEFINE([HAVE_REGEX_H], [], [HAVE_REGEX_H]) - have_regex=1 - ], have_regex=0 -) -AM_CONDITIONAL(HAVE_REGEX, test x$have_regex = x1) - AC_CHECK_FUNC(getopt_long, [ AC_DEFINE([HAVE_GETOPT_LONG], [], [HAVE_GETOPT_LONG]) diff --git a/includes/silc.h.in b/includes/silc.h.in index 14f63c1c..88fa7d16 100644 --- a/includes/silc.h.in +++ b/includes/silc.h.in @@ -195,14 +195,6 @@ extern "C" { #include #endif -#ifndef HAVE_REGEX_H -#if defined(HAVE_SILCDEFS_H) -#include "regexpr.h" -#endif /* HAVE_SILCDEFS_H */ -#else -#include -#endif - #ifdef SILC_HAVE_PTHREAD #include #endif @@ -285,6 +277,7 @@ extern "C" { #endif /* SILC_DIST_SSH */ /* More SILC util library includes */ +#include "silcregex.h" #include "silcenv.h" #include "silcdll.h" #include "silchashtable.h" diff --git a/lib/contrib/Makefile.ad b/lib/contrib/Makefile.ad index e12bafee..b27d9bd6 100644 --- a/lib/contrib/Makefile.ad +++ b/lib/contrib/Makefile.ad @@ -19,15 +19,9 @@ AUTOMAKE_OPTIONS = 1.0 no-dependencies foreign noinst_LTLIBRARIES = libcontrib.la -if HAVE_REGEX -REGEX = -else -REGEX = regexpr.c -endif - STRINGPREP = nfkc.c rfc3454.c stringprep.c -libcontrib_la_SOURCES = $(STRINGPREP) $(REGEX) +libcontrib_la_SOURCES = $(STRINGPREP) EXTRA_DIST = *.c *.h diff --git a/lib/contrib/regexpr.h b/lib/contrib/regexpr.h deleted file mode 100644 index b6927357..00000000 --- a/lib/contrib/regexpr.h +++ /dev/null @@ 
-1,139 +0,0 @@ -/* - -regexpr.h - -Author: Tatu Ylonen - -Copyright (c) 1991 Tatu Ylonen, Espoo, Finland - -Permission to use, copy, modify, distribute, and sell this software -and its documentation is hereby granted without fee, provided that the -above copyright notice appears in all source code copies, the name of -Tatu Ylonen is not used to advertise products containing this software -or a derivation thereof, and all modified versions are clearly marked -as such. - -This software is provided "as is" without express or implied warranty. - -Created: Thu Sep 26 17:15:36 1991 ylo -Last modified: Fri Jan 3 12:05:45 1992 ylo - -*/ - -/* $Id$ */ - -#ifndef REGEXPR_H -#define REGEXPR_H - -#define RE_NREGS 10 /* number of registers available */ - -typedef struct re_pattern_buffer -{ - char *buffer; /* compiled pattern */ - int allocated; /* allocated size of compiled pattern */ - int used; /* actual length of compiled pattern */ - char *fastmap; /* fastmap[ch] is true if ch can start pattern */ - char *translate; /* translation to apply during compilation/matching */ - char fastmap_accurate; /* true if fastmap is valid */ - char can_be_null; /* true if can match empty string */ - char uses_registers; /* registers are used and need to be initialized */ - char anchor; /* anchor: 0=none 1=begline 2=begbuf */ -} *regexp_t; - -typedef struct re_registers -{ - int start[RE_NREGS]; /* start offset of region */ - int end[RE_NREGS]; /* end offset of region */ -} *regexp_registers_t; - -/* bit definitions for syntax */ -#define RE_NO_BK_PARENS 1 /* no quoting for parentheses */ -#define RE_NO_BK_VBAR 2 /* no quoting for vertical bar */ -#define RE_BK_PLUS_QM 4 /* quoting needed for + and ? 
*/ -#define RE_TIGHT_VBAR 8 /* | binds tighter than ^ and $ */ -#define RE_NEWLINE_OR 16 /* treat newline as or */ -#define RE_CONTEXT_INDEP_OPS 32 /* ^$?*+ are special in all contexts */ -#define RE_ANSI_HEX 64 /* ansi sequences (\n etc) and \xhh */ -#define RE_NO_GNU_EXTENSIONS 128 /* no gnu extensions */ - -/* definitions for some common regexp styles */ -#define RE_SYNTAX_AWK (RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS) -#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK|RE_NEWLINE_OR) -#define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR) -#define RE_SYNTAX_EMACS 0 - -int re_set_syntax(int syntax); -/* This sets the syntax to use and returns the previous syntax. The - syntax is specified by a bit mask of the above defined bits. */ - -char *re_compile_pattern(char *regex, int regex_size, regexp_t compiled); -/* This compiles the regexp (given in regex and length in regex_size). - This returns NULL if the regexp compiled successfully, and an error - message if an error was encountered. The buffer field must be - initialized to a memory area allocated by malloc (or to NULL) before - use, and the allocated field must be set to its length (or 0 if buffer is - NULL). Also, the translate field must be set to point to a valid - translation table, or NULL if it is not used. */ - -int re_match(regexp_t compiled, char *string, int size, int pos, - regexp_registers_t regs); -/* This tries to match the regexp against the string. This returns the - length of the matched portion, or -1 if the pattern could not be - matched and -2 if an error (such as failure stack overflow) is - encountered. */ - -int re_match_2(regexp_t compiled, char *string1, int size1, - char *string2, int size2, int pos, regexp_registers_t regs, - int mstop); -/* This tries to match the regexp to the concatenation of string1 and - string2. This returns the length of the matched portion, or -1 if the - pattern could not be matched and -2 if an error (such as failure stack - overflow) is encountered. 
*/ - -int re_search(regexp_t compiled, char *string, int size, int startpos, - int range, regexp_registers_t regs); -/* This rearches for a substring matching the regexp. This returns the first - index at which a match is found. range specifies at how many positions to - try matching; positive values indicate searching forwards, and negative - values indicate searching backwards. mstop specifies the offset beyond - which a match must not go. This returns -1 if no match is found, and - -2 if an error (such as failure stack overflow) is encountered. */ - -int re_search_2(regexp_t compiled, char *string1, int size1, - char *string2, int size2, int startpos, int range, - regexp_registers_t regs, int mstop); -/* This is like re_search, but search from the concatenation of string1 and - string2. */ - -void re_compile_fastmap(regexp_t compiled); -/* This computes the fastmap for the regexp. For this to have any effect, - the calling program must have initialized the fastmap field to point - to an array of 256 characters. */ - -char *re_comp(char *s); -/* BSD 4.2 regex library routine re_comp. This compiles the regexp into - an internal buffer. This returns NULL if the regexp was compiled - successfully, and an error message if there was an error. */ - -int re_exec(char *s); -/* BSD 4.2 regexp library routine re_exec. This returns true if the string - matches the regular expression (that is, a matching part is found - anywhere in the string). 
*/ - -/* POSIX Compatibility */ -#define regex_t struct re_pattern_buffer -#define regmatch_t struct re_registers -#define REG_EXTENDED 1 -#define REG_ICASE (REG_EXTENDED << 1) -#define REG_NEWLINE (REG_ICASE << 1) -#define REG_NOSUB (REG_NEWLINE << 1) -#define REG_NOTBOL 1 -#define REG_NOTEOL (REG_NOTBOL << 1) -int regcomp(regex_t *preg, const char *regex, int cflags); -int regexec(const regex_t *preg, const char *string, size_t nmatch, - regmatch_t pmatch[], int eflags); -size_t regerror(int errcode, const regex_t *preg, char *errbuf, - size_t errbuf_size); -void regfree(regex_t *preg); - -#endif /* REGEXPR_H */ diff --git a/lib/silcutil/Makefile.ad b/lib/silcutil/Makefile.ad index b841e442..8e86f36b 100644 --- a/lib/silcutil/Makefile.ad +++ b/lib/silcutil/Makefile.ad @@ -76,7 +76,8 @@ libsilcutil_la_SOURCES = \ silcbase64.c \ silcbitops.c \ silcerrno.c \ - silcgetopt.c + silcgetopt.c \ + silcregex.c #ifdef SILC_DIST_TOOLKIT include_HEADERS = \ @@ -126,7 +127,8 @@ include_HEADERS = \ silcbase64.h \ silcbitops.h \ silcerrno.h \ - silcgetopt.h + silcgetopt.h \ + silcregex.h SILC_EXTRA_DIST = tests #endif SILC_DIST_TOOLKIT diff --git a/lib/silcutil/silcerrno.c b/lib/silcutil/silcerrno.c index 62404a8e..fae62b50 100644 --- a/lib/silcutil/silcerrno.c +++ b/lib/silcutil/silcerrno.c @@ -410,8 +410,28 @@ const char *silc_errno_strings[] = "Address already in use", "Network is down", "End of stream", - "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + "Badly placed parenthesis", + "Bad hexadecimal number", + "Bad match register number", + "Badly placed special character", + "Regular expression too complex", }; /* Map error to string */ diff --git a/lib/silcutil/silcerrno.h b/lib/silcutil/silcerrno.h index a9de2bd9..c13749ac 100644 --- a/lib/silcutil/silcerrno.h +++ b/lib/silcutil/silcerrno.h @@ -126,6 +126,13 @@ typedef enum { SILC_ERR_NET_DOWN = 82, /* Network is down */ SILC_ERR_EOS = 83, /* End of stream */ + /* Regular 
expression errors */ + SILC_ERR_REGEX_PAREN = 100, /* Unmatched parenthesis */ + SILC_ERR_REGEX_HEX = 101, /* Bad hex number */ + SILC_ERR_REGEX_REG = 102, /* Bad register number */ + SILC_ERR_REGEX_SPECIAL = 103, /* Unmatched special character */ + SILC_ERR_REGEX_TOO_COMPLEX = 104, /* Too complex expression */ + SILC_ERR_MAX, } SilcResult; /***/ diff --git a/lib/contrib/regexpr.c b/lib/silcutil/silcregex.c similarity index 83% rename from lib/contrib/regexpr.c rename to lib/silcutil/silcregex.c index 65917bc7..c11ea8ad 100644 --- a/lib/contrib/regexpr.c +++ b/lib/silcutil/silcregex.c @@ -1,47 +1,112 @@ /* -regexpr.c + regexpr.c -Author: Tatu Ylonen + Author: Tatu Ylonen -Copyright (c) 1991 Tatu Ylonen, Espoo, Finland + Copyright (c) 1991 Tatu Ylonen, Espoo, Finland -Permission to use, copy, modify, distribute, and sell this software -and its documentation is hereby granted without fee, provided that the -above copyright notice appears in all source code copies, the name of -Tatu Ylonen is not used to advertise products containing this software -or a derivation thereof, and all modified versions are clearly marked -as such. + Permission to use, copy, modify, distribute, and sell this software + and its documentation is hereby granted without fee, provided that the + above copyright notice appears in all source code copies, the name of + Tatu Ylonen is not used to advertise products containing this software + or a derivation thereof, and all modified versions are clearly marked + as such. -This software is provided "as is" without express or implied warranty. + This software is provided "as is" without express or implied warranty. -Created: Thu Sep 26 17:14:05 1991 ylo -Last modified: Sun Mar 29 16:47:31 1992 ylo + Created: Thu Sep 26 17:14:05 1991 ylo + Last modified: Sun Mar 29 16:47:31 1992 ylo -This code draws many ideas from the regular expression packages by -Henry Spencer of the University of Toronto and Richard Stallman of the -Free Software Foundation. 
+ This code draws many ideas from the regular expression packages by + Henry Spencer of the University of Toronto and Richard Stallman of the + Free Software Foundation. -Emacs-specific code and syntax table code is almost directly borrowed -from GNU regexp. + Emacs-specific code and syntax table code is almost directly borrowed + from GNU regexp. -$Id$ + The SILC Regex API by Pekka Riikonen, under the same license as the original + code. */ #include "silc.h" -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include "regexpr.h" + +/* Modified for use in SILC Runtime Toolkit. I think we have disabled many + features we could use, for the sake of simple API, which we may want to + extend later. */ + +#define RE_NREGS 128 /* number of registers available */ + +/* bit definitions for syntax */ +#define RE_NO_BK_PARENS 1 /* no quoting for parentheses */ +#define RE_NO_BK_VBAR 2 /* no quoting for vertical bar */ +#define RE_BK_PLUS_QM 4 /* quoting needed for + and ? */ +#define RE_TIGHT_VBAR 8 /* | binds tighter than ^ and $ */ +#define RE_NEWLINE_OR 16 /* treat newline as or */ +#define RE_CONTEXT_INDEP_OPS 32 /* ^$?*+ are special in all contexts */ +#define RE_ANSI_HEX 64 /* ansi sequences (\n etc) and \xhh */ +#define RE_NO_GNU_EXTENSIONS 128 /* no gnu extensions */ + +/* definitions for some common regexp styles */ +#define RE_SYNTAX_AWK (RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS) +#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK|RE_NEWLINE_OR) +#define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR) +#define RE_SYNTAX_EMACS 0 + +/* Registers */ +typedef struct re_registers { + int start[RE_NREGS]; /* start offset of region */ + int end[RE_NREGS]; /* end offset of region */ +} *regexp_registers_t; + +int re_set_syntax(int syntax); +/* This sets the syntax to use and returns the previous syntax. The + syntax is specified by a bit mask of the above defined bits. 
*/ + +SilcResult re_compile_pattern(char *regex, int regex_size, SilcRegex compiled); +/* This compiles the regexp (given in regex and length in regex_size). + This returns SILC_OK if the regexp compiled successfully, and an error + code if an error was encountered. The buffer field must be + initialized to a memory area allocated by silc_malloc (or to NULL) before + use, and the allocated field must be set to its length (or 0 if buffer is + NULL). Also, the translate field must be set to point to a valid + translation table, or NULL if it is not used. */ + +int re_match(SilcRegex compiled, char *string, int size, int pos, + regexp_registers_t regs); +/* This tries to match the regexp against the string. This returns the + length of the matched portion, or -1 if the pattern could not be + matched and -2 if an error (such as failure stack overflow) is + encountered. */ + +int re_match_2(SilcRegex compiled, char *string1, int size1, + char *string2, int size2, int pos, regexp_registers_t regs, + int mstop); +/* This tries to match the regexp to the concatenation of string1 and + string2. This returns the length of the matched portion, or -1 if the + pattern could not be matched and -2 if an error (such as failure stack + overflow) is encountered. */ + +int re_search(SilcRegex compiled, char *string, int size, int startpos, + int range, regexp_registers_t regs); +/* This searches for a substring matching the regexp. This returns the first + index at which a match is found. range specifies at how many positions to + try matching; positive values indicate searching forwards, and negative + values indicate searching backwards. mstop specifies the offset beyond + which a match must not go. This returns -1 if no match is found, and + -2 if an error (such as failure stack overflow) is encountered. 
*/ + +int re_search_2(SilcRegex compiled, char *string1, int size1, + char *string2, int size2, int startpos, int range, + regexp_registers_t regs, int mstop); +/* This is like re_search, but search from the concatenation of string1 and + string2. */ + +void re_compile_fastmap(SilcRegex compiled); +/* This computes the fastmap for the regexp. For this to have any effect, + the calling program must have initialized the fastmap field to point + to an array of 256 characters. */ #define MACRO_BEGIN do { #define MACRO_END } while (0) @@ -149,10 +214,10 @@ static char re_syntax_table[256]; static void re_compile_initialize() { int a; - + #if !defined(emacs) && !defined(SYNTAX_TABLE) static int syntax_table_inited = 0; - + if (!syntax_table_inited) { syntax_table_inited = 1; @@ -266,10 +331,10 @@ int ch; return 16; } -char *re_compile_pattern(regex, size, bufp) +SilcResult re_compile_pattern(regex, size, bufp) char *regex; int size; -regexp_t bufp; +SilcRegex bufp; { int a, pos, op, current_level, level, opcode; int pattern_offset = 0, alloc; @@ -293,7 +358,7 @@ regexp_t bufp; if (pattern_offset+(amount) > alloc) \ { \ alloc += 256 + (amount); \ - pattern = realloc(pattern, alloc); \ + pattern = silc_realloc(pattern, alloc); \ if (!pattern) \ goto out_of_memory; \ } \ @@ -337,7 +402,7 @@ regexp_t bufp; bufp->buffer = pattern; \ bufp->used = pattern_offset; \ MACRO_END - + #define GETHEX(var) \ MACRO_BEGIN \ char gethex_ch, gethex_value; \ @@ -407,7 +472,7 @@ regexp_t bufp; if (alloc == 0 || pattern == NULL) { alloc = 256; - pattern = malloc(alloc); + pattern = silc_malloc(alloc); if (!pattern) goto out_of_memory; } @@ -608,7 +673,7 @@ regexp_t bufp; case Ropenset: { int complement,prev,offset,range,firstchar; - + SET_LEVEL_START; ALLOC(1+256/8); STORE(Cset); @@ -722,35 +787,35 @@ regexp_t bufp; ALLOC(1); STORE(Cend); SET_FIELDS; - return NULL; + return SILC_OK; op_error: SET_FIELDS; - return "Badly placed special character"; + return SILC_ERR_REGEX_SPECIAL; 
bad_match_register: SET_FIELDS; - return "Bad match register number"; + return SILC_ERR_REGEX_REG; hex_error: SET_FIELDS; - return "Bad hexadecimal number"; + return SILC_ERR_REGEX_HEX; parenthesis_error: SET_FIELDS; - return "Badly placed parenthesis"; + return SILC_ERR_REGEX_PAREN; out_of_memory: SET_FIELDS; - return "Out of memory"; + return SILC_ERR_OUT_OF_MEMORY; ends_prematurely: SET_FIELDS; - return "Regular expression ends prematurely"; + return SILC_ERR_OVERFLOW; too_complex: SET_FIELDS; - return "Regular expression too complex"; + return SILC_ERR_REGEX_TOO_COMPLEX; } #undef CHARAT #undef NEXTCHAR @@ -873,7 +938,7 @@ int used, pos; visited = small_visited; else { - visited = malloc(used); + visited = silc_malloc(used); if (!visited) return 0; } @@ -882,12 +947,12 @@ int used, pos; memset(visited, 0, used); re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap); if (visited != small_visited) - free(visited); + silc_free(visited); return 1; } void re_compile_fastmap(bufp) -regexp_t bufp; +SilcRegex bufp; { if (!bufp->fastmap || bufp->fastmap_accurate) return; @@ -909,7 +974,7 @@ regexp_t bufp; #define MAX_FAILURES 4100 /* max # of failure points before failing */ int re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop) -regexp_t bufp; +SilcRegex bufp; char *string1, *string2; int size1, size2, pos, mstop; regexp_registers_t regs; @@ -1033,7 +1098,7 @@ regexp_registers_t regs; } } if (failure_stack_start != initial_failure_stack) - free((char *)failure_stack_start); + silc_free((char *)failure_stack_start); return match_end - pos; case Cbol: if (text == string1 || text[-1] == '\n') /* text[-1] always valid */ @@ -1084,7 +1149,7 @@ regexp_registers_t regs; regpartend = regtextend; else regpartend = string1 + size1; - + for (;regtext != regtextend;) { NEXTCHAR(ch); @@ -1244,7 +1309,7 @@ regexp_registers_t regs; if (failure_stack_start != initial_failure_stack) goto error; failure_stack_start = (struct failure_point *) - 
malloc(MAX_FAILURES * sizeof(*failure_stack_start)); + silc_malloc(MAX_FAILURES * sizeof(*failure_stack_start)); failure_stack_end = failure_stack_start + MAX_FAILURES; memcpy((char *)failure_stack_start, (char *)initial_failure_stack, INITIAL_FAILURES * sizeof(*failure_stack_start)); @@ -1358,12 +1423,12 @@ regexp_registers_t regs; goto continue_matching; } if (failure_stack_start != initial_failure_stack) - free((char *)failure_stack_start); + silc_free((char *)failure_stack_start); return -1; error: if (failure_stack_start != initial_failure_stack) - free((char *)failure_stack_start); + silc_free((char *)failure_stack_start); return -2; } @@ -1372,7 +1437,7 @@ regexp_registers_t regs; #undef PUSH_FAILURE int re_match(bufp, string, size, pos, regs) -regexp_t bufp; +SilcRegex bufp; char *string; int size, pos; regexp_registers_t regs; @@ -1382,7 +1447,7 @@ regexp_registers_t regs; int re_search_2(bufp, string1, size1, string2, size2, pos, range, regs, mstop) -regexp_t bufp; +SilcRegex bufp; char *string1, *string2; int size1, size2, pos, range, mstop; regexp_registers_t regs; @@ -1390,11 +1455,11 @@ regexp_registers_t regs; char *fastmap, *translate, *text, *partstart, *partend; int dir, ret; char anchor; - + assert(size1 >= 0 && size2 >= 0 && pos >= 0 && mstop >= 0); assert(pos + range >= 0 && pos + range <= size1 + size2); assert(pos <= mstop); - + fastmap = bufp->fastmap; translate = bufp->translate; if (fastmap && !bufp->fastmap_accurate) @@ -1495,7 +1560,7 @@ regexp_registers_t regs; } int re_search(bufp, string, size, startpos, range, regs) -regexp_t bufp; +SilcRegex bufp; char *string; int size, startpos, range; regexp_registers_t regs; @@ -1504,219 +1569,82 @@ regexp_registers_t regs; startpos, range, regs, size); } -static struct re_pattern_buffer re_comp_buf; +/****************************** SILC Regex API ******************************/ -char *re_comp(s) -char *s; -{ - if (s == NULL) - { - if (!re_comp_buf.buffer) - return "Out of memory"; - return 
NULL; - } - if (!re_comp_buf.buffer) - { - /* the buffer will be allocated automatically */ - re_comp_buf.fastmap = malloc(256); - re_comp_buf.translate = NULL; - } - return re_compile_pattern(s, strlen(s), &re_comp_buf); -} +/* Compile regular expression */ -int re_exec(s) -char *s; +SilcBool silc_regex_compile(SilcRegex regexp, const char *regex, + SilcRegexFlags flags) { - int len = strlen(s); - - return re_search(&re_comp_buf, s, len, 0, len, (regexp_registers_t)NULL) >= 0; -} + SilcResult ret; + int syntax = 0; -/* POSIX Compatibility */ + if (!regexp || !regex) { + silc_set_errno(SILC_ERR_INVALID_ARGUMENT); + return FALSE; + } -int regcomp(regex_t *preg, const char *regex, int cflags) -{ - int syntax = 0; - memset(preg, 0, sizeof(*preg)); - if (cflags & REG_EXTENDED) - syntax |= (RE_CONTEXT_INDEP_OPS | RE_NO_BK_PARENS | RE_NO_BK_VBAR); + memset(regexp, 0, sizeof(*regexp)); + + /* Set syntax */ + syntax |= (RE_CONTEXT_INDEP_OPS | RE_NO_BK_PARENS | RE_NO_BK_VBAR); re_set_syntax(syntax); - if (re_compile_pattern((char *)regex, strlen(regex), preg) == NULL) - return 0; - return -1; -} -int regexec(const regex_t *preg, const char *string, size_t nmatch, - regmatch_t pmatch[], int eflags) -{ - int len = strlen(string); - int ret; - - ret = re_search((regex_t *)preg, (char *)string, len, 0, len, (regexp_registers_t)NULL); - if (ret >= 0) - return 0; + /* Compile */ + ret = re_compile_pattern((char *)regex, strlen(regex), regexp); + if (ret != SILC_OK) + silc_set_errno(ret); - return ret; + return ret == SILC_OK; } -size_t regerror(int errcode, const regex_t *preg, char *errbuf, - size_t errbuf_size) -{ - return -1; -} +/* Match compiled regular expression */ -void regfree(regex_t *preg) +SilcBool silc_regex_match(SilcRegex regexp, const char *string, + SilcUInt32 num_match, SilcRegexMatch match, + SilcRegexFlags flags) { - free(preg->buffer); -} + struct re_registers regs; + int ret, i, len = string ? strlen(string) : 0; /* NULL `string' is rejected below, never measured */ -#ifdef TEST_REGEXP + if (!regexp || !string) { + 
silc_set_errno(SILC_ERR_INVALID_ARGUMENT); + return FALSE; + } -int main() -{ - char buf[500]; - char *cp; - struct re_pattern_buffer exp; - struct re_registers regs; - int a,pos; - char fastmap[256]; + if (num_match && !match) { + silc_set_errno(SILC_ERR_INVALID_ARGUMENT); + return FALSE; + } - exp.allocated = 0; - exp.buffer = 0; - exp.translate = NULL; - exp.fastmap = fastmap; + /* Internal limit for maximum number of registers */ + if (num_match > RE_NREGS) + num_match = RE_NREGS; - /* re_set_syntax(RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_ANSI_HEX); */ + /* Search */ + ret = re_search(regexp, (char *)string, len, 0, len, + num_match ? &regs : NULL); + if (ret < 0) { + if (ret == -2) + silc_set_errno(SILC_ERR); + else + silc_set_errno(SILC_ERR_NOT_FOUND); + } - while (1) - { - printf("Enter regexp:\n"); - gets(buf); - cp=re_compile_pattern(buf, strlen(buf), &exp); - if (cp) - { - printf("Error: %s\n", cp); - continue; - } - re_compile_fastmap(&exp); - printf("dump:\n"); - for (pos = 0; pos < exp.used;) - { - printf("%d: ", pos); - switch (exp.buffer[pos++]) - { - case Cend: - strcpy(buf, "end"); - break; - case Cbol: - strcpy(buf, "bol"); - break; - case Ceol: - strcpy(buf, "eol"); - break; - case Cset: - strcpy(buf, "set "); - for (a = 0; a < 256/8; a++) - sprintf(buf+strlen(buf)," %02x", - (unsigned char)exp.buffer[pos++]); - break; - case Cexact: - sprintf(buf, "exact '%c' 0x%x", exp.buffer[pos], - (unsigned char)exp.buffer[pos]); - pos++; - break; - case Canychar: - strcpy(buf, "anychar"); - break; - case Cstart_memory: - sprintf(buf, "start_memory %d", exp.buffer[pos++]); - break; - case Cend_memory: - sprintf(buf, "end_memory %d", exp.buffer[pos++]); - break; - case Cmatch_memory: - sprintf(buf, "match_memory %d", exp.buffer[pos++]); - break; - case Cjump: - case Cdummy_failure_jump: - case Cstar_jump: - case Cfailure_jump: - case Cupdate_failure_jump: - a = (unsigned char)exp.buffer[pos++]; - a += (unsigned char)exp.buffer[pos++] << 8; - a = (int)(short)a; - switch 
(exp.buffer[pos-3]) - { - case Cjump: - cp = "jump"; - break; - case Cstar_jump: - cp = "star_jump"; - break; - case Cfailure_jump: - cp = "failure_jump"; - break; - case Cupdate_failure_jump: - cp = "update_failure_jump"; - break; - case Cdummy_failure_jump: - cp = "dummy_failure_jump"; - break; - default: - cp = "unknown jump"; - break; - } - sprintf(buf, "%s %d", cp, a + pos); - break; - case Cbegbuf: - strcpy(buf,"begbuf"); - break; - case Cendbuf: - strcpy(buf,"endbuf"); - break; - case Cwordbeg: - strcpy(buf,"wordbeg"); - break; - case Cwordend: - strcpy(buf,"wordend"); - break; - case Cwordbound: - strcpy(buf,"wordbound"); - break; - case Cnotwordbound: - strcpy(buf,"notwordbound"); - break; - default: - sprintf(buf, "unknown code %d", - (unsigned char)exp.buffer[pos - 1]); - break; - } - printf("%s\n", buf); - } - printf("can_be_null = %d uses_registers = %d anchor = %d\n", - exp.can_be_null, exp.uses_registers, exp.anchor); - - printf("fastmap:"); - for (a = 0; a < 256; a++) - if (exp.fastmap[a]) - printf(" %d", a); - printf("\n"); - printf("Enter strings. An empty line terminates.\n"); - while (fgets(buf, sizeof(buf), stdin)) - { - if (buf[0] == '\n') - break; - a = re_search(&exp, buf, strlen(buf), 0, strlen(buf), &regs); - printf("search returns %d\n", a); - if (a != -1) - { - for (a = 0; a < RE_NREGS; a++) - { - printf("buf %d: %d to %d\n", a, regs.start[a], regs.end[a]); - } - } - } + if (ret >= 0) { + /* Return matches */ + for (i = 0; i < num_match; i++) { + match[i].start = regs.start[i]; + match[i].end = regs.end[i]; } + } + + return ret >= 0; } -#endif /* TEST_REGEXP */ +/* Free regex */ + +void silc_regex_free(SilcRegex regexp) +{ + silc_free(regexp->buffer); +} diff --git a/lib/silcutil/silcregex.h b/lib/silcutil/silcregex.h index 7fac2ac2..217144ab 100644 --- a/lib/silcutil/silcregex.h +++ b/lib/silcutil/silcregex.h @@ -1,3 +1,4 @@ + /* regexpr.h @@ -58,9 +59,7 @@ * * DESCRIPTION * - * The regular expression context. 
This context is given as argument - * to all silc_regex_* functions. It is usually statically allocated - * but can be dynamically allocated by silc_malloc. + * The regular expression context. * ***/ typedef struct SilcRegexObject { @@ -83,9 +82,6 @@ typedef struct SilcRegexObject { * * DESCRIPTION * - * The regular expression match context that provides information on the - * found match. It provides the start offset and end offset of the - * found match. * * SOURCE */ diff --git a/lib/silcutil/silcschedule.h b/lib/silcutil/silcschedule.h index c37ecc66..c3120181 100644 --- a/lib/silcutil/silcschedule.h +++ b/lib/silcutil/silcschedule.h @@ -945,11 +945,6 @@ SilcBool silc_schedule_event_connect(SilcSchedule schedule, * Returns FALSE on error or if the `callback' with `context' has not been * connected. Otherwise, returns TRUE. * - * EXAMPLE - * - * silc_schedule_event_connect(schedule, "foo event", NULL, - * foo_signal_callback, foo_context); - * ***/ SilcBool silc_schedule_event_disconnect(SilcSchedule schedule, const char *event, SilcTask task, diff --git a/lib/silcutil/silcstrutil.c b/lib/silcutil/silcstrutil.c index 1655f213..914adc98 100644 --- a/lib/silcutil/silcstrutil.c +++ b/lib/silcutil/silcstrutil.c @@ -121,7 +121,7 @@ int silc_string_compare(char *string1, char *string2) char **silc_string_split(const char *string, char ch, int *ret_count) { - char **splitted = NULL, sep[1], *item, *cp; + char **splitted = NULL, sep[2], *item, *cp; int i = 0, len; if (!string || !ret_count) { @@ -140,9 +140,13 @@ char **silc_string_split(const char *string, char ch, int *ret_count) } sep[0] = ch; + sep[1] = '\0'; cp = (char *)string; - while(cp) { + while (cp) { len = strcspn(cp, sep); + if (!len) + break; + item = silc_memdup(cp, len); if (!item) { silc_free(splitted); @@ -155,10 +159,13 @@ char **silc_string_split(const char *string, char ch, int *ret_count) else cp++; - splitted = silc_realloc(splitted, (i + 1) * sizeof(*splitted)); - if (!splitted) - return NULL; 
splitted[i++] = item; + + if (cp) { + splitted = silc_realloc(splitted, (i + 1) * sizeof(*splitted)); + if (!splitted) + return NULL; + } } *ret_count = i; @@ -246,18 +253,15 @@ char *silc_string_regex_combine(const char *string1, const char *string2) int silc_string_regex_match(const char *regex, const char *string) { - regex_t preg; - int ret = FALSE; + SilcRegexStruct preg; + SilcBool ret; - if (regcomp(&preg, regex, REG_NOSUB | REG_EXTENDED) != 0) { - silc_set_errno(SILC_ERR_INVALID_ARGUMENT); + if (!silc_regex_compile(&preg, regex, 0)) return FALSE; - } - if (regexec(&preg, string, 0, NULL, 0) == 0) - ret = TRUE; + ret = silc_regex_match(&preg, string, 0, NULL, 0); - regfree(&preg); + silc_regex_free(&preg); return ret; } diff --git a/lib/silcutil/tests/Makefile.am b/lib/silcutil/tests/Makefile.am index 21511a01..7d6f4e31 100644 --- a/lib/silcutil/tests/Makefile.am +++ b/lib/silcutil/tests/Makefile.am @@ -21,7 +21,8 @@ bin_PROGRAMS = test_silcstrutil test_silcstringprep test_silchashtable \ test_silclist test_silcfsm test_silcasync test_silcschedule \ test_silcnet test_silcstack test_silcmime test_silcfdstream \ test_silcatomic test_silcmutex test_silctime test_silcthread \ - test_silcdll test_silcenv test_silctimer test_silcbitops + test_silcdll test_silcenv test_silctimer test_silcbitops \ + test_silcregex test_silcstrutil_SOURCES = test_silcstrutil.c test_silcstringprep_SOURCES = test_silcstringprep.c @@ -42,6 +43,7 @@ test_silcdll_SOURCES = test_silcdll.c test_silcenv_SOURCES = test_silcenv.c test_silctimer_SOURCES = test_silctimer.c test_silcbitops_SOURCES = test_silcbitops.c +test_silcregex_SOURCES = test_silcregex.c LIBS = $(SILC_COMMON_LIBS) LDADD = -L.. -L../.. 
-lsilc diff --git a/lib/silcutil/tests/test_silcregex.c b/lib/silcutil/tests/test_silcregex.c new file mode 100644 index 00000000..6e79cb0a --- /dev/null +++ b/lib/silcutil/tests/test_silcregex.c @@ -0,0 +1,129 @@ +/* Regex tests */ + +#include "silc.h" + +int main(int argc, char **argv) +{ + SilcBool success = FALSE; + SilcRegexStruct reg; + SilcRegexMatchStruct match[10]; + int i, num_match = 10; + char *regex, *string, *sub; + + if (argc > 1 && !strcmp(argv[1], "-d")) { + silc_log_debug(TRUE); + silc_log_quick(TRUE); + silc_log_debug_hexdump(TRUE); + silc_log_set_debug_string("*regex*,*errno*"); + } + + regex = "foo[0-9]*"; + SILC_LOG_DEBUG(("Regex %s", regex)); + if (!silc_regex_compile(&reg, regex, 0)) + goto err; + + string = "foo"; + SILC_LOG_DEBUG(("Match %s", string)); + if (!silc_regex_match(&reg, string, 0, NULL, 0)) + goto err; + + string = "foo20"; + SILC_LOG_DEBUG(("Match %s", string)); + if (!silc_regex_match(&reg, string, 0, NULL, 0)) + goto err; + + string = "foo20, bar, foo100, foo"; + SILC_LOG_DEBUG(("Match all substrings in %s", string)); + while (silc_regex_match(&reg, string, 1, match, 0)) { + SILC_LOG_DEBUG(("Match start %d", match[0].start)); + sub = silc_memdup(string + match[0].start, match[0].end - match[0].start); + SILC_LOG_DEBUG(("Match substring '%s'", sub)); + silc_free(sub); + string += match[0].end; + } + + string = "foo20, bar, foo100, Foo, foo0"; + SILC_LOG_DEBUG(("Match all substrings at once in %s", string)); + if (!silc_regex_match(&reg, string, num_match, match, 0)) + goto err; + + for (i = 0; i < num_match; i++) { + if (match[i].start != -1) { + SILC_LOG_DEBUG(("Match start %d", match[i].start)); + sub = silc_memdup(string + match[i].start, match[i].end - + match[i].start); + SILC_LOG_DEBUG(("Match substring '%s'", sub)); + silc_free(sub); + } + } + + silc_regex_free(&reg); + + regex = "^(([^:]+)://)?([^:/]+)(:([0-9]+))?(/.*)"; + SILC_LOG_DEBUG(("Regex %s", regex)); + if (!silc_regex_compile(&reg, regex, 0)) + goto err; + + string =
"http://silcnet.org:443/foobar/pelle.html"; + SILC_LOG_DEBUG(("Parse URI")); + if (!silc_regex_match(&reg, string, num_match, match, 0)) + goto err; + + for (i = 0; i < num_match; i++) { + if (match[i].start != -1) { + SILC_LOG_DEBUG(("Match start %d", match[i].start)); + sub = silc_memdup(string + match[i].start, match[i].end - + match[i].start); + SILC_LOG_DEBUG(("Match substring '%s'", sub)); + silc_free(sub); + } + } + + string = "http://silcnet.org/"; + SILC_LOG_DEBUG(("Parse URI")); + if (!silc_regex_match(&reg, string, num_match, match, 0)) + goto err; + + for (i = 0; i < num_match; i++) { + if (match[i].start != -1) { + SILC_LOG_DEBUG(("Match start %d", match[i].start)); + sub = silc_memdup(string + match[i].start, match[i].end - + match[i].start); + SILC_LOG_DEBUG(("Match substring '%s'", sub)); + silc_free(sub); + } + } + + silc_regex_free(&reg); + + regex = "((a)(b))"; + SILC_LOG_DEBUG(("Regex %s", regex)); + if (!silc_regex_compile(&reg, regex, 0)) + goto err; + + string = "ab"; + SILC_LOG_DEBUG(("Match all substrings at once in %s", string)); + if (!silc_regex_match(&reg, string, num_match, match, 0)) + goto err; + + for (i = 0; i < num_match; i++) { + if (match[i].start != -1) { + SILC_LOG_DEBUG(("Match start %d", match[i].start)); + sub = silc_memdup(string + match[i].start, match[i].end - + match[i].start); + SILC_LOG_DEBUG(("Match substring '%s'", sub)); + silc_free(sub); + } + } + + silc_regex_free(&reg); + + success = TRUE; + + err: + SILC_LOG_DEBUG(("Testing was %s", success ? "SUCCESS" : "FAILURE")); + fprintf(stderr, "Testing was %s\n", success ? "SUCCESS" : "FAILURE"); + + return success; +} + -- 2.24.0