3 * Author: Tatu Ylonen <ylo@ngs.fi>
5 * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
7 * Permission to use, copy, modify, distribute, and sell this software
8 * and its documentation for any purpose is hereby granted without
9 * fee, provided that the above copyright notice appear in all copies.
10 * This software is provided "as is" without express or implied
13 * Created: Thu Sep 26 17:14:05 1991 ylo
14 * Last modified: Mon Nov 4 17:06:48 1991 ylo
15 * Ported to Think C: 19 Jan 1992 guido@cwi.nl
17 * This code draws many ideas from the regular expression packages by
18 * Henry Spencer of the University of Toronto and Richard Stallman of
19 * the Free Software Foundation.
21 * Bugs fixed and lots of reorganization by Jeffrey C. Ollie, April
22 * 1997 Thanks for bug reports and ideas from Andrew Kuchling, Tim
23 * Peters, Guido van Rossum, Ka-Ping Yee, Sjoerd Mullender, and
24 * probably one or two others that I'm forgetting.
29 The SILC Regex API and modifications by Pekka Riikonen, under the same
30 license as the original code. We've added the following features:
32 - RE_SYNTAX_POSIX POSIX extended regular expression syntax
33 - RE_REPEAT bounded repeat a{n,m} (RE_SYNTAX_POSIX)
34 - RE_NOTBOL bol fails to match (conforming POSIX regex API)
35 - RE_NOTEOL eol fails to match (conforming POSIX regex API)
36 - SilcStack support compile/match without real memory allocations
39 #include "silcruntime.h"
41 #define RE_NREGS 128 /* number of registers available */
43 /* bit definitions for syntax */
44 #define RE_NO_BK_PARENS 1 /* no quoting for parentheses */
45 #define RE_NO_BK_VBAR 2 /* no quoting for vertical bar */
46 #define RE_BK_PLUS_QM 4 /* quoting needed for + and ? */
47 #define RE_TIGHT_VBAR 8 /* | binds tighter than ^ and $ */
48 #define RE_NEWLINE_OR 16 /* treat newline as or */
49 #define RE_CONTEXT_INDEP_OPS 32 /* ^$?*+ are special in all contexts */
50 #define RE_ANSI_HEX 64 /* ansi sequences (\n etc) and \xhh */
51 #define RE_NO_GNU_EXTENSIONS 128 /* no gnu extensions */
52 #define RE_NOTBOL 256 /* bol fails to match */
53 #define RE_NOTEOL 512 /* eol fails to match */
54 #define RE_REPEAT 1024 /* bounded repeat expression */
56 /* definitions for some common regexp styles */
57 #define RE_SYNTAX_AWK (RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS)
58 #define RE_SYNTAX_EGREP (RE_SYNTAX_AWK|RE_NEWLINE_OR)
59 #define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR)
60 #define RE_SYNTAX_POSIX (RE_SYNTAX_AWK|RE_REPEAT)
61 #define RE_SYNTAX_EMACS 0
64 typedef struct re_registers {
65 int start[RE_NREGS]; /* start offset of region */
66 int end[RE_NREGS]; /* end offset of region */
67 } *regexp_registers_t;
69 /* The original code blithely assumed that sizeof(short) == 2. Not
70 * always true. Original instances of "(short)x" were replaced by
71 * SHORT(x), where SHORT is #defined below. */
73 #define SHORT(x) ((x) & 0x8000 ? (x) - 0x10000 : (x))
75 /* The stack implementation is taken from an idea by Andrew Kuchling.
76 * It's a doubly linked list of arrays. The advantages of this over a
77 * simple linked list are that the number of mallocs required are
78 * reduced. It also makes it possible to statically allocate enough
79 * space so that small patterns don't ever need to call malloc.
81 * The advantages over a single array is that is periodically
82 * realloced when more space is needed is that we avoid ever copying
85 /* item_t is the basic stack element. Defined as a union of
86 * structures so that both registers, failure points, and counters can
87 * be pushed/popped from the stack. There's nothing built into the
88 * item to keep track of whether a certain stack item is a register, a
89 * failure point, or a counter. */
116 #define STACK_PAGE_SIZE 256
117 #define NUM_REGISTERS 256
119 /* A 'page' of stack items. */
121 typedef struct item_page_t
123 item_t items[STACK_PAGE_SIZE];
124 struct item_page_t *prev;
125 struct item_page_t *next;
128 typedef struct match_state
130 /* The number of registers that have been pushed onto the stack
131 * since the last failure point. */
135 /* Used to control when registers need to be pushed onto the
140 /* The number of failure points on the stack. */
144 /* Storage for the registers. Each register consists of two
145 * pointers to characters. So register N is represented as
146 * start[N] and end[N]. The pointers must be converted to
147 * offsets from the beginning of the string before returning the
148 * registers to the calling program. */
150 unsigned char *start[NUM_REGISTERS];
151 unsigned char *end[NUM_REGISTERS];
153 /* Keeps track of whether a register has changed recently. */
155 int changed[NUM_REGISTERS];
157 /* Structure to encapsulate the stack. */
160 /* index into the current page. If index == 0 and you need
161 * to pop an item, move to the previous page and set index
162 * = STACK_PAGE_SIZE - 1. Otherwise decrement index to
163 * push a page. If index == STACK_PAGE_SIZE and you need
164 * to push a page move to the next page and set index =
165 * 0. If there is no new next page, allocate a new page
166 * and link it in. Otherwise, increment index to push a
170 item_page_t *current; /* Pointer to the current page. */
171 item_page_t first; /* First page is statically allocated. */
175 /* Initialize a state object */
177 /* #define NEW_STATE(state) \ */
178 /* memset(&state, 0, (void *)(&state.stack) - (void *)(&state)); \ */
179 /* state.stack.current = &state.stack.first; \ */
180 /* state.stack.first.prev = NULL; \ */
181 /* state.stack.first.next = NULL; \ */
182 /* state.stack.index = 0; \ */
183 /* state.level = 1 */
185 #define NEW_STATE(state, nregs) \
188 for (i = 0; i < nregs; i++) \
190 state.start[i] = NULL; \
191 state.end[i] = NULL; \
192 state.changed[i] = 0; \
194 state.stack.current = &state.stack.first; \
195 state.stack.first.prev = NULL; \
196 state.stack.first.next = NULL; \
197 state.stack.index = 0; \
204 /* Free any memory that might have been malloc'd */
206 #define FREE_STATE(state) \
207 while(state.stack.first.next != NULL) \
209 state.stack.current = state.stack.first.next; \
210 state.stack.first.next = state.stack.current->next; \
211 silc_sfree(bufp->rstack, state.stack.current); \
214 /* Discard the top 'count' stack items. */
216 #define STACK_DISCARD(stack, count, on_error) \
217 stack.index -= count; \
218 while (stack.index < 0) \
220 if (stack.current->prev == NULL) \
222 stack.current = stack.current->prev; \
223 stack.index += STACK_PAGE_SIZE; \
226 /* Store a pointer to the previous item on the stack. Used to pop an
227 * item off of the stack. */
229 #define STACK_PREV(stack, top, on_error) \
230 if (stack.index == 0) \
232 if (stack.current->prev == NULL) \
234 stack.current = stack.current->prev; \
235 stack.index = STACK_PAGE_SIZE - 1; \
241 top = &(stack.current->items[stack.index])
243 /* Store a pointer to the next item on the stack. Used to push an item
244 * on to the stack. */
246 #define STACK_NEXT(stack, top, on_error) \
247 if (stack.index == STACK_PAGE_SIZE) \
249 if (stack.current->next == NULL) \
251 stack.current->next = \
252 (item_page_t *)silc_smalloc(bufp->rstack, sizeof(item_page_t)); \
253 if (stack.current->next == NULL) \
255 stack.current->next->prev = stack.current; \
256 stack.current->next->next = NULL; \
258 stack.current = stack.current->next; \
261 top = &(stack.current->items[stack.index++])
263 /* Store a pointer to the item that is 'count' items back in the
264 * stack. STACK_BACK(stack, top, 1, on_error) is equivalent to
265 * STACK_TOP(stack, top, on_error). */
267 #define STACK_BACK(stack, top, count, on_error) \
270 item_page_t *current; \
271 current = stack.current; \
272 index = stack.index - (count); \
275 if (current->prev == NULL) \
277 current = current->prev; \
278 index += STACK_PAGE_SIZE; \
280 top = &(current->items[index]); \
283 /* Store a pointer to the top item on the stack. Execute the
284 * 'on_error' code if there are no items on the stack. */
286 #define STACK_TOP(stack, top, on_error) \
287 if (stack.index == 0) \
289 if (stack.current->prev == NULL) \
291 top = &(stack.current->prev->items[STACK_PAGE_SIZE - 1]); \
295 top = &(stack.current->items[stack.index - 1]); \
298 /* Test to see if the stack is empty */
300 #define STACK_EMPTY(stack) ((stack.index == 0) && \
301 (stack.current->prev == NULL))
303 /* Return the start of register 'reg' */
305 #define GET_REG_START(state, reg) (state.start[reg])
307 /* Return the end of register 'reg' */
309 #define GET_REG_END(state, reg) (state.end[reg])
311 /* Set the start of register 'reg'. If the state of the register needs
312 * saving, push it on the stack. */
314 #define SET_REG_START(state, reg, text, on_error) \
315 if(state.changed[reg] < state.level) \
318 STACK_NEXT(state.stack, item, on_error); \
319 item->reg.num = reg; \
320 item->reg.start = state.start[reg]; \
321 item->reg.end = state.end[reg]; \
322 item->reg.level = state.changed[reg]; \
323 state.changed[reg] = state.level; \
326 state.start[reg] = text
328 /* Set the end of register 'reg'. If the state of the register needs
329 * saving, push it on the stack. */
331 #define SET_REG_END(state, reg, text, on_error) \
332 if(state.changed[reg] < state.level) \
335 STACK_NEXT(state.stack, item, on_error); \
336 item->reg.num = reg; \
337 item->reg.start = state.start[reg]; \
338 item->reg.end = state.end[reg]; \
339 item->reg.level = state.changed[reg]; \
340 state.changed[reg] = state.level; \
343 state.end[reg] = text
345 #define PUSH_FAILURE(state, xcode, xtext, on_error) \
348 STACK_NEXT(state.stack, item, on_error); \
349 item->fail.code = xcode; \
350 item->fail.text = xtext; \
351 item->fail.count = state.count; \
352 item->fail.level = state.level; \
353 item->fail.phantom = 0; \
359 /* Update the last failure point with a new position in the text. */
361 #define UPDATE_FAILURE(state, xtext, on_error) \
364 STACK_BACK(state.stack, item, state.count + 1, on_error); \
365 if (!item->fail.phantom) \
368 STACK_NEXT(state.stack, item2, on_error); \
369 item2->fail.code = item->fail.code; \
370 item2->fail.text = xtext; \
371 item2->fail.count = state.count; \
372 item2->fail.level = state.level; \
373 item2->fail.phantom = 1; \
380 STACK_DISCARD(state.stack, state.count, on_error); \
381 STACK_TOP(state.stack, item, on_error); \
382 item->fail.text = xtext; \
388 #define POP_FAILURE(state, xcode, xtext, on_empty, on_error) \
393 while(state.count > 0) \
395 STACK_PREV(state.stack, item, on_error); \
396 state.start[item->reg.num] = item->reg.start; \
397 state.end[item->reg.num] = item->reg.end; \
398 state.changed[item->reg.num] = item->reg.level; \
401 STACK_PREV(state.stack, item, on_empty); \
402 xcode = item->fail.code; \
403 xtext = item->fail.text; \
404 state.count = item->fail.count; \
405 state.level = item->fail.level; \
408 while (item->fail.text == NULL); \
411 enum regexp_compiled_ops /* opcodes for compiled regexp */
413 Cend, /* end of pattern reached */
414 Cbol, /* beginning of line */
415 Ceol, /* end of line */
416 Cset, /* character set. Followed by 32 bytes of set. */
417 Cexact, /* followed by a byte to match */
418 Canychar, /* matches any character except newline */
419 Cstart_memory, /* set register start addr (followed by reg number) */
420 Cend_memory, /* set register end addr (followed by reg number) */
421 Cmatch_memory, /* match a duplicate of reg contents (regnum follows)*/
422 Cjump, /* followed by two bytes (lsb,msb) of displacement. */
423 Cstar_jump, /* will change to jump/update_failure_jump at runtime */
424 Cfailure_jump, /* jump to addr on failure */
425 Cupdate_failure_jump, /* update topmost failure point and jump */
426 Cdummy_failure_jump, /* push a dummy failure point and jump */
427 Cbegbuf, /* match at beginning of buffer */
428 Cendbuf, /* match at end of buffer */
429 Cwordbeg, /* match at beginning of word */
430 Cwordend, /* match at end of word */
431 Cwordbound, /* match if at word boundary */
432 Cnotwordbound, /* match if not at word boundary */
433 Csyntaxspec, /* matches syntax code (1 byte follows) */
434 Cnotsyntaxspec, /* matches if syntax code does not match (1 byte foll)*/
438 enum regexp_syntax_op /* syntax codes for plain and quoted characters */
440 Rend, /* special code for end of regexp */
441 Rnormal, /* normal character */
442 Ranychar, /* any character except newline */
443 Rquote, /* the quote character */
444 Rbol, /* match beginning of line */
445 Reol, /* match end of line */
446 Roptional, /* match preceding expression optionally */
447 Rstar, /* match preceding expr zero or more times */
448 Rplus, /* match preceding expr one or more times */
449 Ror, /* match either of alternatives */
450 Ropenpar, /* opening parenthesis */
451 Rclosepar, /* closing parenthesis */
452 Rmemory, /* match memory register */
453 Rextended_memory, /* \vnn to match registers 10-99 */
454 Ropenset, /* open set. Internal syntax hard-coded below. */
455 /* the following are gnu extensions to "normal" regexp syntax */
456 Rbegbuf, /* beginning of buffer */
457 Rendbuf, /* end of buffer */
458 Rwordchar, /* word character */
459 Rnotwordchar, /* not word character */
460 Rwordbeg, /* beginning of word */
461 Rwordend, /* end of word */
462 Rwordbound, /* word bound */
463 Rnotwordbound, /* not word bound */
465 Ropenrep, /* opening bounded repeat */
469 #define Swhitespace 2
471 #define Soctaldigit 8
474 #define NUM_LEVELS 5 /* number of precedence levels in use */
475 #define MAX_NESTING 100 /* max nesting level of operators */
476 #define SYNTAX(ch) silc_re_syntax_table[(unsigned char)(ch)]
478 static int silc_regexp_syntax = RE_SYNTAX_POSIX;
479 static int silc_regexp_context_indep_ops;
480 static int silc_regexp_ansi_sequences;
481 static int silc_re_compile_initialized = 0;
482 static unsigned char silc_re_syntax_table[256];
483 static unsigned char silc_regexp_plain_ops[256];
484 static unsigned char silc_regexp_quoted_ops[256];
485 static unsigned char silc_regexp_precedencess[Rnum_ops];
487 void silc_re_compile_initialize(void)
491 static int syntax_table_inited = 0;
493 if (!syntax_table_inited)
495 syntax_table_inited = 1;
496 memset(silc_re_syntax_table, 0, 256);
497 for (a = 'a'; a <= 'z'; a++)
498 silc_re_syntax_table[a] = Sword;
499 for (a = 'A'; a <= 'Z'; a++)
500 silc_re_syntax_table[a] = Sword;
501 for (a = '0'; a <= '9'; a++)
502 silc_re_syntax_table[a] = Sword | Sdigit | Shexdigit;
503 for (a = '0'; a <= '7'; a++)
504 silc_re_syntax_table[a] |= Soctaldigit;
505 for (a = 'A'; a <= 'F'; a++)
506 silc_re_syntax_table[a] |= Shexdigit;
507 for (a = 'a'; a <= 'f'; a++)
508 silc_re_syntax_table[a] |= Shexdigit;
509 silc_re_syntax_table['_'] = Sword;
510 for (a = 9; a <= 13; a++)
511 silc_re_syntax_table[a] = Swhitespace;
512 silc_re_syntax_table[' '] = Swhitespace;
514 silc_re_compile_initialized = 1;
515 for (a = 0; a < 256; a++)
517 silc_regexp_plain_ops[a] = Rnormal;
518 silc_regexp_quoted_ops[a] = Rnormal;
520 for (a = '0'; a <= '9'; a++)
521 silc_regexp_quoted_ops[a] = Rmemory;
522 silc_regexp_plain_ops['\134'] = Rquote;
523 if (silc_regexp_syntax & RE_NO_BK_PARENS)
525 silc_regexp_plain_ops['('] = Ropenpar;
526 silc_regexp_plain_ops[')'] = Rclosepar;
530 silc_regexp_quoted_ops['('] = Ropenpar;
531 silc_regexp_quoted_ops[')'] = Rclosepar;
533 if (silc_regexp_syntax & RE_NO_BK_VBAR)
534 silc_regexp_plain_ops['\174'] = Ror;
536 silc_regexp_quoted_ops['\174'] = Ror;
537 silc_regexp_plain_ops['*'] = Rstar;
538 if (silc_regexp_syntax & RE_BK_PLUS_QM)
540 silc_regexp_quoted_ops['+'] = Rplus;
541 silc_regexp_quoted_ops['?'] = Roptional;
545 silc_regexp_plain_ops['+'] = Rplus;
546 silc_regexp_plain_ops['?'] = Roptional;
548 if (silc_regexp_syntax & RE_NEWLINE_OR)
549 silc_regexp_plain_ops['\n'] = Ror;
550 silc_regexp_plain_ops['\133'] = Ropenset;
551 silc_regexp_plain_ops['\136'] = Rbol;
552 silc_regexp_plain_ops['$'] = Reol;
553 silc_regexp_plain_ops['.'] = Ranychar;
554 if (!(silc_regexp_syntax & RE_NO_GNU_EXTENSIONS))
556 silc_regexp_quoted_ops['w'] = Rwordchar;
557 silc_regexp_quoted_ops['W'] = Rnotwordchar;
558 silc_regexp_quoted_ops['<'] = Rwordbeg;
559 silc_regexp_quoted_ops['>'] = Rwordend;
560 silc_regexp_quoted_ops['b'] = Rwordbound;
561 silc_regexp_quoted_ops['B'] = Rnotwordbound;
562 silc_regexp_quoted_ops['`'] = Rbegbuf;
563 silc_regexp_quoted_ops['\''] = Rendbuf;
565 if (silc_regexp_syntax & RE_ANSI_HEX)
566 silc_regexp_quoted_ops['v'] = Rextended_memory;
567 for (a = 0; a < Rnum_ops; a++)
568 silc_regexp_precedencess[a] = 4;
569 if (silc_regexp_syntax & RE_TIGHT_VBAR)
571 silc_regexp_precedencess[Ror] = 3;
572 silc_regexp_precedencess[Rbol] = 2;
573 silc_regexp_precedencess[Reol] = 2;
577 silc_regexp_precedencess[Ror] = 2;
578 silc_regexp_precedencess[Rbol] = 3;
579 silc_regexp_precedencess[Reol] = 3;
581 if (silc_regexp_syntax & RE_REPEAT)
583 if (silc_regexp_syntax & RE_NO_BK_PARENS)
585 silc_regexp_plain_ops['{'] = Ropenrep;
589 silc_regexp_quoted_ops['{'] = Ropenrep;
592 silc_regexp_precedencess[Rclosepar] = 1;
593 silc_regexp_precedencess[Rend] = 0;
594 silc_regexp_context_indep_ops = (silc_regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0;
595 silc_regexp_ansi_sequences = (silc_regexp_syntax & RE_ANSI_HEX) != 0;
598 int silc_re_set_syntax(int syntax)
602 ret = silc_regexp_syntax;
603 silc_regexp_syntax = syntax;
604 silc_re_compile_initialize();
608 static int silc_hex_char_to_decimal(int ch)
610 if (ch >= '0' && ch <= '9')
612 if (ch >= 'a' && ch <= 'f')
613 return ch - 'a' + 10;
614 if (ch >= 'A' && ch <= 'F')
615 return ch - 'A' + 10;
619 static int silc_re_compile_fastmap_aux(unsigned char *code, int pos,
620 unsigned char *visited,
621 unsigned char *can_be_null,
622 unsigned char *fastmap)
629 return 0; /* we have already been here */
632 switch (code[pos++]) {
646 for (a = 0; a < 256; a++)
652 syntaxcode = code[pos++];
653 for (a = 0; a < 256; a++)
654 if (SYNTAX(a) & syntaxcode)
660 syntaxcode = code[pos++];
661 for (a = 0; a < 256; a++)
662 if (!(SYNTAX(a) & syntaxcode) )
669 if (*can_be_null == 0)
670 *can_be_null = 2; /* can match null, but only at end of buffer*/
675 for (a = 0; a < 256/8; a++)
676 if (code[pos + a] != 0)
677 for (b = 0; b < 8; b++)
678 if (code[pos + a] & (1 << b))
679 fastmap[(a << 3) + b] = 1;
685 fastmap[(unsigned char)code[pos]] = 1;
690 for (a = 0; a < 256; a++)
703 for (a = 0; a < 256; a++)
709 case Cdummy_failure_jump:
710 case Cupdate_failure_jump:
713 a = (unsigned char)code[pos++];
714 a |= (unsigned char)code[pos++] << 8;
715 pos += (int)SHORT(a);
718 /* argh... the regexp contains empty loops. This is not
719 good, as this may cause a failure stack overflow when
720 matching. Oh well. */
721 /* this path leads nowhere; pursue other paths. */
729 a = (unsigned char)code[pos++];
730 a |= (unsigned char)code[pos++] << 8;
731 a = pos + (int)SHORT(a);
732 return silc_re_compile_fastmap_aux(code, a, visited,
733 can_be_null, fastmap);
742 silc_set_errno(SILC_ERR_REGEX_OPCODE);
749 static int silc_re_do_compile_fastmap(unsigned char *buffer, int used, int pos,
750 unsigned char *can_be_null,
751 unsigned char *fastmap, SilcRegex bufp)
753 unsigned char small_visited[512], *visited;
756 if (used <= sizeof(small_visited))
757 visited = small_visited;
760 silc_stack_push(bufp->rstack, NULL);
761 visited = silc_smalloc(bufp->rstack, used);
763 silc_stack_pop(bufp->rstack);
768 memset(fastmap, 0, 256);
769 memset(visited, 0, used);
770 ret = silc_re_compile_fastmap_aux(buffer, pos, visited,
771 can_be_null, fastmap);
772 if (visited != small_visited) {
773 silc_sfree(bufp->rstack, visited);
774 silc_stack_pop(bufp->rstack);
779 int silc_re_compile_fastmap(SilcRegex bufp)
781 if (!bufp->fastmap || bufp->fastmap_accurate)
783 SILC_ASSERT(bufp->used > 0);
784 if (!silc_re_do_compile_fastmap(bufp->buffer,
788 bufp->fastmap, bufp))
790 if (bufp->buffer[0] == Cbol)
791 bufp->anchor = 1; /* begline */
793 if (bufp->buffer[0] == Cbegbuf)
794 bufp->anchor = 2; /* begbuf */
796 bufp->anchor = 0; /* none */
798 bufp->fastmap_accurate = 1;
805 * ... code for operand of star
807 * 2: ... code after star
809 * We change the star_jump to update_failure_jump if we can determine
810 * that it is safe to do so; otherwise we change it to an ordinary
817 * 2: ... code for operand of plus
819 * 3: ... code after plus
821 * For star_jump considerations this is processed identically to star.
825 static int silc_re_optimize_star_jump(SilcRegex bufp, unsigned char *code)
827 unsigned char map[256];
828 unsigned char can_be_null;
834 int num_instructions = 0;
836 a = (unsigned char)*code++;
837 a |= (unsigned char)*code++ << 8;
840 p1 = code + a + 3; /* skip the failure_jump */
841 /* Check that the jump is within the pattern */
842 if (p1<bufp->buffer || bufp->buffer+bufp->used<p1)
844 silc_set_errno(SILC_ERR_OVERFLOW);
848 SILC_ASSERT(p1[-3] == Cfailure_jump);
850 /* p1 points inside loop, p2 points to after loop */
851 if (!silc_re_do_compile_fastmap(bufp->buffer, bufp->used,
852 (int)(p2 - bufp->buffer),
853 &can_be_null, map, bufp))
854 goto make_normal_jump;
856 /* If we might introduce a new update point inside the
857 * loop, we can't optimize because then update_jump would
858 * update a wrong failure point. Thus we have to be
859 * quite careful here.
862 /* loop until we find something that consumes a character */
886 ch = (unsigned char)*p1++;
888 goto make_normal_jump;
893 for (b = 0; b < 256; b++)
894 if (b != '\n' && map[b])
895 goto make_normal_jump;
900 for (b = 0; b < 256; b++)
901 if ((p1[b >> 3] & (1 << (b & 7))) && map[b])
902 goto make_normal_jump;
908 goto make_normal_jump;
911 /* now we know that we can't backtrack. */
951 case Cupdate_failure_jump:
952 case Cdummy_failure_jump:
954 goto make_normal_jump;
963 /* make_update_jump: */
965 a += 3; /* jump to after the Cfailure_jump */
966 code[0] = Cupdate_failure_jump;
969 if (num_instructions > 1)
971 SILC_ASSERT(num_instructions == 1);
972 /* if the only instruction matches a single character, we can do
974 p1 = code + 3 + a; /* start of sole instruction */
975 if (*p1 == Cset || *p1 == Cexact || *p1 == Canychar ||
976 *p1 == Csyntaxspec || *p1 == Cnotsyntaxspec)
986 static int silc_re_optimize(SilcRegex bufp)
1022 case Cnotsyntaxspec:
1029 if (!silc_re_optimize_star_jump(bufp, code))
1035 case Cupdate_failure_jump:
1037 case Cdummy_failure_jump:
1052 #define NEXTCHAR(var) \
1055 goto ends_prematurely; \
1056 (var) = regex[pos]; \
1060 #define ALLOC(amount) \
1062 if (pattern_offset+(amount) > alloc) \
1064 pattern = silc_srealloc(bufp->rstack, alloc, pattern, \
1065 alloc + 256 + (amount)); \
1066 alloc += 256 + (amount); \
1068 goto out_of_memory; \
1072 #define STORE(ch) pattern[pattern_offset++] = (ch)
1074 #define CURRENT_LEVEL_START (starts[starts_base + current_level])
1076 #define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset
1078 #define PUSH_LEVEL_STARTS \
1079 if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \
1080 starts_base += NUM_LEVELS; \
1084 #define POP_LEVEL_STARTS starts_base -= NUM_LEVELS
1086 #define PUT_ADDR(offset,addr) \
1088 int disp = (addr) - (offset) - 2; \
1089 pattern[(offset)] = disp & 0xff; \
1090 pattern[(offset)+1] = (disp>>8) & 0xff; \
1093 #define INSERT_JUMP(pos,type,addr) \
1095 int a, p = (pos), t = (type), ad = (addr); \
1096 for (a = pattern_offset - 1; a >= p; a--) \
1097 pattern[a + 3] = pattern[a]; \
1100 pattern_offset += 3; \
1103 #define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7))
1105 #define SET_FIELDS \
1107 bufp->allocated = alloc; \
1108 bufp->buffer = pattern; \
1109 bufp->used = pattern_offset; \
1112 #define GETHEX(var) \
1114 unsigned char gethex_ch, gethex_value; \
1115 NEXTCHAR(gethex_ch); \
1116 gethex_value = silc_hex_char_to_decimal(gethex_ch); \
1117 if (gethex_value == 16) \
1119 NEXTCHAR(gethex_ch); \
1120 gethex_ch = silc_hex_char_to_decimal(gethex_ch); \
1121 if (gethex_ch == 16) \
1123 (var) = gethex_value * 16 + gethex_ch; \
1126 #define ANSI_TRANSLATE(ch) \
1133 ch = 7; /* audible bell */ \
1139 ch = 8; /* backspace */ \
1145 ch = 12; /* form feed */ \
1151 ch = 10; /* line feed */ \
1157 ch = 13; /* carriage return */ \
1169 ch = 11; /* vertical tab */ \
1172 case 'x': /* hex code */ \
1180 /* other characters passed through */ \
1182 ch = translate[(unsigned char)ch]; \
1188 SilcResult silc_re_compile_pattern(unsigned char *regex, int size,
1197 int pattern_offset = 0, alloc;
1198 int starts[NUM_LEVELS * MAX_NESTING];
1200 int future_jumps[MAX_NESTING];
1202 unsigned char ch = '\0';
1203 unsigned char *pattern;
1204 unsigned char *translate;
1207 int num_open_registers;
1208 int open_registers[RE_NREGS];
1209 int beginning_context;
1211 if (!silc_re_compile_initialized)
1212 silc_re_compile_initialize();
1214 bufp->fastmap_accurate = 0;
1215 bufp->uses_registers = 1;
1216 bufp->num_registers = 1;
1217 translate = bufp->translate;
1218 pattern = bufp->buffer;
1219 alloc = bufp->allocated;
1220 if (alloc == 0 || pattern == NULL)
1223 pattern = silc_smalloc(bufp->rstack, alloc);
1232 num_open_registers = 0;
1235 beginning_context = 1;
1237 /* we use Rend dummy to ensure that pending jumps are updated
1238 (due to low priority of Rend) before exiting the loop. */
1248 ch = translate[(unsigned char)ch];
1249 op = silc_regexp_plain_ops[(unsigned char)ch];
1253 op = silc_regexp_quoted_ops[(unsigned char)ch];
1254 if (op == Rnormal && silc_regexp_ansi_sequences)
1258 level = silc_regexp_precedencess[op];
1259 /* printf("ch='%c' op=%d level=%d current_level=%d
1260 curlevstart=%d\n", ch, op, level, current_level,
1261 CURRENT_LEVEL_START); */
1262 if (level > current_level)
1264 for (current_level++; current_level < level; current_level++)
1269 if (level < current_level)
1271 current_level = level;
1272 for (;num_jumps > 0 &&
1273 future_jumps[num_jumps-1] >= CURRENT_LEVEL_START;
1275 PUT_ADDR(future_jumps[num_jumps-1], pattern_offset);
1287 store_opcode_and_arg: /* opcode & ch must be set */
1305 SILC_VERIFY(op != Rquote);
1310 if (!beginning_context) {
1311 if (silc_regexp_context_indep_ops)
1321 if (!((pos >= size) ||
1322 ((silc_regexp_syntax & RE_NO_BK_VBAR) ?
1323 (regex[pos] == '\174') :
1324 (pos+1 < size && regex[pos] == '\134' &&
1325 regex[pos+1] == '\174')) ||
1326 ((silc_regexp_syntax & RE_NO_BK_PARENS)?
1327 (regex[pos] == ')'):
1328 (pos+1 < size && regex[pos] == '\134' &&
1329 regex[pos+1] == ')')))) {
1330 if (silc_regexp_context_indep_ops)
1342 if (beginning_context) {
1343 if (silc_regexp_context_indep_ops)
1348 if (CURRENT_LEVEL_START == pattern_offset)
1349 break; /* ignore empty patterns for ? */
1351 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
1352 pattern_offset + 3);
1358 if (beginning_context) {
1359 if (silc_regexp_context_indep_ops)
1364 if (CURRENT_LEVEL_START == pattern_offset)
1365 break; /* ignore empty patterns for + and * */
1368 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
1369 pattern_offset + 6);
1370 INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START);
1371 if (op == Rplus) /* jump over initial failure_jump */
1372 INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump,
1373 CURRENT_LEVEL_START + 6);
1379 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
1380 pattern_offset + 6);
1381 if (num_jumps >= MAX_NESTING)
1384 future_jumps[num_jumps++] = pattern_offset;
1393 if (next_register < RE_NREGS)
1395 bufp->uses_registers = 1;
1397 STORE(Cstart_memory);
1398 STORE(next_register);
1399 open_registers[num_open_registers++] = next_register;
1400 bufp->num_registers++;
1411 if (paren_depth <= 0)
1412 goto parenthesis_error;
1414 current_level = silc_regexp_precedencess[Ropenpar];
1416 if (paren_depth < num_open_registers)
1418 bufp->uses_registers = 1;
1421 num_open_registers--;
1422 STORE(open_registers[num_open_registers]);
1429 goto bad_match_register;
1430 SILC_ASSERT(ch >= '0' && ch <= '9');
1431 bufp->uses_registers = 1;
1432 opcode = Cmatch_memory;
1434 goto store_opcode_and_arg;
1436 case Rextended_memory:
1439 if (ch < '0' || ch > '9')
1440 goto bad_match_register;
1442 if (a < '0' || a > '9')
1443 goto bad_match_register;
1444 ch = 10 * (a - '0') + ch - '0';
1445 if (ch == 0 || ch >= RE_NREGS)
1446 goto bad_match_register;
1447 bufp->uses_registers = 1;
1448 opcode = Cmatch_memory;
1449 goto store_opcode_and_arg;
1462 offset = pattern_offset;
1463 for (a = 0; a < 256/8; a++)
1467 ch = translate[(unsigned char)ch];
1473 ch = translate[(unsigned char)ch];
1480 while (ch != '\135' || firstchar)
1483 if (silc_regexp_ansi_sequences && ch == '\134')
1490 for (a = prev; a <= (int)ch; a++)
1491 SETBIT(pattern, offset, a);
1496 if (prev != -1 && ch == '-')
1500 SETBIT(pattern, offset, ch);
1505 ch = translate[(unsigned char)ch];
1508 SETBIT(pattern, offset, '-');
1511 for (a = 0; a < 256/8; a++)
1512 pattern[offset+a] ^= 0xff;
1518 /* The bounded repeat syntax: a{n}, a{n,} and a{n,m}. The first
1519 is compiled as n-1 Rnormals. The second is compiled as n-1
1520 Rnormals and one Rplus. The third is compiled as n-1 Rnormals
1521 and m-n Rnormals with Roptionals. 0 values have special
1523 int min, max, i, alen = 2;
1526 goto normal_char; /* Consider literal */
1528 /* Get the preceding atom */
1530 goto normal_char; /* Consider literal */
1534 a = translate[(unsigned char)a];
1535 op = silc_regexp_plain_ops[(unsigned char)a];
1537 if (op == Ranychar) {
1548 goto normal_char; /* Consider literal */
1551 while (isdigit(ch)) {
1558 goto repeat_value_error;
1564 /* Will not do any matching with a{0} */
1565 pattern_offset -= 2;
1569 /* Store min - 1 many Cexacts. */
1570 for (i = 0; i < min - 1; i++) {
1575 STORE((unsigned char)a);
1584 /* The a{n,} case */
1587 /* Store Rstar with a{0,} */
1592 /* Store min - 1 many Cexacts. */
1593 for (i = 0; i < min - 1; i++) {
1598 STORE((unsigned char)a);
1606 /* The a{n,m} case */
1610 goto repeat_value_error;
1613 while (isdigit(ch)) {
1620 goto repeat_value_error;
1622 goto repeat_value_error;
1625 /* Will not do any matching with a{0,0} */
1626 pattern_offset -= 2;
1634 /* Only optional matching with a{0,m}. */
1635 pattern_offset -= 2;
1637 /* Store min - 1 many Cexacts. */
1638 for (i = 0; min && i < min - 1; i++) {
1643 STORE((unsigned char)a);
1646 /* Store max - min Cexacts and Roptionals. */
1647 for (i = 0; i < max - min; i++) {
1652 STORE((unsigned char)a);
1654 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
1655 pattern_offset + 3);
1674 opcode = Csyntaxspec;
1676 goto store_opcode_and_arg;
1680 opcode = Cnotsyntaxspec;
1682 goto store_opcode_and_arg;
1696 opcode = Cwordbound;
1701 opcode = Cnotwordbound;
1709 beginning_context = (op == Ropenpar || op == Ror);
1711 if (starts_base != 0)
1712 goto parenthesis_error;
1713 SILC_ASSERT(num_jumps == 0);
1717 if (!silc_re_optimize(bufp))
1723 return SILC_ERR_REGEX_SPECIAL;
1727 return SILC_ERR_REGEX_REG;
1731 return SILC_ERR_REGEX_HEX;
1735 return SILC_ERR_REGEX_PAREN;
1739 return SILC_ERR_OUT_OF_MEMORY;
1743 return SILC_ERR_OVERFLOW;
1747 return SILC_ERR_REGEX_TOO_COMPLEX;
1751 return SILC_ERR_REGEX_REPEAT;
1759 #undef CURRENT_LEVEL_START
1760 #undef SET_LEVEL_START
1761 #undef PUSH_LEVEL_STARTS
1762 #undef POP_LEVEL_STARTS
1768 #define PREFETCH if (text == textend) goto fail
1770 #define NEXTCHAR(var) \
1772 var = (unsigned char)*text++; \
1774 var = translate[var]
1776 int silc_re_match(SilcRegex bufp, unsigned char *string, int size, int pos,
1777 regexp_registers_t old_regs, unsigned int flags)
1779 unsigned char *code;
1780 unsigned char *translate;
1781 unsigned char *text;
1782 unsigned char *textstart;
1783 unsigned char *textend;
1789 unsigned char *regstart;
1790 unsigned char *regend;
1794 SILC_ASSERT(pos >= 0 && size >= 0);
1795 SILC_ASSERT(pos <= size);
1797 text = string + pos;
1799 textend = string + size;
1801 code = bufp->buffer;
1803 translate = bufp->translate;
1805 NEW_STATE(state, bufp->num_registers);
1812 match_end = text - textstart;
1815 old_regs->start[0] = pos;
1816 old_regs->end[0] = match_end;
1817 if (!bufp->uses_registers)
1819 for (a = 1; a < RE_NREGS; a++)
1821 old_regs->start[a] = -1;
1822 old_regs->end[a] = -1;
1827 for (a = 1; a < bufp->num_registers; a++)
1829 if ((GET_REG_START(state, a) == NULL) ||
1830 (GET_REG_END(state, a) == NULL))
1832 old_regs->start[a] = -1;
1833 old_regs->end[a] = -1;
1836 old_regs->start[a] = GET_REG_START(state, a) - textstart;
1837 old_regs->end[a] = GET_REG_END(state, a) - textstart;
1839 for (; a < RE_NREGS; a++)
1841 old_regs->start[a] = -1;
1842 old_regs->end[a] = -1;
1847 return match_end - pos;
1851 if (text == textstart || text[-1] == '\n') {
1852 if (flags & RE_NOTBOL)
1854 goto continue_matching;
1860 if (text == textend || *text == '\n') {
1861 if (flags & RE_NOTEOL)
1863 goto continue_matching;
1870 if (code[ch/8] & (1<<(ch & 7)))
1873 goto continue_matching;
1880 if (ch != (unsigned char)*code++)
1882 goto continue_matching;
1889 goto continue_matching;
1894 SET_REG_START(state, reg, text, goto error);
1895 goto continue_matching;
1900 SET_REG_END(state, reg, text, goto error);
1901 goto continue_matching;
1906 regstart = GET_REG_START(state, reg);
1907 regend = GET_REG_END(state, reg);
1908 if ((regstart == NULL) || (regend == NULL))
1909 goto fail; /* or should we just match nothing? */
1910 regsize = regend - regstart;
1912 if (regsize > (textend - text))
1916 for (; regstart < regend; regstart++, text++)
1917 if (translate[*regstart] != translate[*text])
1921 for (; regstart < regend; regstart++, text++)
1922 if (*regstart != *text)
1924 goto continue_matching;
1926 case Cupdate_failure_jump:
1928 UPDATE_FAILURE(state, text, goto error);
1929 /* fall to next case */
1931 /* treat Cstar_jump just like Cjump if it hasn't been optimized */
1935 a = (unsigned char)*code++;
1936 a |= (unsigned char)*code++ << 8;
1937 code += (int)SHORT(a);
1938 if (code<bufp->buffer || bufp->buffer+bufp->used<code) {
1939 silc_set_errno(SILC_ERR_OVERFLOW);
1943 goto continue_matching;
1945 case Cdummy_failure_jump:
1947 unsigned char *failuredest;
1949 a = (unsigned char)*code++;
1950 a |= (unsigned char)*code++ << 8;
1952 SILC_ASSERT(*code == Cfailure_jump);
1953 b = (unsigned char)code[1];
1954 b |= (unsigned char)code[2] << 8;
1955 failuredest = code + (int)SHORT(b) + 3;
1956 if (failuredest<bufp->buffer || bufp->buffer+bufp->used < failuredest) {
1957 silc_set_errno(SILC_ERR_OVERFLOW);
1961 PUSH_FAILURE(state, failuredest, NULL, goto error);
1963 if (code<bufp->buffer || bufp->buffer+bufp->used < code) {
1964 silc_set_errno(SILC_ERR_OVERFLOW);
1968 goto continue_matching;
1972 a = (unsigned char)*code++;
1973 a |= (unsigned char)*code++ << 8;
1975 if (code+a<bufp->buffer || bufp->buffer+bufp->used < code+a) {
1976 silc_set_errno(SILC_ERR_OVERFLOW);
1980 PUSH_FAILURE(state, code + a, text, goto error);
1981 goto continue_matching;
1985 unsigned char *pinst;
1986 a = (unsigned char)*code++;
1987 a |= (unsigned char)*code++ << 8;
1990 if (pinst<bufp->buffer || bufp->buffer+bufp->used<pinst) {
1991 silc_set_errno(SILC_ERR_OVERFLOW);
1995 /* pinst is sole instruction in loop, and it matches a
1996 * single character. Since Crepeat1 was originally a
1997 * Cupdate_failure_jump, we also know that backtracking
1998 * is useless: so long as the single-character
1999 * expression matches, it must be used. Also, in the
2000 * case of +, we've already matched one character, so +
2001 * can't fail: nothing here can cause a failure. */
2008 while (text < textend)
2010 ch = translate[(unsigned char)*text];
2011 if (pinst[ch/8] & (1<<(ch & 7)))
2019 while (text < textend)
2021 ch = (unsigned char)*text;
2022 if (pinst[ch/8] & (1<<(ch & 7)))
2032 ch = (unsigned char)*pinst;
2035 while (text < textend &&
2036 translate[(unsigned char)*text] == ch)
2041 while (text < textend && (unsigned char)*text == ch)
2048 while (text < textend && (unsigned char)*text != '\n')
2054 a = (unsigned char)*pinst;
2057 while (text < textend &&
2058 (SYNTAX(translate[*text]) & a) )
2063 while (text < textend && (SYNTAX(*text) & a) )
2068 case Cnotsyntaxspec:
2070 a = (unsigned char)*pinst;
2073 while (text < textend &&
2074 !(SYNTAX(translate[*text]) & a) )
2079 while (text < textend && !(SYNTAX(*text) & a) )
2087 silc_set_errno(SILC_ERR_REGEX_OPCODE);
2092 /* due to the funky way + and * are compiled, the top
2093 * failure- stack entry at this point is actually a
2094 * success entry -- update it & pop it */
2095 UPDATE_FAILURE(state, text, goto error);
2096 goto fail; /* i.e., succeed <wink/sigh> */
2100 if (text == textstart)
2101 goto continue_matching;
2106 if (text == textend)
2107 goto continue_matching;
2112 if (text == textend)
2114 if (!(SYNTAX(*text) & Sword))
2116 if (text == textstart)
2117 goto continue_matching;
2118 if (!(SYNTAX(text[-1]) & Sword))
2119 goto continue_matching;
2124 if (text == textstart)
2126 if (!(SYNTAX(text[-1]) & Sword))
2128 if (text == textend)
2129 goto continue_matching;
2130 if (!(SYNTAX(*text) & Sword))
2131 goto continue_matching;
2136 /* Note: as in gnu regexp, this also matches at the
2137 * beginning and end of buffer. */
2139 if (text == textstart || text == textend)
2140 goto continue_matching;
2141 if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))
2142 goto continue_matching;
2147 /* Note: as in gnu regexp, this never matches at the
2148 * beginning and end of buffer. */
2149 if (text == textstart || text == textend)
2151 if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)))
2152 goto continue_matching;
2158 if (!(SYNTAX(ch) & (unsigned char)*code++))
2160 goto continue_matching;
2162 case Cnotsyntaxspec:
2165 if (SYNTAX(ch) & (unsigned char)*code++)
2167 goto continue_matching;
2172 silc_set_errno(SILC_ERR_REGEX_OPCODE);
2178 /* Using "break;" in the above switch statement is equivalent to
2181 POP_FAILURE(state, code, text, goto done_matching, goto error);
2182 goto continue_matching;
2185 /* if(translated != NULL) */
2186 /* free(translated); */
2191 /* if (translated != NULL) */
2192 /* free(translated); */
2200 int silc_re_search(SilcRegex bufp, unsigned char *string, int size, int pos,
2201 int range, regexp_registers_t regs, unsigned int flags)
2203 unsigned char *fastmap;
2204 unsigned char *translate;
2205 unsigned char *text;
2206 unsigned char *partstart;
2207 unsigned char *partend;
2210 unsigned char anchor;
2212 SILC_ASSERT(size >= 0 && pos >= 0);
2213 SILC_ASSERT(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */
2215 fastmap = bufp->fastmap;
2216 translate = bufp->translate;
2217 if (fastmap && !bufp->fastmap_accurate) {
2218 if (silc_re_compile_fastmap(bufp))
2222 anchor = bufp->anchor;
2223 if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */
2241 for (; range >= 0; range--, pos += dir)
2246 { /* searching forwards */
2248 text = string + pos;
2249 partend = string + size;
2252 while (text != partend &&
2253 !fastmap[(unsigned char) translate[(unsigned char)*text]])
2256 while (text != partend && !fastmap[(unsigned char)*text])
2258 pos += text - partstart;
2259 range -= text - partstart;
2260 if (pos == size && bufp->can_be_null == 0)
2264 { /* searching backwards */
2265 text = string + pos;
2266 partstart = string + pos - range;
2269 while (text != partstart &&
2270 !fastmap[(unsigned char)
2271 translate[(unsigned char)*text]])
2274 while (text != partstart &&
2275 !fastmap[(unsigned char)*text])
2277 pos -= partend - text;
2278 range -= partend - text;
2282 { /* anchored to begline */
2283 if (pos > 0 && (string[pos - 1] != '\n'))
2286 SILC_ASSERT(pos >= 0 && pos <= size);
2287 ret = silc_re_match(bufp, string, size, pos, regs, flags);
2296 /****************************** SILC Regex API ******************************/
2298 /* Compile regular expression */
2300 SilcBool silc_regex_compile(SilcRegex regexp, const char *regex,
2301 SilcRegexFlags flags)
2305 if (!regexp || !regex) {
2306 silc_set_errno(SILC_ERR_INVALID_ARGUMENT);
2310 memset(regexp, 0, sizeof(*regexp));
2312 /* Get global stack, if set, and create child stack. */
2313 regexp->rstack = silc_stack_get_global();
2315 regexp->rstack = silc_stack_alloc(512, regexp->rstack);
2318 ret = silc_re_compile_pattern((unsigned char *)regex, strlen(regex), regexp);
2320 silc_set_errno(ret);
2322 if (ret != SILC_OK) {
2323 silc_regex_free(regexp);
2324 regexp->rstack = NULL;
2325 regexp->buffer = NULL;
2328 return ret == SILC_OK;
2331 /* Match compiled regular expression */
2333 SilcBool silc_regex_match(SilcRegex regexp, const char *string,
2334 SilcUInt32 string_len, SilcUInt32 num_match,
2335 SilcRegexMatch match, SilcRegexFlags flags)
2337 struct re_registers regs;
2341 if (!regexp || !string) {
2342 silc_set_errno(SILC_ERR_INVALID_ARGUMENT);
2346 if (num_match && !match) {
2347 silc_set_errno(SILC_ERR_INVALID_ARGUMENT);
2351 /* Internal limit for maximum number of registers */
2352 if (num_match > RE_NREGS)
2353 num_match = RE_NREGS;
2356 if (flags & SILC_REGEX_NOTBOL)
2358 if (flags & SILC_REGEX_NOTEOL)
2362 ret = silc_re_search(regexp, (unsigned char *)string, string_len, 0,
2363 string_len, num_match ? ®s : NULL, f);
2366 silc_set_errno(SILC_ERR_NOT_FOUND);
2370 /* Return matches */
2371 for (i = 0; i < num_match; i++) {
2372 match[i].start = regs.start[i];
2373 match[i].end = regs.end[i];
2382 void silc_regex_free(SilcRegex regexp)
2384 silc_sfree(regexp->rstack, regexp->buffer);
2385 silc_stack_free(regexp->rstack);
2390 SilcBool silc_regex_va(const char *string, SilcUInt32 string_len,
2391 const char *regex, SilcBuffer match, va_list va)
2393 SilcRegexStruct reg;
2394 SilcRegexMatch m = NULL;
2395 SilcBuffer buf, *rets = NULL;
2400 if (!silc_regex_compile(®, regex, 0))
2404 silc_stack_push(stack, NULL);
2406 /* Get match pointers */
2408 rets = silc_smalloc(stack, sizeof(*rets));
2410 silc_stack_pop(stack);
2411 silc_regex_free(®);
2416 while ((buf = va_arg(va, SilcBuffer))) {
2417 rets = silc_srealloc(stack, c * sizeof(*rets),
2418 rets, (c + 1) * sizeof(*rets));
2420 silc_stack_pop(stack);
2421 silc_regex_free(®);
2427 m = silc_smalloc(stack, c * sizeof(*m));
2429 silc_sfree(stack, rets);
2430 silc_stack_pop(stack);
2431 silc_regex_free(®);
2437 if (!silc_regex_match(®, string, string_len, c, m, 0)) {
2438 silc_sfree(stack, m);
2439 silc_sfree(stack, rets);
2440 silc_stack_pop(stack);
2441 silc_regex_free(®);
2445 /* Return matches */
2446 for (i = 0; i < c; i++) {
2447 if (m[i].start == -1) {
2448 silc_buffer_set(rets[i], NULL, 0);
2451 silc_buffer_set(rets[i], (unsigned char *)string + m[i].start,
2452 m[i].end - m[i].start);
2455 silc_sfree(stack, m);
2456 silc_sfree(stack, rets);
2457 silc_stack_pop(stack);
2458 silc_regex_free(®);
2465 SilcBool silc_regex(const char *string, const char *regex,
2466 SilcBuffer match, ...)
2471 if (!string || !regex) {
2472 silc_set_errno(SILC_ERR_INVALID_ARGUMENT);
2477 va_start(va, match);
2479 ret = silc_regex_va(string, strlen(string), regex, match, va);
2489 SilcBool silc_regex_buffer(SilcBuffer buffer, const char *regex,
2490 SilcBuffer match, ...)
2495 if (!buffer || !regex) {
2496 silc_set_errno(SILC_ERR_INVALID_ARGUMENT);
2501 va_start(va, match);
2503 ret = silc_regex_va((const char *)silc_buffer_data(buffer),
2504 silc_buffer_len(buffer), regex, match, va);
2512 /***************************** Substitution API *****************************/
2514 /* Regexp to parse sed substitution command syntax */
2515 #define SILC_REGEXP_SUBST \
2516 "^(/?.+/?[^!s]|[0-9]+|\\$)?(!?s)(/)(.*[^\\])?(/)(.*[^\\])?(/)(!?.+)?"
2518 /* Substitution context */
2520 SilcInt32 addr_number; /* Line number to match, -1 for last line */
2521 SilcUInt32 line; /* Current line number */
2522 char *str_regexp; /* REGEXP to match */
2523 SilcBufferRegexFlags match_flags; /* Match flags */
2524 SilcBufferRegexFlags addr_flags; /* ADDR flags */
2525 SilcBuffer rep; /* REPLACEMENT */
2528 /* Function to check the ADDR match and do rest of the match and
2531 static int silc_subst_addr(SilcStack stack, SilcBuffer buffer, void *value,
2534 SilcSubstContext *ctx = context;
2538 /* If NUMBER was set in ADDR, match for specific line number */
2539 if (ctx->addr_number > 0 && ctx->addr_number != ctx->line &&
2540 !(ctx->addr_flags & SILC_STR_REGEX_NOT))
2542 if (ctx->addr_number > 0 && ctx->addr_number == ctx->line &&
2543 ctx->addr_flags & SILC_STR_REGEX_NOT)
2546 /* Check for last line if ADDR was '$' */
2547 if (buffer->tail != buffer->end && ctx->addr_number == -1 &&
2548 !(ctx->addr_flags & SILC_STR_REGEX_NOT))
2550 if (buffer->tail == buffer->end && ctx->addr_number == -1 &&
2551 ctx->addr_flags & SILC_STR_REGEX_NOT)
2554 /* Match and replace */
2555 return silc_buffer_format(buffer,
2556 SILC_STR_REGEX(ctx->str_regexp, ctx->match_flags),
2557 SILC_STR_REPLACE(silc_buffer_data(ctx->rep) ?
2558 silc_buffer_data(ctx->rep) :
2559 (unsigned char *)"",
2560 silc_buffer_len(ctx->rep)),
2561 SILC_STR_END, SILC_STR_END);
2564 /* Matching and substitution ala sed. */
2566 SilcBool silc_subst(SilcBuffer buffer, const char *subst)
2568 SilcSubstContext ctx;
2569 SilcBufferStruct match, addr, command, exp_start, exp, exp_end;
2570 SilcBufferStruct rep, rep_end, flags;
2571 SilcBufferRegexFlags addr_flags = 0, match_flags = 0;
2572 char *str_addr = "";
2575 memset(&ctx, 0, sizeof(ctx));
2577 if (!buffer || !subst) {
2578 silc_set_errno(SILC_ERR_INVALID_ARGUMENT);
2582 SILC_LOG_DEBUG(("Substitution '%s'", subst));
2584 /* Parse the expression syntax */
2585 if (!silc_regex(subst, SILC_REGEXP_SUBST, &match, &addr, &command,
2586 &exp_start, &exp, &exp_end, &rep, &rep_end, &flags, NULL)) {
2587 silc_set_errno_reason(SILC_ERR_SYNTAX, "Invalid substitution expression");
2591 /* Check address syntax */
2592 if (silc_buffer_len(&addr)) {
2593 if (*silc_buffer_data(&addr) == '/') {
2594 silc_buffer_pull(&addr, 1);
2595 if (addr.tail[-1] != '/') {
2596 silc_set_errno_reason(SILC_ERR_SYNTAX,
2597 "Invalid address syntax, missing '/'");
2600 silc_buffer_push_tail(&addr, 1);
2602 if (!silc_buffer_len(&addr)) {
2603 silc_set_errno_reason(SILC_ERR_SYNTAX,
2604 "Invalid address syntax, missing regular "
2608 str_addr = silc_memdup(silc_buffer_data(&addr),
2609 silc_buffer_len(&addr));
2611 } else if (*silc_buffer_data(&addr) == '$' &&
2612 silc_buffer_len(&addr) == 1) {
2613 ctx.addr_number = -1;
2615 } else if (isdigit((int)*silc_buffer_data(&addr))) {
2616 ctx.addr_number = *silc_buffer_data(&addr) - '0';
2617 silc_buffer_pull(&addr, 1);
2618 while (silc_buffer_len(&addr) &&
2619 isdigit((int)*silc_buffer_data(&addr))) {
2620 ctx.addr_number *= 10;
2621 ctx.addr_number += *silc_buffer_data(&addr) - '0';
2622 silc_buffer_pull(&addr, 1);
2625 if (silc_buffer_len(&addr)) {
2626 silc_set_errno_reason(SILC_ERR_SYNTAX,
2627 "Invalid address syntax, not a number");
2631 if (ctx.addr_number == 0) {
2632 silc_set_errno_reason(SILC_ERR_SYNTAX,
2633 "Invalid address syntax, line address is 0");
2638 silc_set_errno_reason(SILC_ERR_SYNTAX, "Unsupported address syntax");
2643 /* Check command syntax */
2644 if (!silc_buffer_len(&command) || silc_buffer_len(&command) > 2) {
2645 silc_set_errno_reason(SILC_ERR_SYNTAX, "Invalid commmand");
2648 if ((silc_buffer_len(&command) == 1 &&
2649 !silc_buffer_memcmp(&command, "s", 1)) ||
2650 (silc_buffer_len(&command) == 2 &&
2651 !silc_buffer_memcmp(&command, "!s", 2))) {
2652 silc_set_errno_reason(SILC_ERR_SYNTAX, "Invalid command");
2655 if (silc_buffer_len(&command) == 2)
2656 addr_flags |= SILC_STR_REGEX_NOT;
2658 /* Check REGEXP syntax */
2659 if (!silc_buffer_len(&exp_start) ||
2660 !silc_buffer_memcmp(&exp_start, "/", 1)) {
2661 silc_set_errno_reason(SILC_ERR_SYNTAX,
2662 "Invalid substitution syntax, missing '/'");
2665 if (!silc_buffer_len(&exp_end) ||
2666 !silc_buffer_memcmp(&exp_end, "/", 1)) {
2667 silc_set_errno_reason(SILC_ERR_SYNTAX,
2668 "Invalid substitution syntax, missing '/'");
2672 /* Check FLAGS syntax */
2673 if (silc_buffer_len(&flags)) {
2674 if (silc_buffer_len(&flags) > 1) {
2675 silc_set_errno_reason(SILC_ERR_SYNTAX, "Invalid flags");
2679 /* Check supported flags */
2680 if (silc_buffer_len(&flags) == 1) {
2681 if (silc_buffer_memcmp(&flags, "g", 1)) {
2682 match_flags |= SILC_STR_REGEX_ALL;
2684 silc_set_errno_reason(SILC_ERR_SYNTAX, "Unsupported flag");
2691 match_flags |= SILC_STR_REGEX_INCLUSIVE;
2692 addr_flags |= SILC_STR_REGEX_NL | SILC_STR_REGEX_NO_ADVANCE;
2694 ctx.str_regexp = silc_memdup(silc_buffer_data(&exp),
2695 silc_buffer_len(&exp));
2696 ctx.addr_flags = addr_flags;
2697 ctx.match_flags = match_flags;
2699 /* Unescape escapes from REPLACEMENT */
2700 ctx.rep = silc_buffer_copy(&rep);
2703 if (silc_buffer_len(ctx.rep))
2704 silc_buffer_format(ctx.rep,
2705 SILC_STR_REGEX("\\\\/", (SILC_STR_REGEX_ALL |
2706 SILC_STR_REGEX_INCLUSIVE)),
2707 SILC_STR_REPLACE("/", 1),
2708 SILC_STR_END, SILC_STR_END);
2710 /* If NUMBER or $ is specified, handle NOT flag in the silc_subst_addr */
2711 if (ctx.addr_number)
2712 addr_flags &= ~SILC_STR_REGEX_NOT;
2714 SILC_LOG_DEBUG(("ADDR '%s' flags 0x%x, NUMBER %d", str_addr, addr_flags,
2716 SILC_LOG_DEBUG(("REGEXP '%s' flags 0x%x", ctx.str_regexp, match_flags));
2718 /* Match and replace */
2719 ret = silc_buffer_format(buffer,
2720 SILC_STR_REGEX(str_addr, addr_flags),
2721 SILC_STR_FUNC(silc_subst_addr, NULL, &ctx),
2722 SILC_STR_END, SILC_STR_END);
2725 if (str_addr && strlen(str_addr))
2726 silc_free(str_addr);
2727 silc_free(ctx.str_regexp);
2728 silc_buffer_free(ctx.rep);
2730 return ret >= 0 ? TRUE : FALSE;