5 Author: Tatu Ylonen <ylo@ngs.fi>
7 Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
9 Permission to use, copy, modify, distribute, and sell this software
10 and its documentation is hereby granted without fee, provided that the
11 above copyright notice appears in all source code copies, the name of
12 Tatu Ylonen is not used to advertise products containing this software
13 or a derivation thereof, and all modified versions are clearly marked
16 This software is provided "as is" without express or implied warranty.
18 Created: Thu Sep 26 17:14:05 1991 ylo
19 Last modified: Sun Mar 29 16:47:31 1992 ylo
21 This code draws many ideas from the regular expression packages by
22 Henry Spencer of the University of Toronto and Richard Stallman of the
23 Free Software Foundation.
25 Emacs-specific code and syntax table code is almost directly borrowed
28 The SILC Regex API by Pekka Riikonen, under the same license as the original
35 /* Modified for use in SILC Runtime Toolkit. I think we have disabled many
36 features we could use, for the sake of simple API, which we may want to
39 #define RE_NREGS 128 /* number of registers available */
41 /* bit definitions for syntax */
42 #define RE_NO_BK_PARENS 1 /* no quoting for parentheses */
43 #define RE_NO_BK_VBAR 2 /* no quoting for vertical bar */
44 #define RE_BK_PLUS_QM 4 /* quoting needed for + and ? */
45 #define RE_TIGHT_VBAR 8 /* | binds tighter than ^ and $ */
46 #define RE_NEWLINE_OR 16 /* treat newline as or */
47 #define RE_CONTEXT_INDEP_OPS 32 /* ^$?*+ are special in all contexts */
48 #define RE_ANSI_HEX 64 /* ansi sequences (\n etc) and \xhh */
49 #define RE_NO_GNU_EXTENSIONS 128 /* no gnu extensions */
51 /* definitions for some common regexp styles */
52 #define RE_SYNTAX_AWK (RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS)
53 #define RE_SYNTAX_EGREP (RE_SYNTAX_AWK|RE_NEWLINE_OR)
54 #define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR)
55 #define RE_SYNTAX_EMACS 0
58 typedef struct re_registers {
59 int start[RE_NREGS]; /* start offset of region */
60 int end[RE_NREGS]; /* end offset of region */
61 } *regexp_registers_t;
63 int re_set_syntax(int syntax);
64 /* This sets the syntax to use and returns the previous syntax. The
65 syntax is specified by a bit mask of the above defined bits. */
67 SilcResult re_compile_pattern(char *regex, int regex_size, SilcRegex compiled);
68 /* This compiles the regexp (given in regex and length in regex_size).
69 This returns NULL if the regexp compiled successfully, and an error
70 message if an error was encountered. The buffer field must be
71 initialized to a memory area allocated by malloc (or to NULL) before
72 use, and the allocated field must be set to its length (or 0 if buffer is
73 NULL). Also, the translate field must be set to point to a valid
74 translation table, or NULL if it is not used. */
76 int re_match(SilcRegex compiled, char *string, int size, int pos,
77 regexp_registers_t regs);
78 /* This tries to match the regexp against the string. This returns the
79 length of the matched portion, or -1 if the pattern could not be
80 matched and -2 if an error (such as failure stack overflow) is
83 int re_match_2(SilcRegex compiled, char *string1, int size1,
84 char *string2, int size2, int pos, regexp_registers_t regs,
86 /* This tries to match the regexp to the concatenation of string1 and
87 string2. This returns the length of the matched portion, or -1 if the
88 pattern could not be matched and -2 if an error (such as failure stack
89 overflow) is encountered. */
91 int re_search(SilcRegex compiled, char *string, int size, int startpos,
92 int range, regexp_registers_t regs);
93 /* This rearches for a substring matching the regexp. This returns the first
94 index at which a match is found. range specifies at how many positions to
95 try matching; positive values indicate searching forwards, and negative
96 values indicate searching backwards. mstop specifies the offset beyond
97 which a match must not go. This returns -1 if no match is found, and
98 -2 if an error (such as failure stack overflow) is encountered. */
100 int re_search_2(SilcRegex compiled, char *string1, int size1,
101 char *string2, int size2, int startpos, int range,
102 regexp_registers_t regs, int mstop);
103 /* This is like re_search, but search from the concatenation of string1 and
106 void re_compile_fastmap(SilcRegex compiled);
107 /* This computes the fastmap for the regexp. For this to have any effect,
108 the calling program must have initialized the fastmap field to point
109 to an array of 256 characters. */
111 #define MACRO_BEGIN do {
112 #define MACRO_END } while (0)
114 enum regexp_compiled_ops /* opcodes for compiled regexp */
116 Cend, /* end of pattern reached */
117 Cbol, /* beginning of line */
118 Ceol, /* end of line */
119 Cset, /* character set. Followed by 32 bytes of set. */
120 Cexact, /* followed by a byte to match */
121 Canychar, /* matches any character except newline */
122 Cstart_memory, /* set register start addr (followed by reg number) */
123 Cend_memory, /* set register end addr (followed by reg number) */
124 Cmatch_memory, /* match a duplicate of reg contents (regnum follows)*/
125 Cjump, /* followed by two bytes (lsb,msb) of displacement. */
126 Cstar_jump, /* will change to jump/update_failure_jump at runtime */
127 Cfailure_jump, /* jump to addr on failure */
128 Cupdate_failure_jump, /* update topmost failure point and jump */
129 Cdummy_failure_jump, /* push a dummy failure point and jump */
130 Cbegbuf, /* match at beginning of buffer */
131 Cendbuf, /* match at end of buffer */
132 Cwordbeg, /* match at beginning of word */
133 Cwordend, /* match at end of word */
134 Cwordbound, /* match if at word boundary */
135 Cnotwordbound, /* match if not at word boundary */
137 Cemacs_at_dot, /* emacs only: matches at dot */
139 Csyntaxspec, /* matches syntax code (1 byte follows) */
140 Cnotsyntaxspec /* matches if syntax code does not match (1 byte foll)*/
143 enum regexp_syntax_op /* syntax codes for plain and quoted characters */
145 Rend, /* special code for end of regexp */
146 Rnormal, /* normal character */
147 Ranychar, /* any character except newline */
148 Rquote, /* the quote character */
149 Rbol, /* match beginning of line */
150 Reol, /* match end of line */
151 Roptional, /* match preceding expression optionally */
152 Rstar, /* match preceding expr zero or more times */
153 Rplus, /* match preceding expr one or more times */
154 Ror, /* match either of alternatives */
155 Ropenpar, /* opening parenthesis */
156 Rclosepar, /* closing parenthesis */
157 Rmemory, /* match memory register */
158 Rextended_memory, /* \vnn to match registers 10-99 */
159 Ropenset, /* open set. Internal syntax hard-coded below. */
160 /* the following are gnu extensions to "normal" regexp syntax */
161 Rbegbuf, /* beginning of buffer */
162 Rendbuf, /* end of buffer */
163 Rwordchar, /* word character */
164 Rnotwordchar, /* not word character */
165 Rwordbeg, /* beginning of word */
166 Rwordend, /* end of word */
167 Rwordbound, /* word bound */
168 Rnotwordbound, /* not word bound */
170 Remacs_at_dot, /* emacs: at dot */
171 Remacs_syntaxspec, /* syntaxspec */
172 Remacs_notsyntaxspec, /* notsyntaxspec */
177 static int re_compile_initialized = 0;
178 static int regexp_syntax = 0;
179 static unsigned char regexp_plain_ops[256];
180 static unsigned char regexp_quoted_ops[256];
181 static unsigned char regexp_precedences[Rnum_ops];
182 static int regexp_context_indep_ops;
183 static int regexp_ansi_sequences;
185 #define NUM_LEVELS 5 /* number of precedence levels in use */
186 #define MAX_NESTING 100 /* max nesting level of operators */
190 /* This code is for emacs compatibility only. */
197 /* emacs defines NULL in some strange way? */
203 #define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
207 char *re_syntax_table;
209 static char re_syntax_table[256];
210 #endif /* SYNTAX_TABLE */
214 static void re_compile_initialize()
218 #if !defined(emacs) && !defined(SYNTAX_TABLE)
219 static int syntax_table_inited = 0;
221 if (!syntax_table_inited)
223 syntax_table_inited = 1;
224 memset(re_syntax_table, 0, 256);
225 for (a = 'a'; a <= 'z'; a++)
226 re_syntax_table[a] = Sword;
227 for (a = 'A'; a <= 'Z'; a++)
228 re_syntax_table[a] = Sword;
229 for (a = '0'; a <= '9'; a++)
230 re_syntax_table[a] = Sword;
232 #endif /* !emacs && !SYNTAX_TABLE */
233 re_compile_initialized = 1;
234 for (a = 0; a < 256; a++)
236 regexp_plain_ops[a] = Rnormal;
237 regexp_quoted_ops[a] = Rnormal;
239 for (a = '0'; a <= '9'; a++)
240 regexp_quoted_ops[a] = Rmemory;
241 regexp_plain_ops['\134'] = Rquote;
242 if (regexp_syntax & RE_NO_BK_PARENS)
244 regexp_plain_ops['('] = Ropenpar;
245 regexp_plain_ops[')'] = Rclosepar;
249 regexp_quoted_ops['('] = Ropenpar;
250 regexp_quoted_ops[')'] = Rclosepar;
252 if (regexp_syntax & RE_NO_BK_VBAR)
253 regexp_plain_ops['\174'] = Ror;
255 regexp_quoted_ops['\174'] = Ror;
256 regexp_plain_ops['*'] = Rstar;
257 if (regexp_syntax & RE_BK_PLUS_QM)
259 regexp_quoted_ops['+'] = Rplus;
260 regexp_quoted_ops['?'] = Roptional;
264 regexp_plain_ops['+'] = Rplus;
265 regexp_plain_ops['?'] = Roptional;
267 if (regexp_syntax & RE_NEWLINE_OR)
268 regexp_plain_ops['\n'] = Ror;
269 regexp_plain_ops['\133'] = Ropenset;
270 regexp_plain_ops['\136'] = Rbol;
271 regexp_plain_ops['$'] = Reol;
272 regexp_plain_ops['.'] = Ranychar;
273 if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS))
276 regexp_quoted_ops['='] = Remacs_at_dot;
277 regexp_quoted_ops['s'] = Remacs_syntaxspec;
278 regexp_quoted_ops['S'] = Remacs_notsyntaxspec;
280 regexp_quoted_ops['w'] = Rwordchar;
281 regexp_quoted_ops['W'] = Rnotwordchar;
282 regexp_quoted_ops['<'] = Rwordbeg;
283 regexp_quoted_ops['>'] = Rwordend;
284 regexp_quoted_ops['b'] = Rwordbound;
285 regexp_quoted_ops['B'] = Rnotwordbound;
286 regexp_quoted_ops['`'] = Rbegbuf;
287 regexp_quoted_ops['\''] = Rendbuf;
289 if (regexp_syntax & RE_ANSI_HEX)
290 regexp_quoted_ops['v'] = Rextended_memory;
291 for (a = 0; a < Rnum_ops; a++)
292 regexp_precedences[a] = 4;
293 if (regexp_syntax & RE_TIGHT_VBAR)
295 regexp_precedences[Ror] = 3;
296 regexp_precedences[Rbol] = 2;
297 regexp_precedences[Reol] = 2;
301 regexp_precedences[Ror] = 2;
302 regexp_precedences[Rbol] = 3;
303 regexp_precedences[Reol] = 3;
305 regexp_precedences[Rclosepar] = 1;
306 regexp_precedences[Rend] = 0;
307 regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0;
308 regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0;
311 int re_set_syntax(syntax)
317 regexp_syntax = syntax;
318 re_compile_initialize();
322 static int hex_char_to_decimal(ch)
325 if (ch >= '0' && ch <= '9')
327 if (ch >= 'a' && ch <= 'f')
328 return ch - 'a' + 10;
329 if (ch >= 'A' && ch <= 'F')
330 return ch - 'A' + 10;
334 SilcResult re_compile_pattern(regex, size, bufp)
339 int a, pos, op, current_level, level, opcode;
340 int pattern_offset = 0, alloc;
341 int starts[NUM_LEVELS * MAX_NESTING], starts_base;
342 int future_jumps[MAX_NESTING], num_jumps;
343 unsigned char ch = 0;
344 char *pattern, *translate;
345 int next_register, paren_depth, num_open_registers, open_registers[RE_NREGS];
346 int beginning_context;
348 #define NEXTCHAR(var) \
351 goto ends_prematurely; \
352 (var) = regex[pos]; \
356 #define ALLOC(amount) \
358 if (pattern_offset+(amount) > alloc) \
360 alloc += 256 + (amount); \
361 pattern = silc_realloc(pattern, alloc); \
363 goto out_of_memory; \
367 #define STORE(ch) pattern[pattern_offset++] = (ch)
369 #define CURRENT_LEVEL_START (starts[starts_base + current_level])
371 #define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset
373 #define PUSH_LEVEL_STARTS if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \
374 starts_base += NUM_LEVELS; \
378 #define POP_LEVEL_STARTS starts_base -= NUM_LEVELS
380 #define PUT_ADDR(offset,addr) \
382 int disp = (addr) - (offset) - 2; \
383 pattern[(offset)] = disp & 0xff; \
384 pattern[(offset)+1] = (disp>>8) & 0xff; \
387 #define INSERT_JUMP(pos,type,addr) \
389 int a, p = (pos), t = (type), ad = (addr); \
390 for (a = pattern_offset - 1; a >= p; a--) \
391 pattern[a + 3] = pattern[a]; \
394 pattern_offset += 3; \
397 #define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7))
401 bufp->allocated = alloc; \
402 bufp->buffer = pattern; \
403 bufp->used = pattern_offset; \
406 #define GETHEX(var) \
408 char gethex_ch, gethex_value; \
409 NEXTCHAR(gethex_ch); \
410 gethex_value = hex_char_to_decimal(gethex_ch); \
411 if (gethex_value == 16) \
413 NEXTCHAR(gethex_ch); \
414 gethex_ch = hex_char_to_decimal(gethex_ch); \
415 if (gethex_ch == 16) \
417 (var) = gethex_value * 16 + gethex_ch; \
420 #define ANSI_TRANSLATE(ch) \
426 ch = 7; /* audible bell */ \
430 ch = 8; /* backspace */ \
434 ch = 12; /* form feed */ \
438 ch = 10; /* line feed */ \
442 ch = 13; /* carriage return */ \
450 ch = 11; /* vertical tab */ \
452 case 'x': /* hex code */ \
457 /* other characters passed through */ \
459 ch = translate[(unsigned char)ch]; \
464 if (!re_compile_initialized)
465 re_compile_initialize();
467 bufp->fastmap_accurate = 0;
468 bufp->uses_registers = 0;
469 translate = bufp->translate;
470 pattern = bufp->buffer;
471 alloc = bufp->allocated;
472 if (alloc == 0 || pattern == NULL)
475 pattern = silc_malloc(alloc);
484 num_open_registers = 0;
487 beginning_context = 1;
489 /* we use Rend dummy to ensure that pending jumps are updated (due to
490 low priority of Rend) before exiting the loop. */
500 ch = translate[(unsigned char)ch];
501 op = regexp_plain_ops[(unsigned char)ch];
505 op = regexp_quoted_ops[(unsigned char)ch];
506 if (op == Rnormal && regexp_ansi_sequences)
510 level = regexp_precedences[op];
511 /* printf("ch='%c' op=%d level=%d current_level=%d curlevstart=%d\n",
512 ch, op, level, current_level, CURRENT_LEVEL_START); */
513 if (level > current_level)
515 for (current_level++; current_level < level; current_level++)
520 if (level < current_level)
522 current_level = level;
523 for (;num_jumps > 0 &&
524 future_jumps[num_jumps-1] >= CURRENT_LEVEL_START;
526 PUT_ADDR(future_jumps[num_jumps-1], pattern_offset);
535 store_opcode_and_arg: /* opcode & ch must be set */
552 if (!beginning_context) {
553 if (regexp_context_indep_ops)
561 if (!((pos >= size) ||
562 ((regexp_syntax & RE_NO_BK_VBAR) ?
563 (regex[pos] == '\174') :
564 (pos+1 < size && regex[pos] == '\134' &&
565 regex[pos+1] == '\174')) ||
566 ((regexp_syntax & RE_NO_BK_PARENS)?
568 (pos+1 < size && regex[pos] == '\134' &&
569 regex[pos+1] == ')')))) {
570 if (regexp_context_indep_ops)
579 if (beginning_context) {
580 if (regexp_context_indep_ops)
585 if (CURRENT_LEVEL_START == pattern_offset)
586 break; /* ignore empty patterns for ? */
588 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
593 if (beginning_context) {
594 if (regexp_context_indep_ops)
599 if (CURRENT_LEVEL_START == pattern_offset)
600 break; /* ignore empty patterns for + and * */
602 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
604 INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START);
605 if (op == Rplus) /* jump over initial failure_jump */
606 INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump,
607 CURRENT_LEVEL_START + 6);
611 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
613 if (num_jumps >= MAX_NESTING)
616 future_jumps[num_jumps++] = pattern_offset;
623 if (next_register < RE_NREGS)
625 bufp->uses_registers = 1;
627 STORE(Cstart_memory);
628 STORE(next_register);
629 open_registers[num_open_registers++] = next_register;
638 if (paren_depth <= 0)
639 goto parenthesis_error;
641 current_level = regexp_precedences[Ropenpar];
643 if (paren_depth < num_open_registers)
645 bufp->uses_registers = 1;
648 num_open_registers--;
649 STORE(open_registers[num_open_registers]);
654 goto bad_match_register;
655 assert(ch >= '0' && ch <= '9');
656 bufp->uses_registers = 1;
657 opcode = Cmatch_memory;
659 goto store_opcode_and_arg;
660 case Rextended_memory:
662 if (ch < '0' || ch > '9')
663 goto bad_match_register;
665 if (a < '0' || a > '9')
666 goto bad_match_register;
667 ch = 10 * (a - '0') + ch - '0';
668 if (ch <= 0 || ch >= RE_NREGS)
669 goto bad_match_register;
670 bufp->uses_registers = 1;
671 opcode = Cmatch_memory;
672 goto store_opcode_and_arg;
675 int complement,prev,offset,range,firstchar;
680 offset = pattern_offset;
681 for (a = 0; a < 256/8; a++)
685 ch = translate[(unsigned char)ch];
691 ch = translate[(unsigned char)ch];
698 while (ch != '\135' || firstchar)
701 if (regexp_ansi_sequences && ch == '\134')
708 for (a = prev; a <= ch; a++)
709 SETBIT(pattern, offset, a);
714 if (prev != -1 && ch == '-')
718 SETBIT(pattern, offset, ch);
723 ch = translate[(unsigned char)ch];
726 SETBIT(pattern, offset, '-');
729 for (a = 0; a < 256/8; a++)
730 pattern[offset+a] ^= 0xff;
741 opcode = Csyntaxspec;
743 goto store_opcode_and_arg;
745 opcode = Cnotsyntaxspec;
747 goto store_opcode_and_arg;
758 opcode = Cnotwordbound;
762 opcode = Cemacs_at_dot;
764 case Remacs_syntaxspec:
767 ch = translate[(unsigned char)ch];
768 opcode = Csyntaxspec;
769 ch = syntax_spec_code[(unsigned char)ch];
770 goto store_opcode_and_arg;
771 case Remacs_notsyntaxspec:
774 ch = translate[(unsigned char)ch];
775 opcode = Cnotsyntaxspec;
776 ch = syntax_spec_code[(unsigned char)ch];
777 goto store_opcode_and_arg;
782 beginning_context = (op == Ropenpar || op == Ror);
784 if (starts_base != 0)
785 goto parenthesis_error;
786 assert(num_jumps == 0);
794 return SILC_ERR_REGEX_SPECIAL;
798 return SILC_ERR_REGEX_REG;
802 return SILC_ERR_REGEX_HEX;
806 return SILC_ERR_REGEX_PAREN;
810 return SILC_ERR_OUT_OF_MEMORY;
814 return SILC_ERR_OVERFLOW;
818 return SILC_ERR_REGEX_TOO_COMPLEX;
825 #undef CURRENT_LEVEL_START
826 #undef SET_LEVEL_START
827 #undef PUSH_LEVEL_STARTS
828 #undef POP_LEVEL_STARTS
834 static void re_compile_fastmap_aux(code, pos, visited, can_be_null, fastmap)
835 char *code, *visited, *can_be_null, *fastmap;
838 int a, b, syntaxcode;
841 return; /* we have already been here */
861 syntaxcode = code[pos++];
862 for (a = 0; a < 256; a++)
863 if (SYNTAX(a) == syntaxcode)
867 syntaxcode = code[pos++];
868 for (a = 0; a < 256; a++)
869 if (SYNTAX(a) != syntaxcode)
874 if (*can_be_null == 0)
875 *can_be_null = 2; /* can match null, but only at end of buffer*/
878 for (a = 0; a < 256/8; a++)
879 if (code[pos + a] != 0)
880 for (b = 0; b < 8; b++)
881 if (code[pos + a] & (1 << b))
882 fastmap[(a << 3) + b] = 1;
886 fastmap[(unsigned char)code[pos]] = 1;
889 for (a = 0; a < 256; a++)
898 for (a = 0; a < 256; a++)
903 case Cdummy_failure_jump:
904 case Cupdate_failure_jump:
906 a = (unsigned char)code[pos++];
907 a |= (unsigned char)code[pos++] << 8;
908 pos += (int)(short)a;
911 /* argh... the regexp contains empty loops. This is not
912 good, as this may cause a failure stack overflow when
913 matching. Oh well. */
914 /* this path leads nowhere; pursue other paths. */
920 a = (unsigned char)code[pos++];
921 a |= (unsigned char)code[pos++] << 8;
922 a = pos + (int)(short)a;
923 re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
926 abort(); /* probably some opcode is missing from this switch */
931 static int re_do_compile_fastmap(buffer, used, pos, can_be_null, fastmap)
932 char *buffer, *fastmap, *can_be_null;
935 char small_visited[512], *visited;
937 if (used <= sizeof(small_visited))
938 visited = small_visited;
941 visited = silc_malloc(used);
946 memset(fastmap, 0, 256);
947 memset(visited, 0, used);
948 re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap);
949 if (visited != small_visited)
954 void re_compile_fastmap(bufp)
957 if (!bufp->fastmap || bufp->fastmap_accurate)
959 assert(bufp->used > 0);
960 if (!re_do_compile_fastmap(bufp->buffer, bufp->used, 0, &bufp->can_be_null,
963 if (bufp->buffer[0] == Cbol)
964 bufp->anchor = 1; /* begline */
966 if (bufp->buffer[0] == Cbegbuf)
967 bufp->anchor = 2; /* begbuf */
969 bufp->anchor = 0; /* none */
970 bufp->fastmap_accurate = 1;
973 #define INITIAL_FAILURES 128 /* initial # failure points to allocate */
974 #define MAX_FAILURES 4100 /* max # of failure points before failing */
976 int re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop)
978 char *string1, *string2;
979 int size1, size2, pos, mstop;
980 regexp_registers_t regs;
982 struct failure_point { char *text, *partend, *code; }
983 *failure_stack_start, *failure_sp, *failure_stack_end,
984 initial_failure_stack[INITIAL_FAILURES];
985 char *code, *translate, *text, *textend, *partend, *part_2_end;
986 char *regstart_text[RE_NREGS], *regstart_partend[RE_NREGS];
987 char *regend_text[RE_NREGS], *regend_partend[RE_NREGS];
988 int a, b, ch, reg, regch, match_end;
989 char *regtext, *regpartend, *regtextend;
993 if (text == partend) \
995 if (text == textend) \
998 partend = part_2_end; \
1002 #define NEXTCHAR(var) \
1005 (var) = (unsigned char)*text++; \
1007 (var) = (unsigned char)translate[(var)]; \
1010 assert(pos >= 0 && size1 >= 0 && size2 >= 0 && mstop >= 0);
1011 assert(mstop <= size1 + size2);
1012 assert(pos <= mstop);
1016 text = string1 + pos;
1019 partend = string1 + mstop;
1024 partend = string1 + size1;
1025 textend = string2 + mstop - size1;
1027 part_2_end = string2 + mstop - size1;
1031 text = string2 + pos - size1;
1032 partend = string2 + mstop - size1;
1034 part_2_end = partend;
1037 if (bufp->uses_registers && regs != NULL)
1038 for (a = 0; a < RE_NREGS; a++)
1039 regend_text[a] = NULL;
1041 code = bufp->buffer;
1042 translate = bufp->translate;
1043 failure_stack_start = failure_sp = initial_failure_stack;
1044 failure_stack_end = initial_failure_stack + INITIAL_FAILURES;
1047 /* re_search_2 has already done this, and otherwise we get little benefit
1048 from this. So I'll leave this out. */
1049 if (bufp->fastmap_accurate && !bufp->can_be_null &&
1051 !bufp->fastmap[translate ?
1052 (unsigned char)translate[(unsigned char)*text] :
1053 (unsigned char)*text])
1054 return -1; /* it can't possibly match */
1063 if (partend != part_2_end)
1064 match_end = text - string1;
1066 match_end = text - string2 + size1;
1069 regs->start[0] = pos;
1070 regs->end[0] = match_end;
1071 if (!bufp->uses_registers)
1073 for (a = 1; a < RE_NREGS; a++)
1075 regs->start[a] = -1;
1081 for (a = 1; a < RE_NREGS; a++)
1083 if (regend_text[a] == NULL)
1085 regs->start[a] = -1;
1089 if (regstart_partend[a] != part_2_end)
1090 regs->start[a] = regstart_text[a] - string1;
1092 regs->start[a] = regstart_text[a] - string2 + size1;
1093 if (regend_partend[a] != part_2_end)
1094 regs->end[a] = regend_text[a] - string1;
1096 regs->end[a] = regend_text[a] - string2 + size1;
1100 if (failure_stack_start != initial_failure_stack)
1101 silc_free((char *)failure_stack_start);
1102 return match_end - pos;
1104 if (text == string1 || text[-1] == '\n') /* text[-1] always valid */
1108 if (text == string2 + size2 ||
1109 (text == string1 + size1 ?
1110 (size2 == 0 || *string2 == '\n') :
1116 if (code[ch/8] & (1<<(ch & 7)))
1124 if (ch != (unsigned char)*code++)
1134 regstart_text[reg] = text;
1135 regstart_partend[reg] = partend;
1139 regend_text[reg] = text;
1140 regend_partend[reg] = partend;
1144 if (regend_text[reg] == NULL)
1145 goto fail; /* or should we just match nothing? */
1146 regtext = regstart_text[reg];
1147 regtextend = regend_text[reg];
1148 if (regstart_partend[reg] == regend_partend[reg])
1149 regpartend = regtextend;
1151 regpartend = string1 + size1;
1153 for (;regtext != regtextend;)
1156 if (regtext == regpartend)
1158 regch = (unsigned char)*regtext++;
1160 regch = (unsigned char)translate[regch];
1166 /* star is coded as:
1168 ... code for operand of star
1170 2: ... code after star
1171 We change the star_jump to update_failure_jump if we can determine
1172 that it is safe to do so; otherwise we change it to an ordinary
1177 2: ... code for operand of plus
1179 3: ... code after plus
1180 For star_jump considerations this is processed identically
1182 a = (unsigned char)*code++;
1183 a |= (unsigned char)*code++ << 8;
1186 char map[256], can_be_null;
1189 p1 = code + a + 3; /* skip the failure_jump */
1190 assert(p1[-3] == Cfailure_jump);
1192 /* p1 points inside loop, p2 points to after loop */
1193 if (!re_do_compile_fastmap(bufp->buffer, bufp->used,
1194 p2 - bufp->buffer, &can_be_null, map))
1195 goto make_normal_jump;
1196 /* If we might introduce a new update point inside the loop,
1197 we can't optimize because then update_jump would update a
1198 wrong failure point. Thus we have to be quite careful here. */
1200 /* loop until we find something that consumes a character */
1220 ch = (unsigned char)*p1++;
1222 goto make_normal_jump;
1225 for (b = 0; b < 256; b++)
1226 if (b != '\n' && map[b])
1227 goto make_normal_jump;
1230 for (b = 0; b < 256; b++)
1231 if ((p1[b >> 3] & (1 << (b & 7))) && map[b])
1232 goto make_normal_jump;
1236 goto make_normal_jump;
1238 /* now we know that we can't backtrack. */
1239 while (p1 != p2 - 3)
1244 abort(); /* we certainly shouldn't get this inside loop */
1267 case Cnotsyntaxspec:
1273 case Cupdate_failure_jump:
1274 case Cdummy_failure_jump:
1275 goto make_normal_jump;
1277 printf("regexpr.c: processing star_jump: unknown op %d\n", p1[-1]);
1281 goto make_update_jump;
1284 /* printf("changing to normal jump\n"); */
1289 /* printf("changing to update jump\n"); */
1291 a += 3; /* jump to after the Cfailure_jump */
1292 code[-1] = Cupdate_failure_jump;
1295 /* fall to next case */
1296 case Cupdate_failure_jump:
1297 failure_sp[-1].text = text;
1298 failure_sp[-1].partend = partend;
1299 /* fall to next case */
1301 a = (unsigned char)*code++;
1302 a |= (unsigned char)*code++ << 8;
1303 code += (int)(short)a;
1305 case Cdummy_failure_jump:
1307 if (failure_sp == failure_stack_end)
1309 if (failure_stack_start != initial_failure_stack)
1311 failure_stack_start = (struct failure_point *)
1312 silc_malloc(MAX_FAILURES * sizeof(*failure_stack_start));
1313 failure_stack_end = failure_stack_start + MAX_FAILURES;
1314 memcpy((char *)failure_stack_start, (char *)initial_failure_stack,
1315 INITIAL_FAILURES * sizeof(*failure_stack_start));
1316 failure_sp = failure_stack_start + INITIAL_FAILURES;
1318 a = (unsigned char)*code++;
1319 a |= (unsigned char)*code++ << 8;
1321 if (code[-3] == Cdummy_failure_jump)
1322 { /* this is only used in plus */
1323 assert(*code == Cfailure_jump);
1324 b = (unsigned char)code[1];
1325 b |= (unsigned char)code[2] << 8;
1326 failure_sp->code = code + (int)(short)b + 3;
1327 failure_sp->text = NULL;
1332 failure_sp->code = code + a;
1333 failure_sp->text = text;
1334 failure_sp->partend = partend;
1339 if (text == string1)
1343 if (size2 == 0 ? text == string1 + size1 : text == string2 + size2)
1347 if (text == string2 + size2)
1349 if (size2 == 0 && text == string1 + size1)
1351 if (SYNTAX(text == string1 + size1 ? *string1 : *text) != Sword)
1353 if (text == string1)
1355 if (SYNTAX(text[-1]) != Sword)
1359 if (text == string1)
1361 if (SYNTAX(text[-1]) != Sword)
1363 if (text == string2 + size2)
1365 if (size2 == 0 && text == string1 + size1)
1367 if (SYNTAX(*text) == Sword)
1371 /* Note: as in gnu regexp, this also matches at the beginning
1372 and end of buffer. */
1373 if (text == string1 || text == string2 + size2 ||
1374 (size2 == 0 && text == string1 + size1))
1376 if ((SYNTAX(text[-1]) == Sword) ^
1377 (SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword))
1381 /* Note: as in gnu regexp, this never matches at the beginning
1382 and end of buffer. */
1383 if (text == string1 || text == string2 + size2 ||
1384 (size2 == 0 && text == string1 + size1))
1386 if (!((SYNTAX(text[-1]) == Sword) ^
1387 (SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword)))
1392 if (SYNTAX(ch) != (unsigned char)*code++)
1395 case Cnotsyntaxspec:
1397 if (SYNTAX(ch) != (unsigned char)*code++)
1402 if (PTR_CHAR_POS((unsigned char *)text) + 1 != point)
1415 if (failure_sp != failure_stack_start)
1418 text = failure_sp->text;
1421 partend = failure_sp->partend;
1422 code = failure_sp->code;
1423 goto continue_matching;
1425 if (failure_stack_start != initial_failure_stack)
1426 silc_free((char *)failure_stack_start);
1430 if (failure_stack_start != initial_failure_stack)
1431 silc_free((char *)failure_stack_start);
1439 int re_match(bufp, string, size, pos, regs)
1443 regexp_registers_t regs;
1445 return re_match_2(bufp, string, size, (char *)NULL, 0, pos, regs, size);
1448 int re_search_2(bufp, string1, size1, string2, size2, pos, range, regs,
1451 char *string1, *string2;
1452 int size1, size2, pos, range, mstop;
1453 regexp_registers_t regs;
1455 char *fastmap, *translate, *text, *partstart, *partend;
1459 assert(size1 >= 0 && size2 >= 0 && pos >= 0 && mstop >= 0);
1460 assert(pos + range >= 0 && pos + range <= size1 + size2);
1461 assert(pos <= mstop);
1463 fastmap = bufp->fastmap;
1464 translate = bufp->translate;
1465 if (fastmap && !bufp->fastmap_accurate)
1466 re_compile_fastmap(bufp);
1467 anchor = bufp->anchor;
1468 if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */
1483 for (; range >= 0; range--, pos += dir)
1488 { /* searching forwards */
1491 text = string1 + pos;
1492 if (pos + range > size1)
1493 partend = string1 + size1;
1495 partend = string1 + pos + range;
1499 text = string2 + pos - size1;
1500 partend = string2 + pos + range - size1;
1504 while (text != partend &&
1505 !fastmap[(unsigned char)
1506 translate[(unsigned char)*text]])
1509 while (text != partend && !fastmap[(unsigned char)*text])
1511 pos += text - partstart;
1512 range -= text - partstart;
1513 if (pos == size1 + size2 && bufp->can_be_null == 0)
1517 { /* searching backwards */
1520 text = string1 + pos;
1521 partstart = string1 + pos - range;
1525 text = string2 + pos - size1;
1526 if (range < pos - size1)
1527 partstart = string2 + pos - size1 - range;
1529 partstart = string2;
1533 while (text != partstart &&
1534 !fastmap[(unsigned char)
1535 translate[(unsigned char)*text]])
1538 while (text != partstart &&
1539 !fastmap[(unsigned char)*text])
1541 pos -= partend - text;
1542 range -= partend - text;
1546 { /* anchored to begline */
1548 (pos <= size1 ? string1[pos - 1] :
1549 string2[pos - size1 - 1]) != '\n')
1552 assert(pos >= 0 && pos <= size1 + size2);
1553 ret = re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop);
1562 int re_search(bufp, string, size, startpos, range, regs)
1565 int size, startpos, range;
1566 regexp_registers_t regs;
1568 return re_search_2(bufp, string, size, (char *)NULL, 0,
1569 startpos, range, regs, size);
1572 /****************************** SILC Regex API ******************************/
1574 /* Compile regular expression */
1576 SilcBool silc_regex_compile(SilcRegex regexp, const char *regex,
1577 SilcRegexFlags flags)
1582 if (!regexp || !regex) {
1583 silc_set_errno(SILC_ERR_INVALID_ARGUMENT);
1587 memset(regexp, 0, sizeof(*regexp));
1590 syntax |= (RE_CONTEXT_INDEP_OPS | RE_NO_BK_PARENS | RE_NO_BK_VBAR);
1591 re_set_syntax(syntax);
1594 ret = re_compile_pattern((char *)regex, strlen(regex), regexp);
1596 silc_set_errno(ret);
1598 return ret == SILC_OK;
1601 /* Match compiled regular expression */
1603 SilcBool silc_regex_match(SilcRegex regexp, const char *string,
1604 SilcUInt32 string_len, SilcUInt32 num_match,
1605 SilcRegexMatch match, SilcRegexFlags flags)
1607 struct re_registers regs;
1610 if (!regexp || !string) {
1611 silc_set_errno(SILC_ERR_INVALID_ARGUMENT);
1615 if (num_match && !match) {
1616 silc_set_errno(SILC_ERR_INVALID_ARGUMENT);
1620 /* Internal limit for maximum number of registers */
1621 if (num_match > RE_NREGS)
1622 num_match = RE_NREGS;
1625 ret = re_search(regexp, (char *)string, string_len, 0, string_len,
1626 num_match ? ®s : NULL);
1629 silc_set_errno(SILC_ERR);
1631 silc_set_errno(SILC_ERR_NOT_FOUND);
1635 /* Return matches */
1636 for (i = 0; i < num_match; i++) {
1637 match[i].start = regs.start[i];
1638 match[i].end = regs.end[i];
1647 void silc_regex_free(SilcRegex regexp)
1649 silc_free(regexp->buffer);
1654 SilcBool silc_regex_va(const char *string, SilcUInt32 string_len,
1655 const char *regex, SilcBuffer match, va_list va)
1657 SilcRegexStruct reg;
1658 SilcRegexMatch m = NULL;
1659 SilcBuffer buf, *rets = NULL;
1663 if (!silc_regex_compile(®, regex, 0))
1666 /* Get match pointers */
1668 rets = silc_malloc(sizeof(*rets));
1673 while ((buf = va_arg(va, SilcBuffer))) {
1674 rets = silc_realloc(rets, (c + 1) * sizeof(*rets));
1680 m = silc_malloc(c * sizeof(*m));
1688 if (!silc_regex_match(®, string, string_len, c, m, 0)) {
1694 /* Return matches */
1695 for (i = 0; i < c; i++) {
1696 if (m[i].start == -1)
1698 silc_buffer_set(rets[i], (unsigned char *)string + m[i].start,
1699 m[i].end - m[i].start);
1710 SilcBool silc_regex(const char *string, const char *regex,
1711 SilcBuffer match, ...)
1716 if (!string || !regex) {
1717 silc_set_errno(SILC_ERR_INVALID_ARGUMENT);
1722 va_start(va, match);
1724 ret = silc_regex_va(string, strlen(string), regex, match, va);
1734 SilcBool silc_regex_buffer(SilcBuffer buffer, const char *regex,
1735 SilcBuffer match, ...)
1740 if (!buffer || !regex) {
1741 silc_set_errno(SILC_ERR_INVALID_ARGUMENT);
1746 va_start(va, match);
1748 ret = silc_regex_va((const char *)silc_buffer_data(buffer),
1749 silc_buffer_len(buffer), regex, match, va);