X-Git-Url: http://git.silcnet.org/gitweb/?p=silc.git;a=blobdiff_plain;f=lib%2Fsilcutil%2Fsilcregex.c;h=dba552b7e34b9fb976fcc9b893b6dec6d18a0267;hp=cdd6ecfaa5431545c7f6a354e5faf2df088eb6af;hb=e7b6c157b80152bf9fb9266e6bdd93f9fb0db776;hpb=72c29fbfbf198651c1125ac3d991df7a0cdf8acd diff --git a/lib/silcutil/silcregex.c b/lib/silcutil/silcregex.c index cdd6ecfa..dba552b7 100644 --- a/lib/silcutil/silcregex.c +++ b/lib/silcutil/silcregex.c @@ -27,12 +27,13 @@ /* The SILC Regex API and modifications by Pekka Riikonen, under the same - license as the original code. We've added following features: + license as the original code. We've added the following features: - - RE_NOTBOL - bol fails to match (conforming POSIX) - - RE_NOTEOL - eol fails to match (conforming POSIX) - - RE_REPEAT a{n,m} - bounded repeat (conforming POSIX) - - SilStack support - compile without real memory allocations + - RE_SYNTAX_POSIX POSIX extended regular expression syntax + - RE_REPEAT bounded repeat a{n,m} (RE_SYNTAX_POSIX) + - RE_NOTBOL bol fails to match (conforming POSIX regex API) + - RE_NOTEOL eol fails to match (conforming POSIX regex API) + - SilcStack support compile/match without real memory allocations */ #include "silc.h" @@ -50,21 +51,15 @@ #define RE_NO_GNU_EXTENSIONS 128 /* no gnu extensions */ #define RE_NOTBOL 256 /* bol fails to match */ #define RE_NOTEOL 512 /* eol fails to match */ -#define RE_REPEAT 1024 /* bounded repeat, must be quoted without - RE_NO_BK_VBAR */ +#define RE_REPEAT 1024 /* bounded repeat expression */ /* definitions for some common regexp styles */ #define RE_SYNTAX_AWK (RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS) #define RE_SYNTAX_EGREP (RE_SYNTAX_AWK|RE_NEWLINE_OR) #define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR) +#define RE_SYNTAX_POSIX (RE_SYNTAX_AWK|RE_REPEAT) #define RE_SYNTAX_EMACS 0 -#define Sword 1 -#define Swhitespace 2 -#define Sdigit 4 -#define Soctaldigit 8 -#define Shexdigit 16 - /* Registers */ typedef struct re_registers { int 
start[RE_NREGS]; /* start offset of region */ @@ -470,22 +465,26 @@ enum regexp_syntax_op /* syntax codes for plain and quoted characters */ Ropenrep, /* opening bounded repeat */ }; -static int re_compile_initialized = 0; -static int regexp_syntax = 0; -static unsigned char regexp_plain_ops[256]; -static unsigned char regexp_quoted_ops[256]; -static unsigned char regexp_precedences[Rnum_ops]; -static int regexp_context_indep_ops; -static int regexp_ansi_sequences; +#define Sword 1 +#define Swhitespace 2 +#define Sdigit 4 +#define Soctaldigit 8 +#define Shexdigit 16 #define NUM_LEVELS 5 /* number of precedence levels in use */ #define MAX_NESTING 100 /* max nesting level of operators */ - -#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)] - -unsigned char re_syntax_table[256]; - -void re_compile_initialize(void) +#define SYNTAX(ch) silc_re_syntax_table[(unsigned char)(ch)] + +static int silc_regexp_syntax = RE_SYNTAX_POSIX; +static int silc_regexp_context_indep_ops; +static int silc_regexp_ansi_sequences; +static int silc_re_compile_initialized = 0; +static unsigned char silc_re_syntax_table[256]; +static unsigned char silc_regexp_plain_ops[256]; +static unsigned char silc_regexp_quoted_ops[256]; +static unsigned char silc_regexp_precedencess[Rnum_ops]; + +void silc_re_compile_initialize(void) { int a; @@ -494,119 +493,119 @@ void re_compile_initialize(void) if (!syntax_table_inited) { syntax_table_inited = 1; - memset(re_syntax_table, 0, 256); + memset(silc_re_syntax_table, 0, 256); for (a = 'a'; a <= 'z'; a++) - re_syntax_table[a] = Sword; + silc_re_syntax_table[a] = Sword; for (a = 'A'; a <= 'Z'; a++) - re_syntax_table[a] = Sword; + silc_re_syntax_table[a] = Sword; for (a = '0'; a <= '9'; a++) - re_syntax_table[a] = Sword | Sdigit | Shexdigit; + silc_re_syntax_table[a] = Sword | Sdigit | Shexdigit; for (a = '0'; a <= '7'; a++) - re_syntax_table[a] |= Soctaldigit; + silc_re_syntax_table[a] |= Soctaldigit; for (a = 'A'; a <= 'F'; a++) - re_syntax_table[a] |= 
Shexdigit; + silc_re_syntax_table[a] |= Shexdigit; for (a = 'a'; a <= 'f'; a++) - re_syntax_table[a] |= Shexdigit; - re_syntax_table['_'] = Sword; + silc_re_syntax_table[a] |= Shexdigit; + silc_re_syntax_table['_'] = Sword; for (a = 9; a <= 13; a++) - re_syntax_table[a] = Swhitespace; - re_syntax_table[' '] = Swhitespace; + silc_re_syntax_table[a] = Swhitespace; + silc_re_syntax_table[' '] = Swhitespace; } - re_compile_initialized = 1; + silc_re_compile_initialized = 1; for (a = 0; a < 256; a++) { - regexp_plain_ops[a] = Rnormal; - regexp_quoted_ops[a] = Rnormal; + silc_regexp_plain_ops[a] = Rnormal; + silc_regexp_quoted_ops[a] = Rnormal; } for (a = '0'; a <= '9'; a++) - regexp_quoted_ops[a] = Rmemory; - regexp_plain_ops['\134'] = Rquote; - if (regexp_syntax & RE_NO_BK_PARENS) + silc_regexp_quoted_ops[a] = Rmemory; + silc_regexp_plain_ops['\134'] = Rquote; + if (silc_regexp_syntax & RE_NO_BK_PARENS) { - regexp_plain_ops['('] = Ropenpar; - regexp_plain_ops[')'] = Rclosepar; + silc_regexp_plain_ops['('] = Ropenpar; + silc_regexp_plain_ops[')'] = Rclosepar; } else { - regexp_quoted_ops['('] = Ropenpar; - regexp_quoted_ops[')'] = Rclosepar; + silc_regexp_quoted_ops['('] = Ropenpar; + silc_regexp_quoted_ops[')'] = Rclosepar; } - if (regexp_syntax & RE_NO_BK_VBAR) - regexp_plain_ops['\174'] = Ror; + if (silc_regexp_syntax & RE_NO_BK_VBAR) + silc_regexp_plain_ops['\174'] = Ror; else - regexp_quoted_ops['\174'] = Ror; - regexp_plain_ops['*'] = Rstar; - if (regexp_syntax & RE_BK_PLUS_QM) + silc_regexp_quoted_ops['\174'] = Ror; + silc_regexp_plain_ops['*'] = Rstar; + if (silc_regexp_syntax & RE_BK_PLUS_QM) { - regexp_quoted_ops['+'] = Rplus; - regexp_quoted_ops['?'] = Roptional; + silc_regexp_quoted_ops['+'] = Rplus; + silc_regexp_quoted_ops['?'] = Roptional; } else { - regexp_plain_ops['+'] = Rplus; - regexp_plain_ops['?'] = Roptional; + silc_regexp_plain_ops['+'] = Rplus; + silc_regexp_plain_ops['?'] = Roptional; } - if (regexp_syntax & RE_NEWLINE_OR) - 
regexp_plain_ops['\n'] = Ror; - regexp_plain_ops['\133'] = Ropenset; - regexp_plain_ops['\136'] = Rbol; - regexp_plain_ops['$'] = Reol; - regexp_plain_ops['.'] = Ranychar; - if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS)) + if (silc_regexp_syntax & RE_NEWLINE_OR) + silc_regexp_plain_ops['\n'] = Ror; + silc_regexp_plain_ops['\133'] = Ropenset; + silc_regexp_plain_ops['\136'] = Rbol; + silc_regexp_plain_ops['$'] = Reol; + silc_regexp_plain_ops['.'] = Ranychar; + if (!(silc_regexp_syntax & RE_NO_GNU_EXTENSIONS)) { - regexp_quoted_ops['w'] = Rwordchar; - regexp_quoted_ops['W'] = Rnotwordchar; - regexp_quoted_ops['<'] = Rwordbeg; - regexp_quoted_ops['>'] = Rwordend; - regexp_quoted_ops['b'] = Rwordbound; - regexp_quoted_ops['B'] = Rnotwordbound; - regexp_quoted_ops['`'] = Rbegbuf; - regexp_quoted_ops['\''] = Rendbuf; + silc_regexp_quoted_ops['w'] = Rwordchar; + silc_regexp_quoted_ops['W'] = Rnotwordchar; + silc_regexp_quoted_ops['<'] = Rwordbeg; + silc_regexp_quoted_ops['>'] = Rwordend; + silc_regexp_quoted_ops['b'] = Rwordbound; + silc_regexp_quoted_ops['B'] = Rnotwordbound; + silc_regexp_quoted_ops['`'] = Rbegbuf; + silc_regexp_quoted_ops['\''] = Rendbuf; } - if (regexp_syntax & RE_ANSI_HEX) - regexp_quoted_ops['v'] = Rextended_memory; + if (silc_regexp_syntax & RE_ANSI_HEX) + silc_regexp_quoted_ops['v'] = Rextended_memory; for (a = 0; a < Rnum_ops; a++) - regexp_precedences[a] = 4; - if (regexp_syntax & RE_TIGHT_VBAR) + silc_regexp_precedencess[a] = 4; + if (silc_regexp_syntax & RE_TIGHT_VBAR) { - regexp_precedences[Ror] = 3; - regexp_precedences[Rbol] = 2; - regexp_precedences[Reol] = 2; + silc_regexp_precedencess[Ror] = 3; + silc_regexp_precedencess[Rbol] = 2; + silc_regexp_precedencess[Reol] = 2; } else { - regexp_precedences[Ror] = 2; - regexp_precedences[Rbol] = 3; - regexp_precedences[Reol] = 3; + silc_regexp_precedencess[Ror] = 2; + silc_regexp_precedencess[Rbol] = 3; + silc_regexp_precedencess[Reol] = 3; } - if (regexp_syntax & RE_REPEAT) + if 
(silc_regexp_syntax & RE_REPEAT) { - if (regexp_syntax & RE_NO_BK_PARENS) + if (silc_regexp_syntax & RE_NO_BK_PARENS) { - regexp_plain_ops['{'] = Ropenrep; + silc_regexp_plain_ops['{'] = Ropenrep; } else { - regexp_quoted_ops['{'] = Ropenrep; + silc_regexp_quoted_ops['{'] = Ropenrep; } } - regexp_precedences[Rclosepar] = 1; - regexp_precedences[Rend] = 0; - regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0; - regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0; + silc_regexp_precedencess[Rclosepar] = 1; + silc_regexp_precedencess[Rend] = 0; + silc_regexp_context_indep_ops = (silc_regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0; + silc_regexp_ansi_sequences = (silc_regexp_syntax & RE_ANSI_HEX) != 0; } -int re_set_syntax(int syntax) +int silc_re_set_syntax(int syntax) { int ret; - ret = regexp_syntax; - regexp_syntax = syntax; - re_compile_initialize(); + ret = silc_regexp_syntax; + silc_regexp_syntax = syntax; + silc_re_compile_initialize(); return ret; } -static int hex_char_to_decimal(int ch) +static int silc_hex_char_to_decimal(int ch) { if (ch >= '0' && ch <= '9') return ch - '0'; @@ -617,10 +616,10 @@ static int hex_char_to_decimal(int ch) return 16; } -static int re_compile_fastmap_aux(unsigned char *code, int pos, - unsigned char *visited, - unsigned char *can_be_null, - unsigned char *fastmap) +static int silc_re_compile_fastmap_aux(unsigned char *code, int pos, + unsigned char *visited, + unsigned char *can_be_null, + unsigned char *fastmap) { int a; int b; @@ -730,7 +729,8 @@ static int re_compile_fastmap_aux(unsigned char *code, int pos, a = (unsigned char)code[pos++]; a |= (unsigned char)code[pos++] << 8; a = pos + (int)SHORT(a); - return re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap); + return silc_re_compile_fastmap_aux(code, a, visited, + can_be_null, fastmap); } case Crepeat1: { @@ -746,9 +746,9 @@ static int re_compile_fastmap_aux(unsigned char *code, int pos, } } -static int re_do_compile_fastmap(unsigned char 
*buffer, int used, int pos, - unsigned char *can_be_null, - unsigned char *fastmap, SilcRegex bufp) +static int silc_re_do_compile_fastmap(unsigned char *buffer, int used, int pos, + unsigned char *can_be_null, + unsigned char *fastmap, SilcRegex bufp) { unsigned char small_visited[512], *visited; int ret; @@ -767,7 +767,8 @@ static int re_do_compile_fastmap(unsigned char *buffer, int used, int pos, *can_be_null = 0; memset(fastmap, 0, 256); memset(visited, 0, used); - ret = re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap); + ret = silc_re_compile_fastmap_aux(buffer, pos, visited, + can_be_null, fastmap); if (visited != small_visited) { silc_sfree(bufp->rstack, visited); silc_stack_pop(bufp->rstack); @@ -775,16 +776,16 @@ static int re_do_compile_fastmap(unsigned char *buffer, int used, int pos, return ret == 0; } -int re_compile_fastmap(SilcRegex bufp) +int silc_re_compile_fastmap(SilcRegex bufp) { if (!bufp->fastmap || bufp->fastmap_accurate) return 0; SILC_ASSERT(bufp->used > 0); - if (!re_do_compile_fastmap(bufp->buffer, - bufp->used, - 0, - &bufp->can_be_null, - bufp->fastmap, bufp)) + if (!silc_re_do_compile_fastmap(bufp->buffer, + bufp->used, + 0, + &bufp->can_be_null, + bufp->fastmap, bufp)) return -1; if (bufp->buffer[0] == Cbol) bufp->anchor = 1; /* begline */ @@ -821,7 +822,7 @@ int re_compile_fastmap(SilcRegex bufp) * */ -static int re_optimize_star_jump(SilcRegex bufp, unsigned char *code) +static int silc_re_optimize_star_jump(SilcRegex bufp, unsigned char *code) { unsigned char map[256]; unsigned char can_be_null; @@ -847,9 +848,9 @@ static int re_optimize_star_jump(SilcRegex bufp, unsigned char *code) SILC_ASSERT(p1[-3] == Cfailure_jump); p2 = code; /* p1 points inside loop, p2 points to after loop */ - if (!re_do_compile_fastmap(bufp->buffer, bufp->used, - (int)(p2 - bufp->buffer), - &can_be_null, map, bufp)) + if (!silc_re_do_compile_fastmap(bufp->buffer, bufp->used, + (int)(p2 - bufp->buffer), + &can_be_null, map, bufp)) goto 
make_normal_jump; /* If we might introduce a new update point inside the @@ -982,7 +983,7 @@ static int re_optimize_star_jump(SilcRegex bufp, unsigned char *code) return 1; } -static int re_optimize(SilcRegex bufp) +static int silc_re_optimize(SilcRegex bufp) { unsigned char *code; @@ -1025,7 +1026,7 @@ static int re_optimize(SilcRegex bufp) } case Cstar_jump: { - if (!re_optimize_star_jump(bufp, code)) + if (!silc_re_optimize_star_jump(bufp, code)) { return 0; } @@ -1112,11 +1113,11 @@ static int re_optimize(SilcRegex bufp) { \ unsigned char gethex_ch, gethex_value; \ NEXTCHAR(gethex_ch); \ - gethex_value = hex_char_to_decimal(gethex_ch); \ + gethex_value = silc_hex_char_to_decimal(gethex_ch); \ if (gethex_value == 16) \ goto hex_error; \ NEXTCHAR(gethex_ch); \ - gethex_ch = hex_char_to_decimal(gethex_ch); \ + gethex_ch = silc_hex_char_to_decimal(gethex_ch); \ if (gethex_ch == 16) \ goto hex_error; \ (var) = gethex_value * 16 + gethex_ch; \ @@ -1184,14 +1185,15 @@ static int re_optimize(SilcRegex bufp) } \ } -SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp) +SilcResult silc_re_compile_pattern(unsigned char *regex, int size, + SilcRegex bufp) { int a; int pos; int op; int current_level; int level; - int opcode; + int opcode = 0; int pattern_offset = 0, alloc; int starts[NUM_LEVELS * MAX_NESTING]; int starts_base; @@ -1206,8 +1208,8 @@ SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp) int open_registers[RE_NREGS]; int beginning_context; - if (!re_compile_initialized) - re_compile_initialize(); + if (!silc_re_compile_initialized) + silc_re_compile_initialize(); bufp->used = 0; bufp->fastmap_accurate = 0; bufp->uses_registers = 1; @@ -1244,16 +1246,16 @@ SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp) NEXTCHAR(ch); if (translate) ch = translate[(unsigned char)ch]; - op = regexp_plain_ops[(unsigned char)ch]; + op = silc_regexp_plain_ops[(unsigned char)ch]; if (op == Rquote) { 
NEXTCHAR(ch); - op = regexp_quoted_ops[(unsigned char)ch]; - if (op == Rnormal && regexp_ansi_sequences) + op = silc_regexp_quoted_ops[(unsigned char)ch]; + if (op == Rnormal && silc_regexp_ansi_sequences) ANSI_TRANSLATE(ch); } } - level = regexp_precedences[op]; + level = silc_regexp_precedencess[op]; /* printf("ch='%c' op=%d level=%d current_level=%d curlevstart=%d\n", ch, op, level, current_level, CURRENT_LEVEL_START); */ @@ -1306,7 +1308,7 @@ SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp) case Rbol: { if (!beginning_context) { - if (regexp_context_indep_ops) + if (silc_regexp_context_indep_ops) goto op_error; else goto normal_char; @@ -1317,15 +1319,15 @@ SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp) case Reol: { if (!((pos >= size) || - ((regexp_syntax & RE_NO_BK_VBAR) ? + ((silc_regexp_syntax & RE_NO_BK_VBAR) ? (regex[pos] == '\174') : (pos+1 < size && regex[pos] == '\134' && regex[pos+1] == '\174')) || - ((regexp_syntax & RE_NO_BK_PARENS)? + ((silc_regexp_syntax & RE_NO_BK_PARENS)? 
(regex[pos] == ')'): (pos+1 < size && regex[pos] == '\134' && regex[pos+1] == ')')))) { - if (regexp_context_indep_ops) + if (silc_regexp_context_indep_ops) goto op_error; else goto normal_char; @@ -1338,7 +1340,7 @@ SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp) case Roptional: { if (beginning_context) { - if (regexp_context_indep_ops) + if (silc_regexp_context_indep_ops) goto op_error; else goto normal_char; @@ -1353,15 +1355,15 @@ SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp) case Rstar: case Rplus: { - store_jump: if (beginning_context) { - if (regexp_context_indep_ops) + if (silc_regexp_context_indep_ops) goto op_error; else goto normal_char; } if (CURRENT_LEVEL_START == pattern_offset) break; /* ignore empty patterns for + and * */ + store_jump: ALLOC(9); INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, pattern_offset + 6); @@ -1409,7 +1411,7 @@ SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp) if (paren_depth <= 0) goto parenthesis_error; POP_LEVEL_STARTS; - current_level = regexp_precedences[Ropenpar]; + current_level = silc_regexp_precedencess[Ropenpar]; paren_depth--; if (paren_depth < num_open_registers) { @@ -1478,7 +1480,7 @@ SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp) while (ch != '\135' || firstchar) { firstchar = 0; - if (regexp_ansi_sequences && ch == '\134') + if (silc_regexp_ansi_sequences && ch == '\134') { NEXTCHAR(ch); ANSI_TRANSLATE(ch); @@ -1518,22 +1520,32 @@ SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp) Rnormals and one Rplus. The third is compiled as n-1 Rnormals and m-n Rnormals with Roptionals. 0 values have special compilation. 
*/ - int min, max, i; + int min, max, i, alen = 2; if (pos >= size) - goto op_error; + goto normal_char; /* Consider literal */ /* Get the preceding atom */ if (pos < 2) - goto op_error; + goto normal_char; /* Consider literal */ pos -= 2; NEXTCHAR(a); + if (translate) + a = translate[(unsigned char)a]; + op = silc_regexp_plain_ops[(unsigned char)a]; + + if (op == Ranychar) { + opcode = Canychar; + a = 0; + alen = 1; + } + NEXTCHAR(ch); /* Get min value */ NEXTCHAR(ch); if (!isdigit(ch)) - goto repeat_value_error; + goto normal_char; /* Consider literal */ min = ch - '0'; NEXTCHAR(ch); while (isdigit(ch)) { @@ -1557,9 +1569,10 @@ SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp) /* Store min - 1 many Cexacts. */ for (i = 0; i < min - 1; i++) { SET_LEVEL_START; - ALLOC(2); - STORE(Cexact); - STORE((unsigned char)a); + ALLOC(alen); + STORE(opcode); + if (a) + STORE((unsigned char)a); } break; } @@ -1579,9 +1592,10 @@ SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp) /* Store min - 1 many Cexacts. */ for (i = 0; i < min - 1; i++) { SET_LEVEL_START; - ALLOC(2); - STORE(Cexact); - STORE((unsigned char)a); + ALLOC(alen); + STORE(opcode); + if (a) + STORE((unsigned char)a); } /* Store Rplus */ @@ -1623,17 +1637,19 @@ SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp) /* Store min - 1 many Cexacts. */ for (i = 0; min && i < min - 1; i++) { SET_LEVEL_START; - ALLOC(2); - STORE(Cexact); - STORE((unsigned char)a); + ALLOC(alen); + STORE(opcode); + if (a) + STORE((unsigned char)a); } /* Store max - min Cexacts and Roptionals. 
*/ for (i = 0; i < max - min; i++) { SET_LEVEL_START; - ALLOC(2); - STORE(Cexact); - STORE((unsigned char)a); + ALLOC(alen); + STORE(opcode); + if (a) + STORE((unsigned char)a); ALLOC(3); INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, pattern_offset + 3); @@ -1698,7 +1714,7 @@ SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp) ALLOC(1); STORE(Cend); SET_FIELDS; - if (!re_optimize(bufp)) + if (!silc_re_optimize(bufp)) return SILC_ERR; return SILC_OK; @@ -1757,8 +1773,8 @@ SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp) if (translate) \ var = translate[var] -int re_match(SilcRegex bufp, unsigned char *string, int size, int pos, - regexp_registers_t old_regs, unsigned int flags) +int silc_re_match(SilcRegex bufp, unsigned char *string, int size, int pos, + regexp_registers_t old_regs, unsigned int flags) { unsigned char *code; unsigned char *translate; @@ -2181,8 +2197,8 @@ int re_match(SilcRegex bufp, unsigned char *string, int size, int pos, #undef PREFETCH #undef NEXTCHAR -int re_search(SilcRegex bufp, unsigned char *string, int size, int pos, - int range, regexp_registers_t regs, unsigned int flags) +int silc_re_search(SilcRegex bufp, unsigned char *string, int size, int pos, + int range, regexp_registers_t regs, unsigned int flags) { unsigned char *fastmap; unsigned char *translate; @@ -2199,7 +2215,7 @@ int re_search(SilcRegex bufp, unsigned char *string, int size, int pos, fastmap = bufp->fastmap; translate = bufp->translate; if (fastmap && !bufp->fastmap_accurate) { - if (re_compile_fastmap(bufp)) + if (silc_re_compile_fastmap(bufp)) return -2; } @@ -2268,7 +2284,7 @@ int re_search(SilcRegex bufp, unsigned char *string, int size, int pos, continue; } SILC_ASSERT(pos >= 0 && pos <= size); - ret = re_match(bufp, string, size, pos, regs, flags); + ret = silc_re_match(bufp, string, size, pos, regs, flags); if (ret >= 0) return pos; if (ret == -2) @@ -2285,7 +2301,6 @@ SilcBool silc_regex_compile(SilcRegex 
regexp, const char *regex, SilcRegexFlags flags) { SilcResult ret; - int syntax = 0; if (!regexp || !regex) { silc_set_errno(SILC_ERR_INVALID_ARGUMENT); @@ -2299,16 +2314,17 @@ SilcBool silc_regex_compile(SilcRegex regexp, const char *regex, if (regexp->rstack) regexp->rstack = silc_stack_alloc(512, regexp->rstack); - /* Set syntax */ - syntax |= (RE_CONTEXT_INDEP_OPS | RE_NO_BK_PARENS | - RE_NO_BK_VBAR | RE_REPEAT); - re_set_syntax(syntax); - /* Compile */ - ret = re_compile_pattern((char *)regex, strlen(regex), regexp); + ret = silc_re_compile_pattern((char *)regex, strlen(regex), regexp); if (ret != SILC_OK) silc_set_errno(ret); + if (ret != SILC_OK) { + silc_regex_free(regexp); + regexp->rstack = NULL; + regexp->buffer = NULL; + } + return ret == SILC_OK; } @@ -2343,8 +2359,8 @@ SilcBool silc_regex_match(SilcRegex regexp, const char *string, f |= RE_NOTEOL; /* Search */ - ret = re_search(regexp, (char *)string, string_len, 0, string_len, - num_match ? &regs : NULL, f); + ret = silc_re_search(regexp, (char *)string, string_len, 0, string_len, + num_match ?
&regs : NULL, f); if (ret < 0) { if (ret == -1) silc_set_errno(SILC_ERR_NOT_FOUND); @@ -2377,50 +2393,69 @@ SilcBool silc_regex_va(const char *string, SilcUInt32 string_len, SilcRegexStruct reg; SilcRegexMatch m = NULL; SilcBuffer buf, *rets = NULL; + SilcStack stack; int i, c = 0; /* Compile */ if (!silc_regex_compile(&reg, regex, 0)) return FALSE; + stack = reg.rstack; + silc_stack_push(stack, NULL); + /* Get match pointers */ if (match) { - rets = silc_malloc(sizeof(*rets)); - if (!rets) + rets = silc_smalloc(stack, sizeof(*rets)); + if (!rets) { + silc_stack_pop(stack); + silc_regex_free(&reg); return FALSE; + } rets[c++] = match; while ((buf = va_arg(va, SilcBuffer))) { - rets = silc_realloc(rets, (c + 1) * sizeof(*rets)); - if (!rets) + rets = silc_srealloc(stack, c * sizeof(*rets), + rets, (c + 1) * sizeof(*rets)); + if (!rets) { + silc_stack_pop(stack); + silc_regex_free(&reg); return FALSE; + } rets[c++] = buf; } - m = silc_malloc(c * sizeof(*m)); + m = silc_smalloc(stack, c * sizeof(*m)); if (!m) { - silc_free(rets); + silc_sfree(stack, rets); + silc_stack_pop(stack); + silc_regex_free(&reg); return FALSE; } } /* Match */ if (!silc_regex_match(&reg, string, string_len, c, m, 0)) { - silc_free(m); - silc_free(rets); + silc_sfree(stack, m); + silc_sfree(stack, rets); + silc_stack_pop(stack); + silc_regex_free(&reg); return FALSE; } /* Return matches */ for (i = 0; i < c; i++) { - if (m[i].start == -1) + if (m[i].start == -1) { + silc_buffer_set(rets[i], NULL, 0); continue; + } silc_buffer_set(rets[i], (unsigned char *)string + m[i].start, m[i].end - m[i].start); } - silc_free(m); - silc_free(rets); + silc_sfree(stack, m); + silc_sfree(stack, rets); + silc_stack_pop(stack); + silc_regex_free(&reg); return TRUE; } @@ -2473,3 +2508,224 @@ SilcBool silc_regex_buffer(SilcBuffer buffer, const char *regex, return ret; } + +/***************************** Substitution API *****************************/ + +/* Regexp to parse sed substitution command syntax */ +#define SILC_REGEXP_SUBST \ +
"^(/?.+/?[^!s]|[0-9]+|\\$)?(!?s)(/)(.*[^\\])?(/)(.*[^\\])?(/)(!?.+)?"
+
+/* Largest line number accepted in ADDR.  SilcInt32 is taken to be a 32-bit
+   signed type, as its name says. */
+#define SILC_SUBST_ADDR_MAX 2147483647
+
+/* Substitution context */
+typedef struct {
+  SilcInt32 addr_number;            /* Line number to match, -1 for last
+                                       line, 0 when ADDR has no NUMBER or $ */
+  SilcUInt32 line;                  /* Current line number */
+  char *str_regexp;                 /* REGEXP to match */
+  SilcBufferRegexFlags match_flags; /* Match flags */
+  SilcBufferRegexFlags addr_flags;  /* ADDR flags */
+  SilcBuffer rep;                   /* REPLACEMENT */
+} SilcSubstContext;
+
+/* Function to check the ADDR match and do rest of the match and
+   substitution.  It is handed to silc_buffer_format with SILC_STR_FUNC by
+   silc_subst, and `context' is the SilcSubstContext.  Every call is counted
+   as one line.  Returns 0 without touching `buffer' when the NUMBER or `$'
+   address does not select the line, otherwise returns whatever
+   silc_buffer_format returns for the REGEXP match and REPLACEMENT.  The
+   `stack' and `value' arguments are not used. */
+
+static int silc_subst_addr(SilcStack stack, SilcBuffer buffer, void *value,
+                           void *context)
+{
+  SilcSubstContext *ctx = context;
+  SilcBool negate = (ctx->addr_flags & SILC_STR_REGEX_NOT) != 0;
+
+  ctx->line++;
+
+  /* If NUMBER was set in ADDR, match for specific line number.  The line
+     is skipped when "is the addressed line" and "address is negated" agree:
+     plain ADDR on another line, or !ADDR on the addressed line. */
+  if (ctx->addr_number > 0) {
+    SilcBool on_line = (SilcUInt32)ctx->addr_number == ctx->line;
+
+    if (on_line == negate)
+      return 0;
+  }
+
+  /* Check for last line if ADDR was '$'.  The line is treated as the last
+     one when the buffer has no room left after its tail. */
+  if (ctx->addr_number == -1) {
+    SilcBool last_line = buffer->tail == buffer->end;
+
+    if (last_line == negate)
+      return 0;
+  }
+
+  /* Match and replace */
+  return silc_buffer_format(buffer,
+                            SILC_STR_REGEX(ctx->str_regexp, ctx->match_flags),
+                            SILC_STR_REPLACE(silc_buffer_data(ctx->rep) ?
+                                             silc_buffer_data(ctx->rep) :
+                                             (unsigned char *)"",
+                                             silc_buffer_len(ctx->rep)),
+                            SILC_STR_END, SILC_STR_END);
+}
+
+/* Matching and substitution ala sed.  The `subst' string has the form
+
+     [ADDR][!]s/REGEXP/REPLACEMENT/[FLAGS]
+
+   where ADDR is /regexp/, a line NUMBER (1 or larger) or `$', a `!' before
+   the `s' command negates ADDR, and the only supported flag is `g'.  An
+   escaped `\/' in REPLACEMENT is turned into a plain `/' before use.  The
+   substitution is applied to `buffer' in place.
+
+   Returns TRUE when the final silc_buffer_format call returns a
+   non-negative value.  Returns FALSE if `buffer' or `subst' is NULL, if
+   `subst' is not valid (errno is set to SILC_ERR_INVALID_ARGUMENT or
+   SILC_ERR_SYNTAX with a reason), or if copying a part of the expression
+   fails. */
+
+SilcBool silc_subst(SilcBuffer buffer, const char *subst)
+{
+  SilcSubstContext ctx;
+  SilcBufferStruct match, addr, command, exp_start, exp, exp_end;
+  SilcBufferStruct rep, rep_end, flags;
+  SilcBufferRegexFlags addr_flags = 0, match_flags = 0;
+  char *addr_regexp = NULL;     /* Owned copy of the ADDR regexp, if any */
+  char *str_addr = "";          /* ADDR regexp to match, empty if none */
+  int ret = -1;
+
+  /* Cleared first so that the cleanup at `out' is safe on every path */
+  memset(&ctx, 0, sizeof(ctx));
+
+  if (!buffer || !subst) {
+    silc_set_errno(SILC_ERR_INVALID_ARGUMENT);
+    goto out;
+  }
+
+  SILC_LOG_DEBUG(("Substitution '%s'", subst));
+
+  /* Parse the expression syntax */
+  if (!silc_regex(subst, SILC_REGEXP_SUBST, &match, &addr, &command,
+                  &exp_start, &exp, &exp_end, &rep, &rep_end, &flags, NULL)) {
+    silc_set_errno_reason(SILC_ERR_SYNTAX, "Invalid substitution expression");
+    goto out;
+  }
+
+  /* Check address syntax */
+  if (silc_buffer_len(&addr)) {
+    if (*silc_buffer_data(&addr) == '/') {
+      /* Strip the surrounding slashes of /regexp/ */
+      silc_buffer_pull(&addr, 1);
+      if (addr.tail[-1] != '/') {
+        silc_set_errno_reason(SILC_ERR_SYNTAX,
+                              "Invalid address syntax, missing '/'");
+        goto out;
+      }
+      silc_buffer_push_tail(&addr, 1);
+
+      if (!silc_buffer_len(&addr)) {
+        silc_set_errno_reason(SILC_ERR_SYNTAX,
+                              "Invalid address syntax, missing regular "
+                              "expression");
+        goto out;
+      }
+
+      /* Do not go on with a NULL regexp if the copy could not be made */
+      addr_regexp = silc_memdup(silc_buffer_data(&addr),
+                                silc_buffer_len(&addr));
+      if (!addr_regexp)
+        goto out;
+      str_addr = addr_regexp;
+
+    } else if (*silc_buffer_data(&addr) == '$' &&
+               silc_buffer_len(&addr) == 1) {
+      ctx.addr_number = -1;
+
+    } else if (isdigit((int)*silc_buffer_data(&addr))) {
+      /* Accumulate the decimal line number, starting from the 0 set by
+         the memset above */
+      while (silc_buffer_len(&addr) &&
+             isdigit((int)*silc_buffer_data(&addr))) {
+        int digit = *silc_buffer_data(&addr) - '0';
+
+        /* Refuse the digit instead of overflowing the signed number */
+        if (ctx.addr_number > (SILC_SUBST_ADDR_MAX - digit) / 10) {
+          silc_set_errno_reason(SILC_ERR_SYNTAX,
+                                "Invalid address syntax, line address is "
+                                "too large");
+          goto out;
+        }
+
+        ctx.addr_number = ctx.addr_number * 10 + digit;
+        silc_buffer_pull(&addr, 1);
+      }
+
+      if (silc_buffer_len(&addr)) {
+        silc_set_errno_reason(SILC_ERR_SYNTAX,
+                              "Invalid address syntax, not a number");
+        goto out;
+      }
+
+      if (ctx.addr_number == 0) {
+        silc_set_errno_reason(SILC_ERR_SYNTAX,
+                              "Invalid address syntax, line address is 0");
+        goto out;
+      }
+
+    } else {
+      silc_set_errno_reason(SILC_ERR_SYNTAX, "Unsupported address syntax");
+      goto out;
+    }
+  }
+
+  /* Check command syntax */
+  if (!silc_buffer_len(&command) || silc_buffer_len(&command) > 2) {
+    silc_set_errno_reason(SILC_ERR_SYNTAX, "Invalid command");
+    goto out;
+  }
+  if ((silc_buffer_len(&command) == 1 &&
+       !silc_buffer_memcmp(&command, "s", 1)) ||
+      (silc_buffer_len(&command) == 2 &&
+       !silc_buffer_memcmp(&command, "!s", 2))) {
+    silc_set_errno_reason(SILC_ERR_SYNTAX, "Invalid command");
+    goto out;
+  }
+  if (silc_buffer_len(&command) == 2)
+    addr_flags |= SILC_STR_REGEX_NOT;
+
+  /* Check REGEXP syntax */
+  if (!silc_buffer_len(&exp_start) ||
+      !silc_buffer_memcmp(&exp_start, "/", 1)) {
+    silc_set_errno_reason(SILC_ERR_SYNTAX,
+                          "Invalid substitution syntax, missing '/'");
+    goto out;
+  }
+  if (!silc_buffer_len(&exp_end) ||
+      !silc_buffer_memcmp(&exp_end, "/", 1)) {
+    silc_set_errno_reason(SILC_ERR_SYNTAX,
+                          "Invalid substitution syntax, missing '/'");
+    goto out;
+  }
+
+  /* Check FLAGS syntax */
+  if (silc_buffer_len(&flags)) {
+    if (silc_buffer_len(&flags) > 1) {
+      silc_set_errno_reason(SILC_ERR_SYNTAX, "Invalid flags");
+      goto out;
+    }
+
+    /* Check supported flags */
+    if (silc_buffer_len(&flags) == 1) {
+      if (silc_buffer_memcmp(&flags, "g", 1)) {
+        match_flags |= SILC_STR_REGEX_ALL;
+      } else {
+        silc_set_errno_reason(SILC_ERR_SYNTAX, "Unsupported flag");
+        goto out;
+      }
+    }
+  }
+
+  /* Set flags */
+  match_flags |= SILC_STR_REGEX_INCLUSIVE;
+  addr_flags |= SILC_STR_REGEX_NL | SILC_STR_REGEX_NO_ADVANCE;
+
+  /* Do not go on with a NULL regexp if the copy could not be made */
+  ctx.str_regexp = silc_memdup(silc_buffer_data(&exp),
+                               silc_buffer_len(&exp));
+  if (!ctx.str_regexp)
+    goto out;
+  ctx.addr_flags = addr_flags;
+  ctx.match_flags = match_flags;
+
+  /* Unescape escapes from REPLACEMENT.  The result of the formatting is
+     not checked, as before.  NOTE(review): presumably a REPLACEMENT that
+     has no escaped '/' must not be treated as an error here; confirm
+     against silc_buffer_format. */
+  ctx.rep = silc_buffer_copy(&rep);
+  if (!ctx.rep)
+    goto out;
+  if (silc_buffer_len(ctx.rep))
+    silc_buffer_format(ctx.rep,
+                       SILC_STR_REGEX("\\\\/", (SILC_STR_REGEX_ALL |
+                                                SILC_STR_REGEX_INCLUSIVE)),
+                       SILC_STR_REPLACE("/", 1),
+                       SILC_STR_END, SILC_STR_END);
+
+  /* If NUMBER or $ is specified, handle NOT flag in the silc_subst_addr */
+  if (ctx.addr_number)
+    addr_flags &= ~SILC_STR_REGEX_NOT;
+
+  SILC_LOG_DEBUG(("ADDR '%s' flags 0x%x, NUMBER %d", str_addr, addr_flags,
+                  ctx.addr_number));
+  SILC_LOG_DEBUG(("REGEXP '%s' flags 0x%x", ctx.str_regexp, match_flags));
+
+  /* Match and replace */
+  ret = silc_buffer_format(buffer,
+                           SILC_STR_REGEX(str_addr, addr_flags),
+                           SILC_STR_FUNC(silc_subst_addr, NULL, &ctx),
+                           SILC_STR_END, SILC_STR_END);
+
+ out:
+  /* Everything below may still be NULL here; only owned copies are freed */
+  silc_free(addr_regexp);
+  silc_free(ctx.str_regexp);
+  silc_buffer_free(ctx.rep);
+
+  return ret >= 0 ? TRUE : FALSE;
+}