/*
The SILC Regex API and modifications by Pekka Riikonen, under the same
- license as the original code. We've added following features:
+ license as the original code. We've added the following features:
- - RE_NOTBOL - bol fails to match (conforming POSIX)
- - RE_NOTEOL - eol fails to match (conforming POSIX)
- - RE_REPEAT a{n,m} - bounded repeat (conforming POSIX)
- - SilStack support - compile without real memory allocations
+ - RE_SYNTAX_POSIX POSIX extended regular expression syntax
+ - RE_REPEAT bounded repeat a{n,m} (RE_SYNTAX_POSIX)
+ - RE_NOTBOL bol fails to match (conforming POSIX regex API)
+ - RE_NOTEOL eol fails to match (conforming POSIX regex API)
+ - SilcStack support compile/match without real memory allocations
*/
#include "silc.h"
#define RE_NO_GNU_EXTENSIONS 128 /* no gnu extensions */
#define RE_NOTBOL 256 /* bol fails to match */
#define RE_NOTEOL 512 /* eol fails to match */
-#define RE_REPEAT 1024 /* bounded repeat, must be quoted without
- RE_NO_BK_VBAR */
+#define RE_REPEAT 1024 /* bounded repeat expression */
/* definitions for some common regexp styles */
#define RE_SYNTAX_AWK (RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS)
#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK|RE_NEWLINE_OR)
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR)
+#define RE_SYNTAX_POSIX (RE_SYNTAX_AWK|RE_REPEAT)
#define RE_SYNTAX_EMACS 0
-#define Sword 1
-#define Swhitespace 2
-#define Sdigit 4
-#define Soctaldigit 8
-#define Shexdigit 16
-
/* Registers */
typedef struct re_registers {
int start[RE_NREGS]; /* start offset of region */
Ropenrep, /* opening bounded repeat */
};
-static int re_compile_initialized = 0;
-static int regexp_syntax = 0;
-static unsigned char regexp_plain_ops[256];
-static unsigned char regexp_quoted_ops[256];
-static unsigned char regexp_precedences[Rnum_ops];
-static int regexp_context_indep_ops;
-static int regexp_ansi_sequences;
+#define Sword 1
+#define Swhitespace 2
+#define Sdigit 4
+#define Soctaldigit 8
+#define Shexdigit 16
#define NUM_LEVELS 5 /* number of precedence levels in use */
#define MAX_NESTING 100 /* max nesting level of operators */
-
-#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
-
-unsigned char re_syntax_table[256];
-
-void re_compile_initialize(void)
+#define SYNTAX(ch) silc_re_syntax_table[(unsigned char)(ch)]
+
+static int silc_regexp_syntax = RE_SYNTAX_POSIX;
+static int silc_regexp_context_indep_ops;
+static int silc_regexp_ansi_sequences;
+static int silc_re_compile_initialized = 0;
+static unsigned char silc_re_syntax_table[256];
+static unsigned char silc_regexp_plain_ops[256];
+static unsigned char silc_regexp_quoted_ops[256];
+static unsigned char silc_regexp_precedencess[Rnum_ops];
+
+void silc_re_compile_initialize(void)
{
int a;
if (!syntax_table_inited)
{
syntax_table_inited = 1;
- memset(re_syntax_table, 0, 256);
+ memset(silc_re_syntax_table, 0, 256);
for (a = 'a'; a <= 'z'; a++)
- re_syntax_table[a] = Sword;
+ silc_re_syntax_table[a] = Sword;
for (a = 'A'; a <= 'Z'; a++)
- re_syntax_table[a] = Sword;
+ silc_re_syntax_table[a] = Sword;
for (a = '0'; a <= '9'; a++)
- re_syntax_table[a] = Sword | Sdigit | Shexdigit;
+ silc_re_syntax_table[a] = Sword | Sdigit | Shexdigit;
for (a = '0'; a <= '7'; a++)
- re_syntax_table[a] |= Soctaldigit;
+ silc_re_syntax_table[a] |= Soctaldigit;
for (a = 'A'; a <= 'F'; a++)
- re_syntax_table[a] |= Shexdigit;
+ silc_re_syntax_table[a] |= Shexdigit;
for (a = 'a'; a <= 'f'; a++)
- re_syntax_table[a] |= Shexdigit;
- re_syntax_table['_'] = Sword;
+ silc_re_syntax_table[a] |= Shexdigit;
+ silc_re_syntax_table['_'] = Sword;
for (a = 9; a <= 13; a++)
- re_syntax_table[a] = Swhitespace;
- re_syntax_table[' '] = Swhitespace;
+ silc_re_syntax_table[a] = Swhitespace;
+ silc_re_syntax_table[' '] = Swhitespace;
}
- re_compile_initialized = 1;
+ silc_re_compile_initialized = 1;
for (a = 0; a < 256; a++)
{
- regexp_plain_ops[a] = Rnormal;
- regexp_quoted_ops[a] = Rnormal;
+ silc_regexp_plain_ops[a] = Rnormal;
+ silc_regexp_quoted_ops[a] = Rnormal;
}
for (a = '0'; a <= '9'; a++)
- regexp_quoted_ops[a] = Rmemory;
- regexp_plain_ops['\134'] = Rquote;
- if (regexp_syntax & RE_NO_BK_PARENS)
+ silc_regexp_quoted_ops[a] = Rmemory;
+ silc_regexp_plain_ops['\134'] = Rquote;
+ if (silc_regexp_syntax & RE_NO_BK_PARENS)
{
- regexp_plain_ops['('] = Ropenpar;
- regexp_plain_ops[')'] = Rclosepar;
+ silc_regexp_plain_ops['('] = Ropenpar;
+ silc_regexp_plain_ops[')'] = Rclosepar;
}
else
{
- regexp_quoted_ops['('] = Ropenpar;
- regexp_quoted_ops[')'] = Rclosepar;
+ silc_regexp_quoted_ops['('] = Ropenpar;
+ silc_regexp_quoted_ops[')'] = Rclosepar;
}
- if (regexp_syntax & RE_NO_BK_VBAR)
- regexp_plain_ops['\174'] = Ror;
+ if (silc_regexp_syntax & RE_NO_BK_VBAR)
+ silc_regexp_plain_ops['\174'] = Ror;
else
- regexp_quoted_ops['\174'] = Ror;
- regexp_plain_ops['*'] = Rstar;
- if (regexp_syntax & RE_BK_PLUS_QM)
+ silc_regexp_quoted_ops['\174'] = Ror;
+ silc_regexp_plain_ops['*'] = Rstar;
+ if (silc_regexp_syntax & RE_BK_PLUS_QM)
{
- regexp_quoted_ops['+'] = Rplus;
- regexp_quoted_ops['?'] = Roptional;
+ silc_regexp_quoted_ops['+'] = Rplus;
+ silc_regexp_quoted_ops['?'] = Roptional;
}
else
{
- regexp_plain_ops['+'] = Rplus;
- regexp_plain_ops['?'] = Roptional;
+ silc_regexp_plain_ops['+'] = Rplus;
+ silc_regexp_plain_ops['?'] = Roptional;
}
- if (regexp_syntax & RE_NEWLINE_OR)
- regexp_plain_ops['\n'] = Ror;
- regexp_plain_ops['\133'] = Ropenset;
- regexp_plain_ops['\136'] = Rbol;
- regexp_plain_ops['$'] = Reol;
- regexp_plain_ops['.'] = Ranychar;
- if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS))
+ if (silc_regexp_syntax & RE_NEWLINE_OR)
+ silc_regexp_plain_ops['\n'] = Ror;
+ silc_regexp_plain_ops['\133'] = Ropenset;
+ silc_regexp_plain_ops['\136'] = Rbol;
+ silc_regexp_plain_ops['$'] = Reol;
+ silc_regexp_plain_ops['.'] = Ranychar;
+ if (!(silc_regexp_syntax & RE_NO_GNU_EXTENSIONS))
{
- regexp_quoted_ops['w'] = Rwordchar;
- regexp_quoted_ops['W'] = Rnotwordchar;
- regexp_quoted_ops['<'] = Rwordbeg;
- regexp_quoted_ops['>'] = Rwordend;
- regexp_quoted_ops['b'] = Rwordbound;
- regexp_quoted_ops['B'] = Rnotwordbound;
- regexp_quoted_ops['`'] = Rbegbuf;
- regexp_quoted_ops['\''] = Rendbuf;
+ silc_regexp_quoted_ops['w'] = Rwordchar;
+ silc_regexp_quoted_ops['W'] = Rnotwordchar;
+ silc_regexp_quoted_ops['<'] = Rwordbeg;
+ silc_regexp_quoted_ops['>'] = Rwordend;
+ silc_regexp_quoted_ops['b'] = Rwordbound;
+ silc_regexp_quoted_ops['B'] = Rnotwordbound;
+ silc_regexp_quoted_ops['`'] = Rbegbuf;
+ silc_regexp_quoted_ops['\''] = Rendbuf;
}
- if (regexp_syntax & RE_ANSI_HEX)
- regexp_quoted_ops['v'] = Rextended_memory;
+ if (silc_regexp_syntax & RE_ANSI_HEX)
+ silc_regexp_quoted_ops['v'] = Rextended_memory;
for (a = 0; a < Rnum_ops; a++)
- regexp_precedences[a] = 4;
- if (regexp_syntax & RE_TIGHT_VBAR)
+ silc_regexp_precedencess[a] = 4;
+ if (silc_regexp_syntax & RE_TIGHT_VBAR)
{
- regexp_precedences[Ror] = 3;
- regexp_precedences[Rbol] = 2;
- regexp_precedences[Reol] = 2;
+ silc_regexp_precedencess[Ror] = 3;
+ silc_regexp_precedencess[Rbol] = 2;
+ silc_regexp_precedencess[Reol] = 2;
}
else
{
- regexp_precedences[Ror] = 2;
- regexp_precedences[Rbol] = 3;
- regexp_precedences[Reol] = 3;
+ silc_regexp_precedencess[Ror] = 2;
+ silc_regexp_precedencess[Rbol] = 3;
+ silc_regexp_precedencess[Reol] = 3;
}
- if (regexp_syntax & RE_REPEAT)
+ if (silc_regexp_syntax & RE_REPEAT)
{
- if (regexp_syntax & RE_NO_BK_PARENS)
+ if (silc_regexp_syntax & RE_NO_BK_PARENS)
{
- regexp_plain_ops['{'] = Ropenrep;
+ silc_regexp_plain_ops['{'] = Ropenrep;
}
else
{
- regexp_quoted_ops['{'] = Ropenrep;
+ silc_regexp_quoted_ops['{'] = Ropenrep;
}
}
- regexp_precedences[Rclosepar] = 1;
- regexp_precedences[Rend] = 0;
- regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0;
- regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0;
+ silc_regexp_precedencess[Rclosepar] = 1;
+ silc_regexp_precedencess[Rend] = 0;
+ silc_regexp_context_indep_ops = (silc_regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0;
+ silc_regexp_ansi_sequences = (silc_regexp_syntax & RE_ANSI_HEX) != 0;
}
-int re_set_syntax(int syntax)
+/* Stores `syntax' as the active regex syntax (silc_regexp_syntax) and
+   re-runs silc_re_compile_initialize() so that it is run again under the
+   new syntax.  Returns the syntax value that was active before the call.
+   The syntax lives in a file-scope static, so the change is global. */
+int silc_re_set_syntax(int syntax)
{
int ret;
- ret = regexp_syntax;
- regexp_syntax = syntax;
- re_compile_initialize();
+ ret = silc_regexp_syntax;
+ silc_regexp_syntax = syntax;
+ silc_re_compile_initialize();
return ret;
}
-static int hex_char_to_decimal(int ch)
+static int silc_hex_char_to_decimal(int ch)
{
if (ch >= '0' && ch <= '9')
return ch - '0';
return 16;
}
-static int re_compile_fastmap_aux(unsigned char *code, int pos,
- unsigned char *visited,
- unsigned char *can_be_null,
- unsigned char *fastmap)
+static int silc_re_compile_fastmap_aux(unsigned char *code, int pos,
+ unsigned char *visited,
+ unsigned char *can_be_null,
+ unsigned char *fastmap)
{
int a;
int b;
a = (unsigned char)code[pos++];
a |= (unsigned char)code[pos++] << 8;
a = pos + (int)SHORT(a);
- return re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
+ return silc_re_compile_fastmap_aux(code, a, visited,
+ can_be_null, fastmap);
}
case Crepeat1:
{
}
}
-static int re_do_compile_fastmap(unsigned char *buffer, int used, int pos,
- unsigned char *can_be_null,
- unsigned char *fastmap, SilcRegex bufp)
+static int silc_re_do_compile_fastmap(unsigned char *buffer, int used, int pos,
+ unsigned char *can_be_null,
+ unsigned char *fastmap, SilcRegex bufp)
{
unsigned char small_visited[512], *visited;
int ret;
*can_be_null = 0;
memset(fastmap, 0, 256);
memset(visited, 0, used);
- ret = re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap);
+ ret = silc_re_compile_fastmap_aux(buffer, pos, visited,
+ can_be_null, fastmap);
if (visited != small_visited) {
silc_sfree(bufp->rstack, visited);
silc_stack_pop(bufp->rstack);
return ret == 0;
}
-int re_compile_fastmap(SilcRegex bufp)
+int silc_re_compile_fastmap(SilcRegex bufp)
{
if (!bufp->fastmap || bufp->fastmap_accurate)
return 0;
SILC_ASSERT(bufp->used > 0);
- if (!re_do_compile_fastmap(bufp->buffer,
- bufp->used,
- 0,
- &bufp->can_be_null,
- bufp->fastmap, bufp))
+ if (!silc_re_do_compile_fastmap(bufp->buffer,
+ bufp->used,
+ 0,
+ &bufp->can_be_null,
+ bufp->fastmap, bufp))
return -1;
if (bufp->buffer[0] == Cbol)
bufp->anchor = 1; /* begline */
*
*/
-static int re_optimize_star_jump(SilcRegex bufp, unsigned char *code)
+static int silc_re_optimize_star_jump(SilcRegex bufp, unsigned char *code)
{
unsigned char map[256];
unsigned char can_be_null;
SILC_ASSERT(p1[-3] == Cfailure_jump);
p2 = code;
/* p1 points inside loop, p2 points to after loop */
- if (!re_do_compile_fastmap(bufp->buffer, bufp->used,
- (int)(p2 - bufp->buffer),
- &can_be_null, map, bufp))
+ if (!silc_re_do_compile_fastmap(bufp->buffer, bufp->used,
+ (int)(p2 - bufp->buffer),
+ &can_be_null, map, bufp))
goto make_normal_jump;
/* If we might introduce a new update point inside the
return 1;
}
-static int re_optimize(SilcRegex bufp)
+static int silc_re_optimize(SilcRegex bufp)
{
unsigned char *code;
}
case Cstar_jump:
{
- if (!re_optimize_star_jump(bufp, code))
+ if (!silc_re_optimize_star_jump(bufp, code))
{
return 0;
}
{ \
unsigned char gethex_ch, gethex_value; \
NEXTCHAR(gethex_ch); \
- gethex_value = hex_char_to_decimal(gethex_ch); \
+ gethex_value = silc_hex_char_to_decimal(gethex_ch); \
if (gethex_value == 16) \
goto hex_error; \
NEXTCHAR(gethex_ch); \
- gethex_ch = hex_char_to_decimal(gethex_ch); \
+ gethex_ch = silc_hex_char_to_decimal(gethex_ch); \
if (gethex_ch == 16) \
goto hex_error; \
(var) = gethex_value * 16 + gethex_ch; \
} \
}
-SilcResult re_compile_pattern(unsigned char *regex, int size, SilcRegex bufp)
+SilcResult silc_re_compile_pattern(unsigned char *regex, int size,
+ SilcRegex bufp)
{
int a;
int pos;
int op;
int current_level;
int level;
- int opcode;
+ int opcode = 0;
int pattern_offset = 0, alloc;
int starts[NUM_LEVELS * MAX_NESTING];
int starts_base;
int open_registers[RE_NREGS];
int beginning_context;
- if (!re_compile_initialized)
- re_compile_initialize();
+ if (!silc_re_compile_initialized)
+ silc_re_compile_initialize();
bufp->used = 0;
bufp->fastmap_accurate = 0;
bufp->uses_registers = 1;
NEXTCHAR(ch);
if (translate)
ch = translate[(unsigned char)ch];
- op = regexp_plain_ops[(unsigned char)ch];
+ op = silc_regexp_plain_ops[(unsigned char)ch];
if (op == Rquote)
{
NEXTCHAR(ch);
- op = regexp_quoted_ops[(unsigned char)ch];
- if (op == Rnormal && regexp_ansi_sequences)
+ op = silc_regexp_quoted_ops[(unsigned char)ch];
+ if (op == Rnormal && silc_regexp_ansi_sequences)
ANSI_TRANSLATE(ch);
}
}
- level = regexp_precedences[op];
+ level = silc_regexp_precedencess[op];
/* printf("ch='%c' op=%d level=%d current_level=%d
curlevstart=%d\n", ch, op, level, current_level,
CURRENT_LEVEL_START); */
case Rbol:
{
if (!beginning_context) {
- if (regexp_context_indep_ops)
+ if (silc_regexp_context_indep_ops)
goto op_error;
else
goto normal_char;
case Reol:
{
if (!((pos >= size) ||
- ((regexp_syntax & RE_NO_BK_VBAR) ?
+ ((silc_regexp_syntax & RE_NO_BK_VBAR) ?
(regex[pos] == '\174') :
(pos+1 < size && regex[pos] == '\134' &&
regex[pos+1] == '\174')) ||
- ((regexp_syntax & RE_NO_BK_PARENS)?
+ ((silc_regexp_syntax & RE_NO_BK_PARENS)?
(regex[pos] == ')'):
(pos+1 < size && regex[pos] == '\134' &&
regex[pos+1] == ')')))) {
- if (regexp_context_indep_ops)
+ if (silc_regexp_context_indep_ops)
goto op_error;
else
goto normal_char;
case Roptional:
{
if (beginning_context) {
- if (regexp_context_indep_ops)
+ if (silc_regexp_context_indep_ops)
goto op_error;
else
goto normal_char;
case Rstar:
case Rplus:
{
- store_jump:
if (beginning_context) {
- if (regexp_context_indep_ops)
+ if (silc_regexp_context_indep_ops)
goto op_error;
else
goto normal_char;
}
if (CURRENT_LEVEL_START == pattern_offset)
break; /* ignore empty patterns for + and * */
+ store_jump:
ALLOC(9);
INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
pattern_offset + 6);
if (paren_depth <= 0)
goto parenthesis_error;
POP_LEVEL_STARTS;
- current_level = regexp_precedences[Ropenpar];
+ current_level = silc_regexp_precedencess[Ropenpar];
paren_depth--;
if (paren_depth < num_open_registers)
{
while (ch != '\135' || firstchar)
{
firstchar = 0;
- if (regexp_ansi_sequences && ch == '\134')
+ if (silc_regexp_ansi_sequences && ch == '\134')
{
NEXTCHAR(ch);
ANSI_TRANSLATE(ch);
Rnormals and one Rplus. The third is compiled as n-1 Rnormals
and m-n Rnormals with Roptionals. 0 values have special
compilation. */
- int min, max, i;
+ int min, max, i, alen = 2;
if (pos >= size)
- goto op_error;
+ goto normal_char; /* Consider literal */
/* Get the preceding atom */
if (pos < 2)
- goto op_error;
+ goto normal_char; /* Consider literal */
pos -= 2;
NEXTCHAR(a);
+ if (translate)
+ a = translate[(unsigned char)a];
+ op = silc_regexp_plain_ops[(unsigned char)a];
+
+ if (op == Ranychar) {
+ opcode = Canychar;
+ a = 0;
+ alen = 1;
+ }
+
NEXTCHAR(ch);
/* Get min value */
NEXTCHAR(ch);
if (!isdigit(ch))
- goto repeat_value_error;
+ goto normal_char; /* Consider literal */
min = ch - '0';
NEXTCHAR(ch);
while (isdigit(ch)) {
/* Store min - 1 many Cexacts. */
for (i = 0; i < min - 1; i++) {
SET_LEVEL_START;
- ALLOC(2);
- STORE(Cexact);
- STORE((unsigned char)a);
+ ALLOC(alen);
+ STORE(opcode);
+ if (a)
+ STORE((unsigned char)a);
}
break;
}
/* Store min - 1 many Cexacts. */
for (i = 0; i < min - 1; i++) {
SET_LEVEL_START;
- ALLOC(2);
- STORE(Cexact);
- STORE((unsigned char)a);
+ ALLOC(alen);
+ STORE(opcode);
+ if (a)
+ STORE((unsigned char)a);
}
/* Store Rplus */
/* Store min - 1 many Cexacts. */
for (i = 0; min && i < min - 1; i++) {
SET_LEVEL_START;
- ALLOC(2);
- STORE(Cexact);
- STORE((unsigned char)a);
+ ALLOC(alen);
+ STORE(opcode);
+ if (a)
+ STORE((unsigned char)a);
}
/* Store max - min Cexacts and Roptionals. */
for (i = 0; i < max - min; i++) {
SET_LEVEL_START;
- ALLOC(2);
- STORE(Cexact);
- STORE((unsigned char)a);
+ ALLOC(alen);
+ STORE(opcode);
+ if (a)
+ STORE((unsigned char)a);
ALLOC(3);
INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
pattern_offset + 3);
ALLOC(1);
STORE(Cend);
SET_FIELDS;
- if (!re_optimize(bufp))
+ if (!silc_re_optimize(bufp))
return SILC_ERR;
return SILC_OK;
if (translate) \
var = translate[var]
-int re_match(SilcRegex bufp, unsigned char *string, int size, int pos,
- regexp_registers_t old_regs, unsigned int flags)
+int silc_re_match(SilcRegex bufp, unsigned char *string, int size, int pos,
+ regexp_registers_t old_regs, unsigned int flags)
{
unsigned char *code;
unsigned char *translate;
#undef PREFETCH
#undef NEXTCHAR
-int re_search(SilcRegex bufp, unsigned char *string, int size, int pos,
- int range, regexp_registers_t regs, unsigned int flags)
+int silc_re_search(SilcRegex bufp, unsigned char *string, int size, int pos,
+ int range, regexp_registers_t regs, unsigned int flags)
{
unsigned char *fastmap;
unsigned char *translate;
fastmap = bufp->fastmap;
translate = bufp->translate;
if (fastmap && !bufp->fastmap_accurate) {
- if (re_compile_fastmap(bufp))
+ if (silc_re_compile_fastmap(bufp))
return -2;
}
continue;
}
SILC_ASSERT(pos >= 0 && pos <= size);
- ret = re_match(bufp, string, size, pos, regs, flags);
+ ret = silc_re_match(bufp, string, size, pos, regs, flags);
if (ret >= 0)
return pos;
if (ret == -2)
SilcRegexFlags flags)
{
SilcResult ret;
- int syntax = 0;
if (!regexp || !regex) {
silc_set_errno(SILC_ERR_INVALID_ARGUMENT);
if (regexp->rstack)
regexp->rstack = silc_stack_alloc(512, regexp->rstack);
- /* Set syntax */
- syntax |= (RE_CONTEXT_INDEP_OPS | RE_NO_BK_PARENS |
- RE_NO_BK_VBAR | RE_REPEAT);
- re_set_syntax(syntax);
-
/* Compile */
- ret = re_compile_pattern((char *)regex, strlen(regex), regexp);
+ ret = silc_re_compile_pattern((char *)regex, strlen(regex), regexp);
if (ret != SILC_OK)
silc_set_errno(ret);
+ if (ret != SILC_OK) {
+ silc_regex_free(regexp);
+ regexp->rstack = NULL;
+ regexp->buffer = NULL;
+ }
+
return ret == SILC_OK;
}
f |= RE_NOTEOL;
/* Search */
- ret = re_search(regexp, (char *)string, string_len, 0, string_len,
- num_match ? ®s : NULL, f);
+ ret = silc_re_search(regexp, (char *)string, string_len, 0, string_len,
+ num_match ? ®s : NULL, f);
if (ret < 0) {
if (ret == -1)
silc_set_errno(SILC_ERR_NOT_FOUND);
SilcRegexStruct reg;
SilcRegexMatch m = NULL;
SilcBuffer buf, *rets = NULL;
+ SilcStack stack;
int i, c = 0;
/* Compile */
if (!silc_regex_compile(®, regex, 0))
return FALSE;
+ stack = reg.rstack;
+ silc_stack_push(stack, NULL);
+
/* Get match pointers */
if (match) {
- rets = silc_malloc(sizeof(*rets));
- if (!rets)
+ rets = silc_smalloc(stack, sizeof(*rets));
+ if (!rets) {
+ silc_stack_pop(stack);
+ silc_regex_free(®);
return FALSE;
+ }
rets[c++] = match;
while ((buf = va_arg(va, SilcBuffer))) {
- rets = silc_realloc(rets, (c + 1) * sizeof(*rets));
- if (!rets)
+ rets = silc_srealloc(stack, c * sizeof(*rets),
+ rets, (c + 1) * sizeof(*rets));
+ if (!rets) {
+ silc_stack_pop(stack);
+ silc_regex_free(®);
return FALSE;
+ }
rets[c++] = buf;
}
- m = silc_malloc(c * sizeof(*m));
+ m = silc_smalloc(stack, c * sizeof(*m));
if (!m) {
- silc_free(rets);
+ silc_sfree(stack, rets);
+ silc_stack_pop(stack);
+ silc_regex_free(®);
return FALSE;
}
}
/* Match */
if (!silc_regex_match(®, string, string_len, c, m, 0)) {
- silc_free(m);
- silc_free(rets);
+ silc_sfree(stack, m);
+ silc_sfree(stack, rets);
+ silc_stack_pop(stack);
+ silc_regex_free(®);
return FALSE;
}
/* Return matches */
for (i = 0; i < c; i++) {
- if (m[i].start == -1)
+ if (m[i].start == -1) {
+ silc_buffer_set(rets[i], NULL, 0);
continue;
+ }
silc_buffer_set(rets[i], (unsigned char *)string + m[i].start,
m[i].end - m[i].start);
}
- silc_free(m);
- silc_free(rets);
+ silc_sfree(stack, m);
+ silc_sfree(stack, rets);
+ silc_stack_pop(stack);
+ silc_regex_free(®);
return TRUE;
}
return ret;
}
+
+/***************************** Substitution API *****************************/
+
+/* Regexp to parse sed substitution command syntax.  The parenthesized
+   groups are consumed by silc_subst in this order:
+     1. ADDR (optional): a /regexp/ form, a line number, or '$'
+     2. command: 's' or '!s'
+     3. '/' opening REGEXP
+     4. REGEXP (optional)
+     5. '/' closing REGEXP
+     6. REPLACEMENT (optional)
+     7. '/' closing REPLACEMENT
+     8. FLAGS (optional) */
+#define SILC_REGEXP_SUBST \
+ "^(/?.+/?[^!s]|[0-9]+|\\$)?(!?s)(/)(.*[^\\])?(/)(.*[^\\])?(/)(!?.+)?"
+
+/* Substitution context.  Filled in by silc_subst and handed to
+   silc_subst_addr as its callback context. */
+typedef struct {
+ SilcInt32 addr_number; /* Line number to match, -1 for last line ('$'),
+                           0 when ADDR gave no line number */
+ SilcUInt32 line; /* Current line number; incremented on every
+                     silc_subst_addr call */
+ char *str_regexp; /* REGEXP to match (heap copy, freed by silc_subst) */
+ SilcBufferRegexFlags match_flags; /* Match flags used with REGEXP */
+ SilcBufferRegexFlags addr_flags; /* ADDR flags; SILC_STR_REGEX_NOT here
+                                     inverts the line selection */
+ SilcBuffer rep; /* REPLACEMENT (freed by silc_subst) */
+} SilcSubstContext;
+
+/* Callback given to SILC_STR_FUNC by silc_subst.  Each call bumps the
+   line counter in the context and then decides, from the ADDR line
+   number or '$' together with the SILC_STR_REGEX_NOT flag, whether this
+   call should do the substitution.  Returns 0 when the call is skipped,
+   otherwise the return value of silc_buffer_format for the REGEXP match
+   and REPLACEMENT on `buffer'. */
+
+static int silc_subst_addr(SilcStack stack, SilcBuffer buffer, void *value,
+                           void *context)
+{
+  SilcSubstContext *sc = context;
+  int negated = (sc->addr_flags & SILC_STR_REGEX_NOT) != 0;
+  unsigned char *replacement;
+
+  sc->line++;
+
+  if (sc->addr_number > 0) {
+    /* NUMBER given in ADDR: process only that line, or with NOT every
+       line except that one. */
+    int on_line = (sc->addr_number == sc->line);
+
+    if (on_line == negated)
+      return 0;
+  } else if (sc->addr_number == -1) {
+    /* ADDR was '$': the last line is the one where tail has reached the
+       end of the buffer.  NOT inverts the selection. */
+    int at_last = (buffer->tail == buffer->end);
+
+    if (at_last == negated)
+      return 0;
+  }
+
+  /* An empty REPLACEMENT may have no data pointer, substitute "" */
+  replacement = silc_buffer_data(sc->rep);
+  if (!replacement)
+    replacement = (unsigned char *)"";
+
+  /* Match and replace */
+  return silc_buffer_format(buffer,
+                            SILC_STR_REGEX(sc->str_regexp, sc->match_flags),
+                            SILC_STR_REPLACE(replacement,
+                                             silc_buffer_len(sc->rep)),
+                            SILC_STR_END, SILC_STR_END);
+}
+
+/* Matching and substitution ala sed.
+
+   Parses `subst' as "[ADDR][!]s/REGEXP/REPLACEMENT/[FLAGS]" and applies it
+   to `buffer'.  ADDR may be a /regexp/, a line number (1 or greater) or
+   '$' for the last line; '!' before 's' inverts the ADDR selection.  The
+   only supported flag is 'g'.  An escaped slash in REPLACEMENT is
+   unescaped to '/' before use.
+
+   Returns TRUE when the final silc_buffer_format call returned a
+   non-negative value, FALSE otherwise.  On syntax errors the errno is set
+   to SILC_ERR_SYNTAX with a reason, on NULL arguments to
+   SILC_ERR_INVALID_ARGUMENT.  All temporary allocations are released
+   before returning. */
+
+SilcBool silc_subst(SilcBuffer buffer, const char *subst)
+{
+  SilcSubstContext ctx;
+  SilcBufferStruct match, addr, command, exp_start, exp, exp_end;
+  SilcBufferStruct rep, rep_end, flags;
+  SilcBufferRegexFlags addr_flags = 0, match_flags = 0;
+  char *str_addr = "";          /* ADDR regexp, empty if ADDR is not one */
+  char *addr_alloc = NULL;      /* Heap copy backing str_addr, if any */
+  int ret = -1;
+
+  memset(&ctx, 0, sizeof(ctx));
+
+  if (!buffer || !subst) {
+    silc_set_errno(SILC_ERR_INVALID_ARGUMENT);
+    goto out;
+  }
+
+  SILC_LOG_DEBUG(("Substitution '%s'", subst));
+
+  /* Parse the expression syntax */
+  if (!silc_regex(subst, SILC_REGEXP_SUBST, &match, &addr, &command,
+                  &exp_start, &exp, &exp_end, &rep, &rep_end, &flags, NULL)) {
+    silc_set_errno_reason(SILC_ERR_SYNTAX, "Invalid substitution expression");
+    goto out;
+  }
+
+  /* Check address syntax */
+  if (silc_buffer_len(&addr)) {
+    if (*silc_buffer_data(&addr) == '/') {
+      /* ADDR is /regexp/: strip the slashes and keep a private copy */
+      silc_buffer_pull(&addr, 1);
+      if (addr.tail[-1] != '/') {
+        silc_set_errno_reason(SILC_ERR_SYNTAX,
+                              "Invalid address syntax, missing '/'");
+        goto out;
+      }
+      silc_buffer_push_tail(&addr, 1);
+
+      if (!silc_buffer_len(&addr)) {
+        silc_set_errno_reason(SILC_ERR_SYNTAX,
+                              "Invalid address syntax, missing regular "
+                              "expression");
+        goto out;
+      }
+      addr_alloc = silc_memdup(silc_buffer_data(&addr),
+                               silc_buffer_len(&addr));
+      if (!addr_alloc)
+        goto out;
+      str_addr = addr_alloc;
+
+    } else if (*silc_buffer_data(&addr) == '$' &&
+               silc_buffer_len(&addr) == 1) {
+      ctx.addr_number = -1;
+
+    } else if (isdigit((int)*silc_buffer_data(&addr))) {
+      /* ADDR is a line number.  ctx.addr_number starts at 0 (memset
+         above); refuse values that would overflow the signed 32-bit
+         field instead of relying on undefined behaviour. */
+      while (silc_buffer_len(&addr) &&
+             isdigit((int)*silc_buffer_data(&addr))) {
+        int digit = *silc_buffer_data(&addr) - '0';
+
+        if (ctx.addr_number > (0x7fffffff - digit) / 10) {
+          silc_set_errno_reason(SILC_ERR_SYNTAX,
+                                "Invalid address syntax, line number "
+                                "too large");
+          goto out;
+        }
+        ctx.addr_number = ctx.addr_number * 10 + digit;
+        silc_buffer_pull(&addr, 1);
+      }
+
+      if (silc_buffer_len(&addr)) {
+        silc_set_errno_reason(SILC_ERR_SYNTAX,
+                              "Invalid address syntax, not a number");
+        goto out;
+      }
+
+      if (ctx.addr_number == 0) {
+        silc_set_errno_reason(SILC_ERR_SYNTAX,
+                              "Invalid address syntax, line address is 0");
+        goto out;
+      }
+
+    } else {
+      silc_set_errno_reason(SILC_ERR_SYNTAX, "Unsupported address syntax");
+      goto out;
+    }
+  }
+
+  /* Check command syntax, only "s" and "!s" are accepted */
+  if (!((silc_buffer_len(&command) == 1 &&
+         silc_buffer_memcmp(&command, "s", 1)) ||
+        (silc_buffer_len(&command) == 2 &&
+         silc_buffer_memcmp(&command, "!s", 2)))) {
+    silc_set_errno_reason(SILC_ERR_SYNTAX, "Invalid command");
+    goto out;
+  }
+  if (silc_buffer_len(&command) == 2)
+    addr_flags |= SILC_STR_REGEX_NOT;
+
+  /* Check REGEXP syntax */
+  if (!silc_buffer_len(&exp_start) ||
+      !silc_buffer_memcmp(&exp_start, "/", 1)) {
+    silc_set_errno_reason(SILC_ERR_SYNTAX,
+                          "Invalid substitution syntax, missing '/'");
+    goto out;
+  }
+  if (!silc_buffer_len(&exp_end) ||
+      !silc_buffer_memcmp(&exp_end, "/", 1)) {
+    silc_set_errno_reason(SILC_ERR_SYNTAX,
+                          "Invalid substitution syntax, missing '/'");
+    goto out;
+  }
+
+  /* Check FLAGS syntax, a single 'g' is the only supported flag */
+  if (silc_buffer_len(&flags)) {
+    if (silc_buffer_len(&flags) > 1) {
+      silc_set_errno_reason(SILC_ERR_SYNTAX, "Invalid flags");
+      goto out;
+    }
+    if (!silc_buffer_memcmp(&flags, "g", 1)) {
+      silc_set_errno_reason(SILC_ERR_SYNTAX, "Unsupported flag");
+      goto out;
+    }
+    match_flags |= SILC_STR_REGEX_ALL;
+  }
+
+  /* Set flags */
+  match_flags |= SILC_STR_REGEX_INCLUSIVE;
+  addr_flags |= SILC_STR_REGEX_NL | SILC_STR_REGEX_NO_ADVANCE;
+
+  ctx.str_regexp = silc_memdup(silc_buffer_data(&exp),
+                               silc_buffer_len(&exp));
+  if (!ctx.str_regexp)
+    goto out;
+  ctx.addr_flags = addr_flags;
+  ctx.match_flags = match_flags;
+
+  /* Unescape escapes from REPLACEMENT */
+  ctx.rep = silc_buffer_copy(&rep);
+  if (!ctx.rep)
+    goto out;
+  if (silc_buffer_len(ctx.rep))
+    silc_buffer_format(ctx.rep,
+                       SILC_STR_REGEX("\\\\/", (SILC_STR_REGEX_ALL |
+                                                SILC_STR_REGEX_INCLUSIVE)),
+                       SILC_STR_REPLACE("/", 1),
+                       SILC_STR_END, SILC_STR_END);
+
+  /* If NUMBER or $ is specified, handle NOT flag in the silc_subst_addr.
+     ctx.addr_flags keeps the NOT bit for the callback; only the flags
+     given to the outer ADDR match drop it. */
+  if (ctx.addr_number)
+    addr_flags &= ~SILC_STR_REGEX_NOT;
+
+  SILC_LOG_DEBUG(("ADDR '%s' flags 0x%x, NUMBER %d", str_addr, addr_flags,
+                  ctx.addr_number));
+  SILC_LOG_DEBUG(("REGEXP '%s' flags 0x%x", ctx.str_regexp, match_flags));
+
+  /* Match and replace */
+  ret = silc_buffer_format(buffer,
+                           SILC_STR_REGEX(str_addr, addr_flags),
+                           SILC_STR_FUNC(silc_subst_addr, NULL, &ctx),
+                           SILC_STR_END, SILC_STR_END);
+
+ out:
+  /* Every pointer below is either NULL or owned by this function */
+  silc_free(addr_alloc);
+  silc_free(ctx.str_regexp);
+  silc_buffer_free(ctx.rep);
+
+  return ret >= 0 ? TRUE : FALSE;
+}