Logo Search packages:      
Sourcecode: hexer version File versions  Download package

regex.c

/* regex.c  8/19/1995
 * Regular expressions, version 0.3
 *
 * This file provides a set of functions for dealing with binary regular
 * expressions, where binary means that some extesions have been made to
 * the standard UN*X-regular expressions making it possible to search for
 * an arbitrary byte pattern.
 *
 * NOTE:  In some esoteric cases the function `regex_match()' does not find
 *   the longest string that matches a given regex.  Example:
 *   the expression
 *     a*\(abcd\|bc\)
 *   doesn't not match the whole string `aaabcd' but only the substring
 *   `aaabc`.  I'm not sure if this is a bug, since `vi' and 'vim' do the
 *   same.
 */

/* Copyright (c) 1995,1996 Sascha Demetrio
 * Copyright (c) 2009 Peter Pentchev
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *    If you modify any part of HEXER and resitribute it, you must add
 *    a notice to the `README' file and the modified source files containing
 *    information about the  changes you made.  I do not want to take
 *    credit or be blamed for your modifications.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *    If you modify any part of HEXER and resitribute it in binary form,
 *    you must supply a `README' file containing information about the
 *    changes you made.
 * 3. The name of the developer may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * HEXER WAS DEVELOPED BY SASCHA DEMETRIO.
 * THIS SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT.
 * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT
 * NOT MAKE USE OF THIS WORK.
 *
 * DISCLAIMER:
 * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE DEVELOPER BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "defs.h"

#undef EOF

#include "regex.h"

#define REGEX_MAXMATCH_DFL (1 << 10) /* default value for `rx_maxmatch'. */
#define REGEX_MAX_PARS (64) /* the maximum number of opened parentheses */
#define REGEX_MAX_BRANCH (64) /* the maximum number of branches per command */
#define REGEX_BLOCKSIZE_EXP 12

#define REGEX_BLOCKSIZE (1 << REGEX_BLOCKSIZE_EXP)
#define REGEX_BLOCKMASK (REGEX_BLOCKSIZE - 1)

/* The linear encoding of the RE generated by `regex_compile()' and
 * executed by `regex_match()' is:
 * (long ro)    `ro' is the offset of the replace code of the expression.
 * <search expression>
 *              the search expression may contain any of the commands below;
 *              each expression and subexpression is terminated with an
 *              `EOX' (end of expression) command.
 * (long 0)     a zero long is placed between the search expression and the
 *              replace expression.  there's no technical reason for that,
 *              it just makes the debugging a little bit easier.
 * <replace expression>
 *              the replace expression is made up of `STRING' and
 *              `BACKREF'-commands. it's terminated with an `EOX'-command.
 *              this part may be empty.
 * (long 0)     a trailing 0, just for fun.
 */

enum regex_e {  /* arguments
                 *   description. */

  ANY_CHAR = 1, /*   matches any character. */
  ANY_OF,       /* (char n) <list of `n' characters>
                 *   matches any of the characters in the list.  the list
             *   has to be sorted. */
  ANY_BUT,      /* (char n) <list of `n' characters>
                 *   matches any but the listed characters.  the list has to
             *   be sorted. */
  CHAR,         /* (char c)
                 *   matches the character `c'. */
  STRING,       /* (long n) <string of length `n'>
                 *   matches the given string exactly.  the null-character is
             *   *not* interpreted as the end of the string. */
  PAR_OPEN,     /*   opening parenthesis. */
  PAR_CLOSE,    /* (long slot)
                 *   closing parenthesis.  the input matched since the last
             *   unmatched opening parenthesis is stored in slot `n'. */
  BACKREF,      /* (long slot)
                 *   matches the string stored in the given slot exactly.
             *   if the slot is undefined, it matches the empty string. */
  BRANCH,       /* (long n) (long m)
                 *   the following `n' bytes are two (`EOX'-terminated,
             *   respectively) expressions, the first of which is `m'
             *   bytes long.  these expressions are read as two alternate
             *   branches. */
  REPEAT,       /* (long min) (long max) (long n)
                 *   the following `n' bytes are an `EOX'-terminated
             *   expression E.  the `REPEAT'-expression R matches, if E
             *   matches at least `min' succsessive strings.  R matches
             *   at most `max' succsessive E strings.  if `max < 0',
             *   `max' is set to `rx_maxmatch'. */
  FIXREPEAT,    /* (long min) (long max) (long len) (long n)
                 *   works like `REPEAT', but all matches found have to be
             *   of the same (fixed) length `len'.  if `max < 0',
             *   `max' is set to infinity, i.e. the number of matches
             *   is not limited by `rx_maxmatch'. */
  BEGIN_WORD,   /*   matches the empty string, if it is preceded immediately
                 *   by the beginning of the buffer/file or a
             *   non-alphanumerical character that is not an underscore. */
  END_WORD,     /*   matches the empty string, if it is followed immediately
                 *   by the end of the buffer/file or a non-alphanumerical
             *   character that is not an underscore. */
  BEGIN_LINE,   /*   matches the empty string at the beginning of a line. */
  END_LINE,     /*   matches the empty string at the end of a line.
                 *   `\n' and `\r' are treated as end-of-line markers. */
  EOF,          /*   matches the empty string at the end of the file. */
  EOX           /*   end-of-expression.  */
};

#define SIZE_BRANCH 3
#define SIZE_REPEAT 4
#define SIZE_FIXREPEAT 5

enum error_e {
  E_no_error = 0,
  E_invalid_range,
  E_invalid_character,
  E_unmatched_opening_parenthesis,
  E_unmatched_closing_parenthesis,
  E_operator_without_operand,
  E_malformed_operator,
  E_operand_with_variable_length,
  E_unmatched_bracket,
  E_interrupt
};

char *rx_error_msg[] = {
  "", /* no error */ "invalid range", "invalid character", "unmatched `('",
  "unmatched `)'", "operator without operand", "malformed operator",
  "operand of `++' and `**' must have fixed length", "unmatched `['",
  "interrupted", 0
};

int rx_error;
static int interrupt = 0;
volatile int *rx_interrupt = &interrupt;

#undef isoct
#undef ishex
#define isoct(x) ((x) >= '0' && (x) < '8')
#define ishex(x) (isdigit(x) || (tolower(x) >= 'a' && tolower(x) <= 'f'))

#undef u_char
#undef u_short
#undef u_long
#define u_char unsigned char
#define u_short unsigned short
#define u_long unsigned long

static long (*rx_read)(char *target, long count);
  /* pointer to a function used for reading from the buffer/file.
   */
static long (*rx_seek)(long position);
  /* pointer to a function used for setting the pointer in the buffer/file.
   */
static long (*rx_tell)(void);
  /* pointer to a function returning the current position if the buffer/file.
   */
static long rx_size = 0;
  /* `rx_size == 0': the end of the buffer/file has not yet been hit.
   *   that means there are at least `REGEX_BLOCKSIZE' unread characters
   *   left.
   * `rx_size > 0': the end of the buffer/file has been hit.  the size of
   *   the buffer/file is `rx_size'.
   */
static int rx_start;
  /* character that a match has to start with.  if `rx_start == -1', matches
   * may start with different characters.
   */
static long rx_match_skip;
  /* if `regex_match()' (`regex_match_()' actually) matches an expression
   * (or subexpression), `rx_match_skip' is set to the position next
   * to the last character of that match.
   */

int rx_nomagic = 0;
  /* `rx_nomagic == 1': all special characters exept `*' and `[' have to be
   *   prefixed with `\'.
   */
int rx_allmagic = 0;
  /* `rx_allmagic == 1': special characters don't have to be prefixed
   *   with `\'.
   */
long rx_maxmatch = REGEX_MAXMATCH_DFL;
  /* `rx_maxmatch' is the maximum number of matches a repeat operator
   * can match, i.e. the search string `.*' matches at most `rx_maxmatch'
   * characters.
   */

int rx_special_nl = 0;
  /* if `rx_special_nl' is set, the newline character (0xa) is treated as a
   * special cahacter, i.e. the dot and `ANY_BUT'-ranges ("[^...]") do
   * *not* match a newline character.
   */

static char buffer_[3 * REGEX_BLOCKSIZE];
static char *buffer = buffer_ + REGEX_BLOCKSIZE;
static char *bp;
static long buffer_base;

#define REGEX_ADVANCE {                                                       \
  if (bp >= buffer + REGEX_BLOCKSIZE) {                                       \
    long ii;                                                                   \
    rx_memmove(buffer - REGEX_BLOCKSIZE, buffer, 2 * REGEX_BLOCKSIZE);        \
    buffer_base += REGEX_BLOCKSIZE;                                           \
    rx_seek(buffer_base + REGEX_BLOCKSIZE);                                   \
    ii = rx_read(buffer + REGEX_BLOCKSIZE, REGEX_BLOCKSIZE);                   \
    if (ii < REGEX_BLOCKSIZE) {                                                \
      rx_size = buffer_base + REGEX_BLOCKSIZE + ii;                            \
      memset(buffer + REGEX_BLOCKSIZE + ii, 0, REGEX_BLOCKSIZE - ii);           \
    }                                                                         \
    bp -= REGEX_BLOCKSIZE;                                                    \
  } else if (bp < buffer) {                                                   \
    rx_memmove(buffer + REGEX_BLOCKSIZE, buffer, REGEX_BLOCKSIZE);            \
    buffer_base -= REGEX_BLOCKSIZE;                                           \
    if (position >= REGEX_BLOCKSIZE) {                                        \
      rx_seek(buffer_base - REGEX_BLOCKSIZE);                                 \
      rx_read(buffer - REGEX_BLOCKSIZE, 2 * REGEX_BLOCKSIZE);                 \
    } else {                                                                  \
      rx_seek(buffer_base);                                                   \
      rx_read(buffer, REGEX_BLOCKSIZE);                                       \
    }                                                                         \
    bp += REGEX_BLOCKSIZE;                                                    \
  }                                                                           \
}

#if !HAVE_MEMMOVE
  static void
rx_memmove(char *t, const char *s, const long count)
{
  register long i;

  if (t > s)
    for (i = count - 1; i >= 0; --i) t[i] = s[i];
  else
    for (i = 0; i < count; ++i) t[i] = s[i];
}
/* rx_memmove */
#else
#define rx_memmove(a, b, c) (void)memmove(a, b, c)
#endif

  int
regex_init(long (*read)(char *, long), long (*seek)(long), long (*tell)(void))
{
  rx_read = read;
  rx_seek = seek;
  rx_tell = tell;
  return 0;
}
/* regex_init */

  static int
regex_set_position(long position)
  /* NOTE: `bp' and `buffer_base' *must* be initialized before calling
   *   `regex_set_position'.
   */
{
  long i;

  if (position - buffer_base >= 0) {
    if (position - buffer_base > REGEX_BLOCKSIZE) {
      if (position - buffer_base < 2 * REGEX_BLOCKSIZE) {
      bp = buffer + (position & REGEX_BLOCKMASK);
      rx_memmove(buffer - REGEX_BLOCKSIZE, buffer, 2 * REGEX_BLOCKSIZE);
      buffer_base += REGEX_BLOCKSIZE;
      rx_seek(buffer_base);
      i = rx_read(buffer + REGEX_BLOCKSIZE, REGEX_BLOCKSIZE);
      if (i < REGEX_BLOCKSIZE) {
        rx_size = buffer_base + i;
        memset(buffer + REGEX_BLOCKSIZE + i, 0, REGEX_BLOCKSIZE - i);
      }
      return 0;
      } else
        goto rebuild_buffer;
    }
    bp = buffer + (position & REGEX_BLOCKMASK);
    return 0;
  }

rebuild_buffer:
  bp = buffer + (position & REGEX_BLOCKMASK);
  buffer_base = position & ~REGEX_BLOCKMASK;
  rx_seek(buffer_base);
  i = rx_read(buffer, 2 * REGEX_BLOCKSIZE);
  if (i < 2 * REGEX_BLOCKSIZE) {
    rx_size = buffer_base + i;
    memset(buffer + i, 0, 2 * REGEX_BLOCKSIZE - i);
  }
  return 0;
}
/* regex_set_position */

#define RX_NSLOTS 256

static long rx_store_begin[RX_NSLOTS];
static long rx_store_end[RX_NSLOTS];
static char *rx_store_match[RX_NSLOTS];
static int rx_store_initialized = 0;

  static int
regex_store(long slot, long begin, long end)
  /* The text between the positions `begin' (incl.) and `end' (excl.) is
   * stored in slot `slot'.  (Actually only the `begin' and `end' positions
   * are stored.)
   */
{
  assert(slot >= 0 && slot < RX_NSLOTS);
  assert(end >= begin);
  if (!rx_store_initialized) {
    memset(rx_store_begin, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_end, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_match, 0, RX_NSLOTS * sizeof(char *));
    rx_store_initialized = 1;
  }
  if (rx_store_match[slot]) {
    free((char *)rx_store_match[slot]);
    rx_store_match[slot] = 0;
  }
  rx_store_begin[slot] = begin;
  rx_store_end[slot] = end;
  return 0;
}
/* regex_store */

  static char *
regex_ref(int slot)
  /* The contents of undefined slots default to an empty string, therefore
   * the return value is always a valid (non-zero) pointer.
   */
{
  long position, count;

  assert(slot >= 0 && slot < RX_NSLOTS);
  if (!rx_store_initialized) {
    memset(rx_store_begin, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_end, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_match, 0, RX_NSLOTS * sizeof(char *));
    rx_store_initialized = 1;
  }
  position = rx_store_begin[slot];
  count = rx_store_end[slot] - position;
  if (!rx_store_match[slot] && count) {
    rx_store_match[slot] = malloc(count);
    rx_seek(position);
    rx_read(rx_store_match[slot], count);
    return rx_store_match[slot];
  } else if (rx_store_match[slot])
    return rx_store_match[slot];
  else
    return "";
}
/* regex_ref */

  static long
regex_ref_len(int slot)
  /* The length of an undefined slot is 0.
   */
{
  long position, count;

  assert(slot >= 0 && slot < RX_NSLOTS);
  if (!rx_store_initialized) {
    memset(rx_store_begin, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_end, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_match, 0, RX_NSLOTS * sizeof(char *));
    rx_store_initialized = 1;
  }
  position = rx_store_begin[slot];
  count = rx_store_end[slot] - position;
  return count;
}
/* regex_ref_len */

  static void
regex_clear(void)
  /* clear all slots.
   */
{
  int i;

  if (!rx_store_initialized) {
    memset(rx_store_begin, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_end, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_match, 0, RX_NSLOTS * sizeof(char *));
    rx_store_initialized = 1;
  } else {
    for (i = 0; i < RX_NSLOTS; ++i) {
      if (rx_store_match[i]) {
      free((char *)rx_store_match[i]);
      rx_store_match[i] = 0;
      }
      rx_store_begin[i] = rx_store_end[i] = 0;
    }
  }
}
/* regex_clear */

  int
regex_reset()
{
  rx_error = 0;

  return 0;
}
/* regex_reset */

  long
regex_search(regex, begin, end, start, direction,
             replace_str, replace_len, match_len)
  long *regex;
  long begin;
  long end;
  long start;
  int direction; /* >=0: forward; <0: reverse. */
  char **replace_str;
  long *replace_len;  /* the replace-string for the match is written to
                   * `*replace_str'/`replace_len'.  the memory for
                   * `*replace_str' is allocated by `regex_match()'
                   * via `malloc()'. */
  long *match_len;
{
  long position;
  int i;
  char *bp1;
  long base1;

  direction = direction < 0 ? -1 : 1;
  if (end < 0) end = (u_long)~0 >> 1;
  position = start;

  /* initialize the buffer */
  bp = buffer + (position & REGEX_BLOCKMASK);
  buffer_base = position & ~REGEX_BLOCKMASK;
  rx_seek(buffer_base);
  i = rx_read(buffer, 2 * REGEX_BLOCKSIZE);
  if (i < 2 * REGEX_BLOCKSIZE) {
    rx_size = buffer_base + i;
    memset(buffer + i, 0, 2 * REGEX_BLOCKSIZE - i);
  } else
    rx_size = 0;

  if (rx_size && end > rx_size) end = rx_size;
  if (rx_size && position >= rx_size) position = rx_size - 1;
  while (position < end && position >= begin) {
    if (rx_start >= 0)
      while (*bp != rx_start && position < end) {
      if (*rx_interrupt) {
        rx_error = (int)E_interrupt;
          *rx_interrupt = 0;
        regex_clear();
        return -1;
      }
      bp += direction;
      position += direction;
      REGEX_ADVANCE;
      if (rx_size && end > rx_size) end = rx_size;
      }
    bp1 = bp;
    base1 = buffer_base;
    if (regex_match(regex, position, replace_str, replace_len, match_len))
      return position;
    if (rx_error) return -1;
    if (rx_size && end > rx_size) end = rx_size;
    position += direction;
    if (base1 == buffer_base) {
      bp = bp1 + direction;
      REGEX_ADVANCE;
    } else {
      regex_set_position(position);
      if (rx_size && end > rx_size) end = rx_size;
    }
  }
  return -1;
}
/* regex_search */

  static int
regex_match_(long *regex, long position, int *parstack, int *parsp)
{
  long *pp = regex;
  long p = position;

  enum regex_e opcode;
  long min, max;
  long operand;
  long operand2;
  char *bp1;
  long base1;
  int i;

  int local_parstack[REGEX_MAX_PARS];
  int local_parsp = *parsp;
    /* Stack of unmatched parentheses while running the regex-code.
     */

  for (i = local_parsp - 1; i >= 0; --i) local_parstack[i] = parstack[i];
  if (rx_error) return 0;
  regex_set_position(p);
  for (; !rx_error;) {
    if (*rx_interrupt) {
      rx_error = (int)E_interrupt;
      regex_clear();
      *rx_interrupt = 0;
      return 0;
    }
    if ((opcode = (enum regex_e)*pp++) == EOX) goto match;
    if (rx_size) {
      if (opcode == EOF && p >= rx_size) goto match;
      if (p > rx_size) goto fail;
      if (p == rx_size && opcode != END_WORD && opcode != END_LINE) goto fail;
    }
    assert(local_parsp >= 0);
    switch (opcode) {
      case ANY_CHAR:
        if (rx_special_nl && *bp == '\n') goto fail;
      ++p, ++bp;
      break;
      case CHAR:
      if (*bp++ != *pp++) goto fail;
      ++p;
      break;
      case STRING:
        operand = *pp++; /* length of the string. */
      if (rx_size && p + operand > rx_size) goto fail;
      /* if (memcmp(bp, pp, operand)) goto fail; */
        for (; operand; --operand) if (*pp++ != *bp++) goto fail;
        /*
      pp += operand;
      bp += operand;
        */
      p += operand;
      break;
      case ANY_OF:
        operand = *pp++; /* number of characters */
      for (i = 0; i < operand && *bp > *pp; ++pp, ++i);
      if (*bp != *pp) goto fail;
      pp += operand - i;
      ++p, ++bp;
      break;
      case ANY_BUT:
        operand = *pp++; /* number of characters */
        for (i = 0; i < operand && *bp > *pp; ++pp, ++i);
      if (*bp == *pp) goto fail;
        if (rx_special_nl && *bp == '\n') goto fail;
      pp += operand - i;
      ++p, ++bp;
      break;
      case PAR_OPEN:
        local_parstack[local_parsp++] = p;
      break;
      case PAR_CLOSE:
        operand = *pp++; /* number of the slot. */
      regex_store(operand, local_parstack[--local_parsp], p);
      break;
      case BACKREF:
        operand = *pp++; /* number of the slot. */
      if (memcmp(bp, regex_ref(operand), i = regex_ref_len(operand)))
          goto fail;
      bp += i;
      p += i;
      if (i >= REGEX_BLOCKSIZE) regex_set_position(p);
      break;
      case BRANCH:
        operand = *pp++;
        operand2 = *pp++;
      bp1 = bp;
      base1 = buffer_base;
      if (regex_match_(pp, p, local_parstack, &local_parsp)) {
        /* see if the first branch does the job... */
        if (regex_match_(pp + operand, rx_match_skip,
                           local_parstack, &local_parsp)) {
          p = rx_match_skip;
          goto match;
            /* NOTE:  this is not really correct, since we might have
             *   found a longer match if we had taken the other branch,
             *   but... `vi' does the same. */
        }
        }
      /* the first branch didn't work, so the second branch has to. */
      pp += operand2;
      if (base1 == buffer_base) bp = bp1; else regex_set_position(p);
      break;
      case FIXREPEAT:
      min = *pp++;
      max = *pp++;
      operand = *pp++;
      operand2 = *pp++;
      for (i = 0; i < min; ++i) {
        if (!regex_match_(pp, p, local_parstack, &local_parsp)) goto fail;
        assert(rx_match_skip - p == operand);
        p = rx_match_skip;
      }
      if (max < 0) max = (long)((u_long)~0 >> 1);
      if (max > min) {
        long count;
        long p_rec;
        for (count = 0, p_rec = p; i < max; ++i) {
          if (!regex_match_(pp, p, local_parstack, &local_parsp)) break;
          ++count;
          assert(rx_match_skip - p == operand);
          p = rx_match_skip;
        }
        pp += operand2;
        while (count && !rx_error) {
          if (regex_match_(pp, p, local_parstack, &local_parsp)) {
            p = rx_match_skip;
            goto match;
          }
          p = p_rec + --count * operand;
        }
        regex_set_position(p);
      } else
        pp += operand2;
      break;
      case REPEAT:
      min = pp[0];
      max = pp[1];
      operand = pp[2];
        pp += 3;
      for (i = 0; i < min; ++i) {
        if (!regex_match_(pp, p, local_parstack, &local_parsp)) goto fail;
        p = rx_match_skip;
        regex_set_position(p);
      }
      if (max < 0) max = rx_maxmatch;
      if (max > min) {
        long *stack, sp;
        stack = (long *)malloc((max - min + 1) * sizeof(long));
        for (sp = 0; max < 0 ? 1 : i < max; ++i) {
          if (!regex_match_(pp, p, local_parstack, &local_parsp)) break;
          stack[sp++] = p;
          p = rx_match_skip;
        }
        pp += operand;
        while (sp && !rx_error) {
          if (regex_match_(pp, p, local_parstack, &local_parsp)) {
            p = rx_match_skip;
            free((char *)stack);
            goto match;
          }
          p = stack[--sp];
        }
        regex_set_position(p);
        free((char *)stack);
      } else
        pp += operand;
      break;
      case BEGIN_WORD:
        if (!p) break;
        regex_set_position(--p);
      ++p, ++bp;
      if (isalnum(bp[-1]) || bp[-1] == '_') goto fail;
        break;
      case END_WORD:
        if (rx_size && p == rx_size) break;
      if (isalnum(bp[0]) || bp[0] == '_') goto fail;
        if (!isalnum(bp[-1]) && bp[-1] != '_') goto fail;
      break;
      case BEGIN_LINE:
        if (!p) break;
      regex_set_position(--p);
      ++p, ++bp;
      if (bp[-1] != '\n') goto fail;
      break;
      case END_LINE:
        if (rx_size && p == rx_size) break;
      if (bp[0] != '\n' && bp[0] != '\r') goto fail;
      break;
      case EOF:
        goto fail;
      default:
        abort();
    }
  } /* for */

  goto fail;

match:
  rx_match_skip = p;
  if (parstack) {
    for (i = local_parsp - 1; i >= 0; --i) parstack[i] = local_parstack[i];
    *parsp = local_parsp;
  }
  return 1;

fail:
  return 0;
}
/* regex_match_ */

  int
regex_match(regex, position, replace_str, replace_len, match_len)
  long *regex;        /* pointer to the regex-code to be processed. */
  long position;      /* position in the input stream. */
  char **replace_str;
  long *replace_len;  /* if a match is found at `position',
                       * the replace-string for the match is written to
                   * `*replace_str'/`replace_len'.  the memory for
                   * `*replace_str' is allocated by `regex_match()'
                   * via `malloc()'. */
  long *match_len;    /* if a match is found at `position',
                       * the length of the match is written to
                       * `*match_len'. */
{
  long *pp = regex + *regex;
  int i;
  int zero = 0;

  if (regex_match_(regex + 1, position, 0, &zero)) {
    *match_len = rx_match_skip - position;
    if (!*pp) {
      *replace_str = (char *)malloc(1);
      **replace_str = 0;
      *replace_len = 0;
    } else {
      char *cp;
      long *pp_rec = pp;
      /* store the match to slot 0. */
      regex_store(0, position, rx_match_skip);
      /* check out how long the replace-string will be. */
      for (*replace_len = 0;;) {
      if (*pp == (long)EOX) {
        ++pp;
        break;
      }
      switch ((enum regex_e)*pp++) {
        case STRING:
          *replace_len += *pp;
          pp += *pp + 1;
          break;
        case CHAR:
          ++*replace_len;
          ++pp;
          break;
        case BACKREF:
          *replace_len += regex_ref_len(*pp++);
          break;
        default:
          abort();
      }
      }
      assert(!*pp);
      cp = *replace_str =
        (char *)malloc(*replace_len + !*replace_len);
      for (pp = pp_rec;;) {
      long reflen;
      if (*pp == (long)EOX) break;
      switch ((enum regex_e)*pp++) {
        case STRING:
            for (i = 0; i < *pp; ++i) cp[i] = pp[i + 1];
          cp += *pp;
          pp += *pp + 1;
          break;
        case CHAR:
          *cp++ = *pp++;
          break;
        case BACKREF:
          memcpy(cp, regex_ref(*pp), reflen = regex_ref_len(*pp));
          cp += reflen;
          ++pp;
          break;
        default:
          abort();
      }
      }
    }
    return 1;
  }
  return 0;
}
/* regex_match */

  long *
regex_compile(str, replace)
  char *str;
  char *replace;
  /* Compile the regular expression `str' and return a pointer to the
   * compiled regex-code.  If `replace' is non-zero, the string `replace'
   * is interpreted as a replace-string, which may contain `\'-escape
   * sequences including references to parenthesized matches (`\0',
   * `\1', ... `\9'), where `\0' stands for the whole match.
   */
{
# define REGEX_EOF(position) (rx_size ? (position) >= rx_size : 0)
# define EAT_WHITESPACE { while (*cp == ' ' || *cp == '\t') ++cp; }
# define GET_OCT(o, n) {                                                      \
  for ((o) = 0, i = 0; i < (n) && isoct(*cp) && ((o) << 1) >= 0; ++i, ++cp)   \
    (o) *= 8, (o) += *cp - '0'; }
# define GET_HEX(h, n) {                                                      \
  for ((h) = 0, i = 0; i < (n) && ishex(*cp) && ((h) << 1) >= 0; ++i, ++cp)   \
    (h) *= 16, (h) += *cp > '9' ? tolower(*cp) - 'a' + 10 : *cp - '0'; }
# define GET_DEC(d, n) {                                                      \
  for ((d) = 0, i = 0; i < (n) && isdigit(*cp) && ((d) << 1) >= 0; ++i, ++cp) \
    (d) *= 10, (d) += *cp - '0'; }
# define GET_NUMBER(n) {                                                      \
  if (*cp == '0')                                                             \
    if (*++cp == 'x') { ++cp; GET_HEX((n), 32); } else { GET_OCT((n), 32); }  \
  else { GET_DEC((n), 32); } }

  char *cp, *s;

  int pc;
    /* Parentheses counter; number of unmatched parentheses.
     */
  long *par[1024];
    /* Stack of positions of opening parentheses.
     * For `pc > 0', `par[pc]' points to the first regex-command after the
     * `PAR_OPEN'-command that opened the current parentheses-level `pc'.
     * `par[0]' is equal to `regex'.
     */
  long *branch[1024];
    /* Stack of positions of branches.
     * The top element points to the first regex-command in the branch
     * currently generated (in the current parentheses-level `pc').
     */

  /* We need to know the positions of *all* successive `BRANCH'-commands
   * for *all* levels of parentheses.  This means we need a stack of
   * stacks.  The first stack pointer is `pc', the parentheses counter.
   */
  long *branch_n[REGEX_MAX_PARS][REGEX_MAX_BRANCH];
    /* On par-level `pc', `branch_n[pc][x]' points to the `x'th `BRANCH'-
     * command on that level.
     */
  int branch_len[REGEX_MAX_PARS][REGEX_MAX_BRANCH];
    /* On par-level `pc', `branch_len[pc][x]' holds the length of the `x'th
     * `BRANCH'-command on that level.  If `branch_len[pc][x] < 0' the]
     * length of that branch is variable.
     */
  int branch_n_c[REGEX_MAX_PARS];
    /* On par-level `pc' there are `branch_n_c[pc]' `BRANCH'-commands.
     */

# define BRANCH_LEN_INC(x) {                                                  \
    if (branch_len[pc][branch_n_c[pc]] >= 0)                                  \
      branch_len[pc][branch_n_c[pc]] += (x);                                  \
    exp_len = (x);                                                            \
  }

  long ref_len[256];
    /* If we want to know the length of all strings (if unique) matching
     * an expression which contains back-references, we need to store the
     * lengths of all references.
     */
  long *expression;
    /* Pointer to the beginning of the current expression.  This pointer
     * is relevant for processing operators (like `\?', `\*', `\+', ...).
     */
  int exp_len;
    /* length of the current expression.
     */
  int escape;
    /* Set to 1, if the last read character was a '\\', else 0.
     */
  int start;
  int slot;
    /* Number of slots used by pairs of parentheses.  The slot 0 is reserved
     * for the whole expression.
     */
  static long *regex = 0;
  int regex_size = 0;
  const int regex_blocksize = 1 << 16;
  long *pp;
  char escape_char[128];

  int i, j;

  escape_char['a'] = '\a'; escape_char['b'] = '\b'; escape_char['f'] = '\f';
  escape_char['n'] = '\n'; escape_char['r'] = '\r'; escape_char['t'] = '\t';
  escape_char['v'] = '\v';

  /* we don't know yet, how much memory will be needed to store the resulting
   * regex-code, so we allocate `regex_blocksize' bytes in the first place.
   * if it turns out to be insufficient, we simply cancel the compilation,
   * reallocate the double amount of memory and restart from the beginning.
   * the following macro will take care of watching the memory-consumption:
   */
# define CHECK_PP {                                                           \
    if (pp - regex > regex_size * regex_blocksize - 512) goto compile;        \
  }

  if (regex) free((char *)regex);

compile:
  if (regex_size) {
    regex_size <<= 1;
    regex =
      (long *)realloc(regex, regex_size * regex_blocksize * sizeof(long));
  } else
    ++regex_size, regex =
      (long *)malloc(regex_blocksize * sizeof(long));

  par[0] = regex + 1;
  branch[0] = regex + 1;
  branch_n_c[0] = 0;
  branch_len[0][0] = 0;
  pp = regex;
  cp = str;
  expression = 0;
  exp_len = 0;
  escape = 0;
  start = 1;
  pc = 0;
  slot = 1;

  rx_start = -1;
  branch[-1] = regex;

  ++pp; /* leave space for the replace offset */
  while (*cp) {
    switch (*cp++) {
      /* escape character */
      case '\\':
        if (escape) {
        expression = pp;
        *pp++ = CHAR;
        *pp++ = '\\';
        if (start) rx_start = '\\';
        BRANCH_LEN_INC(1);
      } else {
        ++escape;
        continue;
      }
      break;

      /* simple escapes */
      case 'a': case 'b': case 'f': case 'n': case 'r': case 't': case 'v':
        if (escape) {
        expression = pp;
        *pp++ = CHAR;
        *pp++ = escape_char[(int)cp[-1]];
        if (start) rx_start = pp[-1];
        BRANCH_LEN_INC(1);
      } else
        goto Default;
      break;
        
      /* octal, hex, decimal escapes */
      case 'o':
        /* The strings `\0', `\1', ... `\9' are interpreted as back
       * references, therefore octal escape sequences have to be
       * prefixed with `\o'.
       */
        if (escape) {
        int oct;
        expression = pp;
        if (*cp == 'o') { /* octal string */
          long *length;
          ++cp;
          *pp++ = STRING;
          *(length = pp++) = 1;
          EAT_WHITESPACE;
          for (;; ++*length) {
            GET_OCT(oct, 3);
            if (oct > 255) { rx_error = (int)E_invalid_character; break; }
            *pp++ = oct;
            CHECK_PP;
            if (start) rx_start = pp[-1], start = 0;
            EAT_WHITESPACE;
            if (!isoct(*cp)) break;
          }
          BRANCH_LEN_INC(*length);
        } else { /* octal character */
          GET_OCT(oct, 3);
          if (oct > 255) { rx_error = (int)E_invalid_character; break; }
          *pp++ = CHAR;
          *pp++ = oct;
          if (start) rx_start = pp[-1];
          BRANCH_LEN_INC(1);
        }
      } else
        goto Default;
      break;
      case 'x':
        if (escape) {
        int hex;
        expression = pp;
        if (*cp == 'x') { /* hex string */
          long *length;
          ++cp;
          *pp++ = STRING;
          *(length = pp++) = 1;
          EAT_WHITESPACE;
          for (;; ++*length) {
            GET_HEX(hex, 2);
            *pp++ = hex;
            CHECK_PP;
            if (start) rx_start = pp[-1], start = 0;
            EAT_WHITESPACE;
            if (!ishex(*cp)) break;
          }
          BRANCH_LEN_INC(*length);
        } else { /* hex character */
          GET_HEX(hex, 2);
          *pp++ = (long)CHAR;
          *pp++ = hex;
          if (start) rx_start = pp[-1];
          BRANCH_LEN_INC(1);
        }
      } else
        goto Default;
      break;
      case 'd':  /* special: decimal */
        if (escape) {
        int dec;
        expression = pp;
        if (*cp == 'd') { /* decimal string */
          long *length;
          ++cp;
          *pp++ = STRING;
          *(length = pp++) = 1;
          EAT_WHITESPACE;
          for (;; ++*length) {
            GET_DEC(dec, 3);
            if (dec > 255) { rx_error = (int)E_invalid_character; break; }
            CHECK_PP;
            *pp++ = dec;
            if (start) rx_start = pp[-1], start = 0;
            EAT_WHITESPACE;
            if (!isdigit(*cp)) break;
          }
          BRANCH_LEN_INC(*length);
        } else { /* decimal character */
          GET_DEC(dec, 3);
          if (dec > 255) { rx_error = (int)E_invalid_character; break; }
          *pp++ = CHAR;
          *pp++ = dec;
          if (start) rx_start = pp[-1];
          BRANCH_LEN_INC(1);
        }
      } else
        goto Default;
      break;

      /* ranges */
      case '[':
        if ((escape && rx_nomagic) || (!escape && !rx_nomagic)) {
        u_char any[1024], table[256];
        int nescape = 0;
        i = 0;
        expression = pp;
        if (*cp == '^')
          ++cp, *pp++ = ANY_BUT;
        else
          *pp++ = ANY_OF;
        do {
          switch (*cp++) {
            case 0:
              rx_error = (int)E_unmatched_bracket;
            break;
            case '\\':
              if (!nescape) { nescape = 1; continue; } else goto Default1;
            break;
            case '-':
              if (*cp == ']' || !i)
              any[i++] = '-';
            else {
              if (any[i - 1] > *cp) {
                rx_error = (int)E_invalid_range;
                break;
              }
              for (j = any[i - 1] + 1; j <= *cp; ++j) any[i++] = j;
              ++cp;
            }
            break;
            case 'a': case 'b': case 'f': case 'n': case 'r':
            case 't': case 'v':
            if (nescape)
              any[i++] = escape_char[(int)cp[-1]];
            else
              goto Default1;
            break;
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7':
              if (nescape) {
              /* using back references in a range doesn't make sense, so
               * we treat them as octal characters. */
              --cp;
            } else
              goto Default1;
            /* fall through */
            case 'o':  /* octal character */
              if (nescape) {
              int oct;
              GET_OCT(oct, 3);
              if (oct > 255) {
                    rx_error = (int)E_invalid_character;
                    break;
                  }
              any[i++] = oct;
            } else
              goto Default1;
            break;
            case 'd':  /* decimal character */
              if (nescape) {
              int dec;
              GET_DEC(dec, 3);
              if (dec > 255) {
                    rx_error = (int)E_invalid_character;
                    break;
                  }
              any[i++] = dec;
            } else
              goto Default1;
            break;
            case 'x':  /* hex character */
              if (nescape) {
              int hex;
              GET_HEX(hex, 2);
              any[i++] = hex;
            } else
              goto Default1;
            break;
            default: Default1:
              any[i++] = cp[-1];
          }
          if (rx_error) break;
          nescape = 0;
        } while (*cp != ']');
        if (rx_error) break;
        ++cp;
        /* sort the list and remove duplicate entries. */
        memset(table, 0, 256);
        for (j = 0; j < i; ++j) table[any[j]] = 1;
        for (i = 0, j = 0; j < 256; ++j) if (table[j]) any[i++] = j;
        *pp++ = i;
        for (j = 0; j < i; ++j) *pp++ = any[j];
        BRANCH_LEN_INC(1);
      } else
        goto Default;
      break;

      /* parentheses, branches */
      case '(':
        if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
        expression = 0;
        *pp++ = PAR_OPEN;
        ++pc;
        branch[pc] = par[pc] = pp;
        branch_len[pc][0] = 0;
        branch_n_c[pc] = 0;
      } else
        goto Default;
      break;
      case '|':
        if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
        expression = 0;
        *pp++ = EOX; /* end of branch */
        /* insert a `BRANCH'-command at `branch[pc]'. */
        for (i = -1; pp + i >= branch[pc]; --i)
          pp[i + SIZE_BRANCH] = pp[i];
        branch[pc][0] = BRANCH;
        branch[pc][2] = pp - branch[pc];
        /* store the position of this `BRANCH' command. */
        branch_n[pc][branch_n_c[pc]++] = branch[pc];
        branch_len[pc][branch_n_c[pc]] = 0;
        pp += SIZE_BRANCH;
        branch[pc] = pp;
        if (!pc) rx_start = -1;
      } else
        goto Default;
      break;
      case ')':
        if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
        /* NOTE: if the parenthesized expression is split up into
         * multiple branches, the last branch is *not* terminated
         * by an `EOX'. */
        *pp++ = PAR_CLOSE;
        *pp++ = slot++;
        /* first we'll check if all branches have equal length. */
        j = branch_len[pc][0];
        if (branch_n_c[pc]) {
          for (i = 1; i <= branch_n_c[pc]; ++i)
            if (j != branch_len[pc][i]) { i = 0; break; }
        } else
          i = 1;
        /* fill in the `n'-field (see description of the `BRANCH'-command)
         * of all `BRANCH'-commands of the current par-level `pc'. */
        while (branch_n_c[pc]--) {
          branch_n[pc][branch_n_c[pc]][1] =
            pp - (branch_n[pc][branch_n_c[pc]] + SIZE_BRANCH + 2);
        }
        expression = par[pc] - 1;
        if (!pc--) {
          rx_error = (int)E_unmatched_closing_parenthesis;
          break;
        }
        /* if `i' is set, `j' holds the uniqe length of all branches. */
        if (i) {
          BRANCH_LEN_INC(j);
        } else {
          branch_len[pc][branch_n_c[pc]] = -1;
          exp_len = -1;
        }
        ref_len[slot - 1] = i ? j : -1;
      } else
        goto Default;
      break;

      /* back references */
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
        if (escape) {
        expression = pp;
        *pp++ = BACKREF;
        *pp++ = cp[-1] - '0';
        if (ref_len[cp[-1] - '0'] >= 0) {
          BRANCH_LEN_INC(ref_len[cp[-1] - '0']);
        } else {
          branch_len[pc][branch_n_c[pc]] = -1;
          exp_len = -1;
        }
      } else
        goto Default;
        break;
        
      /* repeat operators */
      case '=': /* ... to stay compatible with VIM */
      case '?':
      if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
        if (!expression) {
          rx_error = (int)E_operator_without_operand;
          break;
        }
        *pp++ = EOX; /* end of expression */
        /* insert a `REPEAT'-command at `expression'. */
        for (i = -1; pp + i >= expression; --i)
          pp[i + SIZE_REPEAT] = pp[i];
        expression[0] = REPEAT;
        expression[1] = 0;
        expression[2] = 1;
        expression[3] = pp - expression;
        pp += SIZE_REPEAT;
        branch_len[pc][branch_n_c[pc]] = -1;
        expression = 0;
      } else
        goto Default;
      break;
      case '+':
      if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
        int no_limit = 0;
        if (!expression) {
          rx_error = (int)E_operator_without_operand;
          break;
        }
        if (*cp == '+') {
          if (exp_len < 0) {
            rx_error = (int)E_operand_with_variable_length;
            break;
          }
          no_limit = 1;
          ++cp;
        }
        *pp++ = EOX; /* end of expression */
        if (exp_len < 0) {
          /* insert a `REPEAT'-command at `expression'. */
          for (i = -1; pp + i >= expression; --i)
            pp[i + SIZE_REPEAT] = pp[i];
          expression[0] = REPEAT;
          expression[1] = 1;
          expression[2] = -1;
          expression[3] = pp - expression;
          pp += SIZE_REPEAT;
        } else {
          /* insert a `FIXREPEAT'-command at `expression'. */
          for (i = -1; pp + i >= expression; --i)
            pp[i + SIZE_FIXREPEAT] = pp[i];
          expression[0] = FIXREPEAT;
          expression[1] = 1;
          expression[2] = no_limit ? -1 : rx_maxmatch;
          expression[3] = exp_len;
          expression[4] = pp - expression;
          pp += SIZE_FIXREPEAT;
          exp_len = -1;
        }
        branch_len[pc][branch_n_c[pc]] = -1;
        expression = 0;
      } else
        goto Default;
      break;
      case '*':
        if ((escape && rx_nomagic) || (!escape && !rx_nomagic)) {
        int no_limit = 0;
        if (!expression) {
          rx_error = (int)E_operator_without_operand;
          break;
        }
        if (*cp == '*') {
          if (exp_len < 0) {
            rx_error = (int)E_operand_with_variable_length;
            break;
          }
          no_limit = 1;
          ++cp;
        }
        *pp++ = EOX; /* end of expression */
        if (exp_len < 0) {
          /* insert a `REPEAT'-command at `expression'. */
          for (i = -1; pp + i >= expression; --i)
            pp[i + SIZE_REPEAT] = pp[i];
          expression[0] = REPEAT;
          expression[1] = 0;
          expression[2] = -1;
          expression[3] = pp - expression;
          pp += SIZE_REPEAT;
        } else {
          /* insert a `FIXREPEAT'-command at `expression'. */
          for (i = -1; pp + i >= expression; --i)
            pp[i + SIZE_FIXREPEAT] = pp[i];
          expression[0] = FIXREPEAT;
          expression[1] = 0;
          expression[2] = no_limit ? -1 : rx_maxmatch;
          expression[3] = exp_len;
          expression[4] = pp - expression;
          pp += SIZE_FIXREPEAT;
          exp_len = -1;
        }
        branch_len[pc][branch_n_c[pc]] = -1;
        expression = 0;
      } else
        goto Default;
      break;
      case '{':
      if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
        long min, max;
        if (!expression) {
          rx_error = (int)E_operator_without_operand;
          break;
        }
        EAT_WHITESPACE;
        GET_NUMBER(min);
        EAT_WHITESPACE;
          if (*cp == '\\') ++cp;
        if (*cp == '}')
          max = min;
        else {
          if (*cp++ != ',') { rx_error = (int)E_malformed_operator; break; }
          EAT_WHITESPACE;
          GET_NUMBER(max);
          EAT_WHITESPACE;
        }
          if (*cp == '\\') ++cp;
        if ((max && max < min) || *cp++ != '}') {
          rx_error = (int)E_malformed_operator;
          break;
        }
        if (!max) max = exp_len < 0 ? rx_maxmatch : -1;
        *pp++ = EOX; /* end of expression */
        if (exp_len < 0) {
          /* insert a `REPEAT'-command at `expression'. */
          for (i = -1; pp + i >= expression; --i)
            pp[i + SIZE_REPEAT] = pp[i];
          expression[0] = REPEAT;
          expression[1] = min;
          expression[2] = max;
          expression[3] = pp - expression;
          pp += SIZE_REPEAT;
        } else {
          /* insert a `FIXREPEAT'-command at `expression'. */
          for (i = -1; pp + i >= expression; --i)
            pp[i + SIZE_FIXREPEAT] = pp[i];
          expression[0] = FIXREPEAT;
          expression[1] = min;
          expression[2] = max;
          expression[3] = exp_len;
          expression[4] = pp - expression;
          pp += SIZE_FIXREPEAT;
          exp_len = -1;
        }
        if (min != max)
          branch_len[pc][branch_n_c[pc]] = -1;
        else
          BRANCH_LEN_INC(min);
        expression = 0;
      } else
        goto Default;
      break;

      /* context specifiers */
      case '<':
      if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
        *pp++ = BEGIN_WORD;
        expression = 0;
      } else
        goto Default;
      break;
      case '>':
      if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
        *pp++ = END_WORD;
        expression = 0;
      } else
        goto Default;
      break;
      case '^':
        if (!escape) {
        *pp++ = BEGIN_LINE;
        expression = 0;
      } else
        goto Default;
      break;
      case '$':
        if (!escape) {
        *pp++ = END_LINE;
        expression = 0;
      } else
        goto Default;
      break;

      /* any character */
      case '.':
        if ((escape && rx_nomagic) || (!escape && !rx_nomagic)) {
        expression = pp;
        *pp++ = ANY_CHAR;
        BRANCH_LEN_INC(1);
      } else
        goto Default;
      break;
      
      /* single character */
      default: Default:
        expression = pp;
      if (start) rx_start = cp[-1];
        *pp++ = CHAR;
      *pp++ = cp[-1];
      BRANCH_LEN_INC(1);
      break;
    } /* switch */
    start = escape = 0;
    CHECK_PP;
    if (rx_error) break;
  } /* while */

  if (pc) {
    if (!rx_error) rx_error = (int)E_unmatched_opening_parenthesis;
    goto exit_regex_compile;
  }
    
  if (branch_n_c[0]) {
    /* NOTE: if the whole expression is split up into multiple branches, the
     * last branch is *not* terminated by an `EOX'.
     */
    /* fill in the `n'-field (see description of the `BRANCH'-command)
     * of all `BRANCH'-commands. */
    while (branch_n_c[0]--) {
      branch_n[pc][branch_n_c[0]][1] =
      pp - (branch_n[pc][branch_n_c[0]] + SIZE_BRANCH);
    }
  }
  *pp++ = EOX;
  *pp++ = 0;

  *regex = pp - regex; /* set the replace offset */
  if (!replace) {
    *pp++ = 0;
    goto exit_regex_compile;
  }
  /* compile the `replace'-string:  we only need two commands for the
   * replace code:  `STRING' and `BACKREF'.
   */
  cp = replace;
  escape = 0;
  s = (char *)malloc(strlen(replace) + 1);
  j = 0;
  while (*cp) {
    switch (*cp++) {
      /* escape character */
      case '\\':
        if (!escape) {
        ++escape;
        continue;
      } else
        goto Default2;
      break;

      /* simple escapes */
      case 'a': case 'b': case 'f': case 'n': case 'r': case 't': case 'v':
        if (escape) s[j++] = escape_char[(int)cp[-1]]; else goto Default2;
      break;
        
      /* octal, hex, decimal escapes */
      case 'o':
        /* The strings `\0', `\1', ... `\9' are interpreted as back
       * references, therefore octal escape sequences have to be
       * prefixed with `\o'.
       */
        if (escape) {
        int oct;
        if (*cp == 'o') { /* octal string */
          ++cp;
          EAT_WHITESPACE;
          for (;;) {
            GET_OCT(oct, 3);
            if (oct > 255) { rx_error = (int)E_invalid_character; break; }
            s[j++] = (char)oct;
            EAT_WHITESPACE;
            if (!isoct(*cp)) { ++cp; break; }
          }
        } else { /* octal character */
          GET_OCT(oct, 3);
          if (oct > 255) { rx_error = (int)E_invalid_character; break; }
          s[j++] = (char)oct;
        }
      } else
        goto Default2;
      break;
      case 'x':
        if (escape) {
        int hex;
        if (*cp == 'x') { /* hex string */
          ++cp;
          EAT_WHITESPACE;
          for (;;) {
            GET_HEX(hex, 2);
            s[j++] = (char)hex;
            EAT_WHITESPACE;
            if (!ishex(*cp)) { ++cp; break; }
          }
        } else { /* hex character */
          GET_HEX(hex, 2);
          s[j++] = (char)hex;
        }
      } else
        goto Default2;
      break;
      case 'd':
        if (escape) {
        int dec;
        if (*cp == 'd') { /* decimal string */
          ++cp;
          EAT_WHITESPACE;
          for (;;) {
            GET_DEC(dec, 3);
            if (dec > 255) { rx_error = (int)E_invalid_character; break; }
            s[j++] = (char)dec;
            EAT_WHITESPACE;
            if (!isdigit(*cp)) { ++cp; break; }
          }
        } else { /* decimal character */
          GET_DEC(dec, 3);
          if (dec > 255) { rx_error = (int)E_invalid_character; break; }
          s[j++] = (char)dec;
        }
      } else
        goto Default2;
      break;

      /* back references */
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
        if (escape) {
        if (j) {
          if (pp - regex + j > regex_size * regex_blocksize - 64)
            goto compile;
            /* it is sufficient lo leave 64 bytes of extra space here,
             * because there won't be any `ANY_OF' of `ANY_BUT' commands
             * in the replace-code. */
          *pp++ = STRING;
          *pp++ = j;
          for (i = 0; i < j; ++i) *pp++ = s[i];
          j = 0;
        }
        *pp++ = BACKREF;
        *pp++ = cp[-1] - '0';
      } else
        goto Default2;
      break;

      default: Default2:
        s[j++] = cp[-1];
      break;
    } /* switch */
    escape = 0;
    if (rx_error) break;
    CHECK_PP;
  } /* while */
  if (j) {
    if (pp - regex + j > regex_size * regex_blocksize - 64) goto compile;
    *pp++ = STRING;
    *pp++ = j;
    for (i = 0; i < j; ++i) *pp++ = s[i];
  }
  *pp++ = EOX;
  *pp = 0;

exit_regex_compile:
  return rx_error ? 0 : regex;
}
/* regex_compile */

#if 0
  static void
regex_list_(regex, count, indent, base)
  unsigned long *regex;
  int count;
  int indent;
  int base;
{
  unsigned long *pp = regex;
  int j;

  for (; pp - regex < count;) {
    printf("%3i : ", (pp - regex) + base);
    for (j = 0; j < indent; ++j) putchar(' ');
    switch (*pp++) {
    case ANY_CHAR:
      printf("ANY_CHAR\n");
      break;
    case ANY_OF: 
      printf("ANY_OF <%lu> [", *pp);
      for (j = 0; j < *pp; ++j)
        if (isprint(pp[j + 1]))
          printf("%lu", pp[j + 1]);
        else
          printf("\\x%02lx", pp[j + 1]);
      printf("]\n");
      pp += *pp + 1;
      break;
    case ANY_BUT:
      printf("ANY_BUT <%lu> [", *pp);
      for (j = 0; j < *pp; ++j)
        if (isprint(pp[j + 1]))
          printf("%lu", pp[j + 1]);
        else
          printf("\\x%02lx", pp[j + 1]);
      printf("]\n");
      pp += *pp + 1;
      break;
    case CHAR:
      printf("CHAR ");
      if (isprint(*pp))
        printf("'%lu'\n", *pp);
      else
        printf("'\\x%02lx'\n", *pp);
      ++pp;
      break;
    case STRING:
      printf("STRING <%lu> \"", *pp);
      for (j = 0; j < *pp; ++j)
        if (isprint(pp[j + 1]))
          printf("%lu", pp[j + 1]);
        else
          printf("\\x%02lx", pp[j + 1]);
      pp += *pp + 1;
      printf("\"\n");
      break;
    case PAR_OPEN:
      printf("PAR_OPEN\n");
      break;
    case PAR_CLOSE:
      printf("PAR_CLOSE <slot=%lu>\n", *pp);
      ++pp;
      break;
    case BACKREF:
      printf("BACKREF <slot=%lu>\n", *pp);
      ++pp;
      break;
    case BRANCH:
      printf("BRANCH <%lu> <%lu>\n", pp[0], pp[1]);
      regex_list_(pp + 2, pp[1], indent + 2, base + (pp - regex) + 2);
      regex_list_(pp + 2 + pp[1], pp[0] - pp[1], indent + 2,
                  base + (pp - regex) + 2 + pp[1]);
      pp += 2 + pp[0];
      break;
    case REPEAT:
      printf("REPEAT <min=%lu> <max=%lu> <%lu>\n", pp[0], pp[1], pp[2]);
      regex_list_(pp + 3, pp[2], indent + 2, base + (pp - regex) + 3);
      pp += 3 + pp[2];
      break;
    case FIXREPEAT:
      printf("FIXREPEAT <min=%lu> <max=%lu> <len=%lu> <%lu>\n",
             pp[0], pp[1], pp[2], pp[3]);
      regex_list_(pp + 4, pp[3], indent + 2, base + (pp - regex) + 4);
      pp += 4 + pp[3];
      break;
    case BEGIN_WORD:
      printf("BEGIN_WORD\n");
      break;
    case END_WORD:
      printf("END_WORD\n");
      break;
    case BEGIN_LINE:
      printf("BEGIN_LINE\n");
      break;
    case END_LINE:
      printf("END_LINE\n");
      break;
    case EOF:
      printf("EOF\n");
      break;
    case EOX:
      printf("EOX\n");
      break;
    case 0:
      printf("--\n");
      return;
    default:
      printf("illegal opcode 0x%lx\n", pp[-1]);
      return;
    }
  }
}
/* regex_list_ */
#endif

/* end of regex.c */


/* VIM configuration: (do not delete this line)
 *
 * vim:bk:nodg:efm=%f\:%l\:%m:hid:icon:
 * vim:sw=2:sm:textwidth=79:ul=1024:wrap:
 */

Generated by  Doxygen 1.6.0   Back to index