No OneTemporary
Actions

Size

50 KB

Subscribers

None

View Options

	diff --git a/regexp/jimregexp.c b/regexp/jimregexp.c
	index 2ea3a7d13..cab83b920 100644
	--- a/regexp/jimregexp.c
	+++ b/regexp/jimregexp.c
	@@ -1,1900 +1,1904 @@
	/*
	* vi:se ts=8:
	*
	* regcomp and regexec -- regsub and regerror are elsewhere
	*
	* Copyright (c) 1986 by University of Toronto.
	* Written by Henry Spencer. Not derived from licensed software.
	*
	* Permission is granted to anyone to use this software for any
	* purpose on any computer system, and to redistribute it freely,
	* subject to the following restrictions:
	*
	* 1. The author is not responsible for the consequences of use of
	* this software, no matter how awful, even if they arise
	* from defects in it.
	*
	* 2. The origin of this software must not be misrepresented, either
	* by explicit claim or by omission.
	*
	* 3. Altered versions must be plainly marked as such, and must not
	* be misrepresented as being the original software.
	*** THIS IS AN ALTERED VERSION. It was altered by John Gilmore,
	*** hoptoad!gnu, on 27 Dec 1986, to add \n as an alternative to \|
	*** to assist in implementing egrep.
	*** THIS IS AN ALTERED VERSION. It was altered by John Gilmore,
	*** hoptoad!gnu, on 27 Dec 1986, to add \< and \> for word-matching
	*** as in BSD grep and ex.
	*** THIS IS AN ALTERED VERSION. It was altered by John Gilmore,
	*** hoptoad!gnu, on 28 Dec 1986, to optimize characters quoted with \.
	*** THIS IS AN ALTERED VERSION. It was altered by James A. Woods,
	*** ames!jaw, on 19 June 1987, to quash a regcomp() redundancy.
	*** THIS IS AN ALTERED VERSION. It was altered by Christopher Seiwald
	*** seiwald@vix.com, on 28 August 1993, for use in jam. Regmagic.h
	*** was moved into regexp.h, and the include of regexp.h now uses "'s
	*** to avoid conflicting with the system regexp.h. Const, bless its
	*** soul, was removed so it can compile everywhere. The declaration
	*** of strchr() was in conflict on AIX, so it was removed (as it is
	*** happily defined in string.h).
	*** THIS IS AN ALTERED VERSION. It was altered by Christopher Seiwald
	*** seiwald@perforce.com, on 20 January 2000, to use function prototypes.
	*** THIS IS AN ALTERED VERSION. It was altered by Christopher Seiwald
	*** seiwald@perforce.com, on 05 November 2002, to const string literals.
	*
	* THIS IS AN ALTERED VERSION. It was altered by Steve Bennett <steveb@workware.net.au>
	* on 16 October 2010, to remove static state and add better Tcl ARE compatibility.
	* This includes counted repetitions, UTF-8 support, character classes,
	* shorthand character classes, increased number of parentheses to 100,
	* backslash escape sequences. It also removes \n as an alternative to \|.
	*
	* Beware that some of this code is subtly aware of the way operator
	* precedence is structured in regular expressions. Serious changes in
	* regular-expression syntax might require a total rethink.
	*/

	#if defined(JIM_REGEXP)
	#include <stdio.h>
	#include <ctype.h>
	#include <stdlib.h>
	#include <string.h>

	#include "jimregexp.h"
	#include "utf8.h"

	#define UCHAR(c) ((unsigned char)(c))

	/* An arbitrary limit, but this seems enough. Must be less than 1000. */
	#define REG_MAX_PAREN 100

	/*
	* Structure for regexp "program". This is essentially a linear encoding
	* of a nondeterministic finite-state machine (aka syntax charts or
	* "railroad normal form" in parsing technology). Each node is an opcode
	* plus a "next" pointer, possibly plus an operand. "Next" pointers of
	* all nodes except BRANCH implement concatenation; a "next" pointer with
	* a BRANCH on both ends of it is connecting two alternatives. (Here we
	* have one of the subtle syntax dependencies: an individual BRANCH (as
	* opposed to a collection of them) is never concatenated with anything
	* because of operator precedence.) The operand of some types of node is
	* a literal string; for others, it is a node leading into a sub-FSM. In
	* particular, the operand of a BRANCH node is the first node of the branch.
	* (NB this is not a tree structure: the tail of the branch connects
	* to the thing following the set of BRANCHes.) The opcodes are:
	*/

	/* definition number opnd? meaning */
	#define END 0 /* no End of program. */
	#define BOL 1 /* no Match "" at beginning of line. */
	#define EOL 2 /* no Match "" at end of line. */
	#define ANY 3 /* no Match any one character. */
	#define ANYOF 4 /* str Match any character in this string. */
	#define ANYBUT 5 /* str Match any character not in this string. */
	#define BRANCH 6 /* node Match this alternative, or the next... */
	#define BACK 7 /* no Match "", "next" ptr points backward. */
	#define EXACTLY 8 /* str Match this string. */
	#define NOTHING 9 /* no Match empty string. */
	#define REP 10 /* max,min Match this (simple) thing [min,max] times. */
	#define REPMIN 11 /* max,min Match this (simple) thing [min,max] times, minimal match. */
	#define REPX 12 /* max,min Match this (complex) thing [min,max] times. */
	#define REPXMIN 13 /* max,min Match this (complex) thing [min,max] times, minimal match. */
	#define BOLX 14 /* no Match "" at beginning of input. */
	#define EOLX 15 /* no Match "" at end of input. */
	#define WORDA 16 /* no Match "" at wordchar, where prev is nonword */
	#define WORDZ 17 /* no Match "" at nonwordchar, where prev is word */

	#define OPENNC 1000 /* no Non-capturing parentheses - must be OPEN-1 */
	#define OPEN 1001 /* no Mark this point in input as start of #n. */
	/* OPEN+1 is number 1, etc. */

	/* must not be any other opts between OPEN and CLOSE */

	#define CLOSENC 2000 /* no Non-capturing parentheses - must be CLOSE-1 */
	#define CLOSE 2001 /* no Analogous to OPEN. */
	#define CLOSE_END (CLOSE+REG_MAX_PAREN)

	/*
	* The first word of the regexp internal "program" is actually this magic
	* number; the start node begins in the second word.
	*/
	#define REG_MAGIC 0xFADED00D

	/*
	* Opcode notes:
	*
	* BRANCH The set of branches constituting a single choice are hooked
	* together with their "next" pointers, since precedence prevents
	* anything being concatenated to any individual branch. The
	* "next" pointer of the last BRANCH in a choice points to the
	* thing following the whole choice. This is also where the
	* final "next" pointer of each individual branch points; each
	* branch starts with the operand node of a BRANCH node.
	*
	* BACK Normal "next" pointers all implicitly point forward; BACK
	* exists to make loop structures possible.
	*
	* REP,REPX Repeated matches ('?', '*', '+' and {min,max}) are implemented
	* as either simple repeats (REP) or complex repeats (REPX).
	* These opcodes include a "min" and "max" count after the opcode.
	* This is followed by a fourth "current count" word that is
	* only used by REPX, as it implements a recursive match.
	* REPMIN and REPXMIN are identical except they implement minimal repeats.
	*
	* OPEN,CLOSE ...are numbered at compile time.
	*/

	/*
	* A node is one word of opcode followed by one word of "next" pointer.
	* The "next" pointer value is a positive offset from the opcode of the node
	* containing it.
	* An operand, if any, simply follows the node. (Note that much of the
	* code generation knows about this implicit relationship.)
	*/
	#define OP(preg, p) (preg->program[p])
	#define NEXT(preg, p) (preg->program[p + 1])
	#define OPERAND(p) ((p) + 2)

	/*
	* See regmagic.h for one further detail of program structure.
	*/


	/*
	* Utility definitions.
	*/

	#define FAIL(R,M) { (R)->err = (M); return (M); }
	#define ISMULT(c) ((c) == '*' \|\| (c) == '+' \|\| (c) == '?' \|\| (c) == '{')
	#define META "^$.[()\|?{+*"

	/*
	* Flags to be passed up and down.
	*/
	#define HASWIDTH 1 /* Known never to match null string. */
	#define SIMPLE 2 /* Simple enough to be STAR/PLUS operand. */
	#define SPSTART 4 /* Starts with * or +. */
	#define WORST 0 /* Worst case. */

	#define MAX_REP_COUNT 1000000

	/*
	* Forward declarations for regcomp()'s friends.
	*/
	static int reg(regex_t preg, int paren / Parenthesized? /, int flagp );
	static int regpiece(regex_t preg, int flagp );
	static int regbranch(regex_t preg, int flagp );
	static int regatom(regex_t preg, int flagp );
	static int regnode(regex_t *preg, int op );
	static int regnext(regex_t *preg, int p );
	static void regc(regex_t *preg, int b );
	static int reginsert(regex_t *preg, int op, int size, int opnd );
	static void regtail(regex_t *preg, int p, int val);
	static void regoptail(regex_t *preg, int p, int val );
	static int regopsize(regex_t *preg, int p );

	static int reg_range_find(const int *string, int c);
	static const char str_find(const char string, int c, int nocase);
	static int prefix_cmp(const int prog, int proglen, const char string, int nocase);

	/#define DEBUG/
	#ifdef DEBUG
	static int regnarrate = 0;
	static void regdump(regex_t *preg);
	static const char *regprop( int op );
	#endif


	/**
	* Returns the length of the null-terminated integer sequence.
	*/
	static int str_int_len(const int *seq)
	{
	int n = 0;
	while (*seq++) {
	n++;
	}
	return n;
	}

	/*
	- regcomp - compile a regular expression into internal code
	*
	* We can't allocate space until we know how big the compiled form will be,
	* but we can't compile it (and thus know how big it is) until we've got a
	* place to put the code. So we cheat: we compile it twice, once with code
	* generation turned off and size counting turned on, and once "for real".
	* This also means that we don't allocate space until we are sure that the
	* thing really will compile successfully, and we never have to move the
	* code and thus invalidate pointers into it. (Note that it has to be in
	* one piece because free() must be able to free it all.)
	*
	* Beware that the optimization-preparation code in here knows about some
	* of the structure of the compiled regexp.
	*/
	int regcomp(regex_t preg, const char exp, int cflags)
	{
	int scan;
	int longest;
	unsigned len;
	int flags;

	#ifdef DEBUG
	fprintf(stderr, "Compiling: '%s'\n", exp);
	#endif
	memset(preg, 0, sizeof(*preg));

	if (exp == NULL)
	FAIL(preg, REG_ERR_NULL_ARGUMENT);

	/* First pass: determine size, legality. */
	preg->cflags = cflags;
	preg->regparse = exp;

	/* Allocate space. */
	preg->proglen = (strlen(exp) + 1) * 5;
	preg->program = malloc(preg->proglen * sizeof(int));
	if (preg->program == NULL)
	FAIL(preg, REG_ERR_NOMEM);

	/* Note that since we store a magic value as the first item in the program,
	* program offsets will never be 0
	*/
	regc(preg, REG_MAGIC);
	if (reg(preg, 0, &flags) == 0) {
	return preg->err;
	}

	/* Small enough for pointer-storage convention? */
	if (preg->re_nsub >= REG_MAX_PAREN) /* Probably could be 65535L. */
	FAIL(preg,REG_ERR_TOO_BIG);

	/* Dig out information for optimizations. */
	preg->regstart = 0; /* Worst-case defaults. */
	preg->reganch = 0;
	preg->regmust = 0;
	preg->regmlen = 0;
	scan = 1; /* First BRANCH. */
	if (OP(preg, regnext(preg, scan)) == END) { /* Only one top-level choice. */
	scan = OPERAND(scan);

	/* Starting-point info. */
	if (OP(preg, scan) == EXACTLY) {
	preg->regstart = preg->program[OPERAND(scan)];
	}
	else if (OP(preg, scan) == BOL)
	preg->reganch++;

	/*
	* If there's something expensive in the r.e., find the
	* longest literal string that must appear and make it the
	* regmust. Resolve ties in favor of later strings, since
	* the regstart check works with the beginning of the r.e.
	* and avoiding duplication strengthens checking. Not a
	* strong reason, but sufficient in the absence of others.
	*/
	if (flags&SPSTART) {
	longest = 0;
	len = 0;
	for (; scan != 0; scan = regnext(preg, scan)) {
	if (OP(preg, scan) == EXACTLY) {
	int plen = str_int_len(preg->program + OPERAND(scan));
	if (plen >= len) {
	longest = OPERAND(scan);
	len = plen;
	}
	}
	}
	preg->regmust = longest;
	preg->regmlen = len;
	}
	}

	#ifdef DEBUG
	regdump(preg);
	#endif

	return 0;
	}

	/*
	- reg - regular expression, i.e. main body or parenthesized thing
	*
	* Caller must absorb opening parenthesis.
	*
	* Combining parenthesis handling with the base level of regular expression
	* is a trifle forced, but the need to tie the tails of the branches to what
	* follows makes it hard to avoid.
	*/
	static int reg(regex_t preg, int paren / Parenthesized? /, int flagp )
	{
	int ret;
	int br;
	int ender;
	int parno = 0;
	int flags;

	flagp = HASWIDTH; / Tentatively. */

	/* Make an OPEN node, if parenthesized. */
	if (paren) {
	if (preg->regparse[0] == '?' && preg->regparse[1] == ':') {
	/* non-capturing paren */
	preg->regparse += 2;
	parno = -1;
	}
	else {
	parno = ++preg->re_nsub;
	}
	ret = regnode(preg, OPEN+parno);
	} else
	ret = 0;

	/* Pick up the branches, linking them together. */
	br = regbranch(preg, &flags);
	if (br == 0)
	return 0;
	if (ret != 0)
	regtail(preg, ret, br); /* OPEN -> first. */
	else
	ret = br;
	if (!(flags&HASWIDTH))
	*flagp &= ~HASWIDTH;
	*flagp \|= flags&SPSTART;
	while (*preg->regparse == '\|') {
	preg->regparse++;
	br = regbranch(preg, &flags);
	if (br == 0)
	return 0;
	regtail(preg, ret, br); /* BRANCH -> BRANCH. */
	if (!(flags&HASWIDTH))
	*flagp &= ~HASWIDTH;
	*flagp \|= flags&SPSTART;
	}

	/* Make a closing node, and hook it on the end. */
	ender = regnode(preg, (paren) ? CLOSE+parno : END);
	regtail(preg, ret, ender);

	/* Hook the tails of the branches to the closing node. */
	for (br = ret; br != 0; br = regnext(preg, br))
	regoptail(preg, br, ender);

	/* Check for proper termination. */
	if (paren && *preg->regparse++ != ')') {
	preg->err = REG_ERR_UNMATCHED_PAREN;
	return 0;
	} else if (!paren && *preg->regparse != '\0') {
	if (*preg->regparse == ')') {
	preg->err = REG_ERR_UNMATCHED_PAREN;
	return 0;
	} else {
	preg->err = REG_ERR_JUNK_ON_END;
	return 0;
	}
	}

	return(ret);
	}

	/*
	- regbranch - one alternative of an \| operator
	*
	* Implements the concatenation operator.
	*/
	static int regbranch(regex_t preg, int flagp )
	{
	int ret;
	int chain;
	int latest;
	int flags;

	flagp = WORST; / Tentatively. */

	ret = regnode(preg, BRANCH);
	chain = 0;
	while (preg->regparse != '\0' && preg->regparse != ')' &&
	*preg->regparse != '\|') {
	latest = regpiece(preg, &flags);
	if (latest == 0)
	return 0;
	*flagp \|= flags&HASWIDTH;
	if (chain == 0) {/* First piece. */
	*flagp \|= flags&SPSTART;
	}
	else {
	regtail(preg, chain, latest);
	}
	chain = latest;
	}
	if (chain == 0) /* Loop ran zero times. */
	(void) regnode(preg, NOTHING);

	return(ret);
	}

	/*
	- regpiece - something followed by possible [*+?]
	*
	* Note that the branching code sequences used for ? and the general cases
	* of * and + are somewhat optimized: they use the same NOTHING node as
	* both the endmarker for their branch list and the body of the last branch.
	* It might seem that this node could be dispensed with entirely, but the
	* endmarker role is not redundant.
	*/
	static int regpiece(regex_t preg, int flagp)
	{
	int ret;
	char op;
	int next;
	int flags;
	int min;
	int max;

	ret = regatom(preg, &flags);
	if (ret == 0)
	return 0;

	op = *preg->regparse;
	if (!ISMULT(op)) {
	*flagp = flags;
	return(ret);
	}

	if (!(flags&HASWIDTH) && op != '?') {
	preg->err = REG_ERR_OPERAND_COULD_BE_EMPTY;
	return 0;
	}

	/* Handle braces (counted repetition) by expansion */
	if (op == '{') {
	char *end;

	min = strtoul(preg->regparse + 1, &end, 10);
	if (end == preg->regparse + 1) {
	preg->err = REG_ERR_BAD_COUNT;
	return 0;
	}
	if (*end == '}') {
	max = min;
	}
	else if (*end == '\0') {
	preg->err = REG_ERR_UNMATCHED_BRACES;
	return 0;
	}
	else {
	preg->regparse = end;
	max = strtoul(preg->regparse + 1, &end, 10);
	if (*end != '}') {
	preg->err = REG_ERR_UNMATCHED_BRACES;
	return 0;
	}
	}
	if (end == preg->regparse + 1) {
	max = MAX_REP_COUNT;
	}
	else if (max < min \|\| max >= 100) {
	preg->err = REG_ERR_BAD_COUNT;
	return 0;
	}
	if (min >= 100) {
	preg->err = REG_ERR_BAD_COUNT;
	return 0;
	}

	preg->regparse = strchr(preg->regparse, '}');
	}
	else {
	min = (op == '+');
	max = (op == '?' ? 1 : MAX_REP_COUNT);
	}

	if (preg->regparse[1] == '?') {
	preg->regparse++;
	next = reginsert(preg, flags & SIMPLE ? REPMIN : REPXMIN, 5, ret);
	}
	else {
	next = reginsert(preg, flags & SIMPLE ? REP: REPX, 5, ret);
	}
	preg->program[ret + 2] = max;
	preg->program[ret + 3] = min;
	preg->program[ret + 4] = 0;

	*flagp = (min) ? (WORST\|HASWIDTH) : (WORST\|SPSTART);

	if (!(flags & SIMPLE)) {
	int back = regnode(preg, BACK);
	regtail(preg, back, ret);
	regtail(preg, next, back);
	}

	preg->regparse++;
	if (ISMULT(*preg->regparse)) {
	preg->err = REG_ERR_NESTED_COUNT;
	return 0;
	}

	return ret;
	}

	/**
	* Add all characters in the inclusive range between lower and upper.
	*
	* Handles a swapped range (upper < lower).
	*/
	static void reg_addrange(regex_t *preg, int lower, int upper)
	{
	if (lower > upper) {
	reg_addrange(preg, upper, lower);
	}
	/* Add a range as length, start */
	regc(preg, upper - lower + 1);
	regc(preg, lower);
	}

	/**
	* Add a null-terminated literal string as a set of ranges.
	*/
	static void reg_addrange_str(regex_t preg, const char str)
	{
	while (*str) {
	reg_addrange(preg, str, str);
	str++;
	}
	}

	/**
	* Extracts the next unicode char from utf8.
	*
	* If 'upper' is set, converts the char to uppercase.
	*/
	static int reg_utf8_tounicode_case(const char s, int uc, int upper)
	{
	int l = utf8_tounicode(s, uc);
	if (upper) {
	uc = utf8_upper(uc);
	}
	return l;
	}

	/**
	* Converts a hex digit to decimal.
	*
	* Returns -1 for an invalid hex digit.
	*/
	static int hexdigitval(int c)
	{
	if (c >= '0' && c <= '9')
	return c - '0';
	if (c >= 'a' && c <= 'f')
	return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
	return c - 'A' + 10;
	return -1;
	}

	/**
	* Parses up to 'n' hex digits at 's' and stores the result in *uc.
	*
	* Returns the number of hex digits parsed.
	* If there are no hex digits, returns 0 and stores nothing.
	*/
	static int parse_hex(const char s, int n, int uc)
	{
	int val = 0;
	int k;

	for (k = 0; k < n; k++) {
	int c = hexdigitval(*s++);
	if (c == -1) {
	break;
	}
	val = (val << 4) \| c;
	}
	if (k) {
	*uc = val;
	}
	return k;
	}

	/**
	* Call for chars after a backlash to decode the escape sequence.
	*
	* Stores the result in *ch.
	*
	* Returns the number of bytes consumed.
	*/
	static int reg_decode_escape(const char s, int ch)
	{
	int n;
	const char *s0 = s;

	ch = s++;

	switch (*ch) {
	case 'b': *ch = '\b'; break;
	case 'e': *ch = 27; break;
	case 'f': *ch = '\f'; break;
	case 'n': *ch = '\n'; break;
	case 'r': *ch = '\r'; break;
	case 't': *ch = '\t'; break;
	case 'v': *ch = '\v'; break;
	case 'u':
	if (*s == '{') {
	/* Expect \u{NNNN} */
	n = parse_hex(s + 1, 6, ch);
	if (n > 0 && s[n + 1] == '}' && ch >= 0 && ch <= 0x1fffff) {
	s += n + 2;
	}
	else {
	/* Invalid, so just treat as an escaped 'u' */
	*ch = 'u';
	}
	}
	else if ((n = parse_hex(s, 4, ch)) > 0) {
	s += n;
	}
	break;
	case 'U':
	if ((n = parse_hex(s, 8, ch)) > 0) {
	s += n;
	}
	break;
	case 'x':
	if ((n = parse_hex(s, 2, ch)) > 0) {
	s += n;
	}
	break;
	case '\0':
	s--;
	*ch = '\\';
	break;
	}
	return s - s0;
	}

	/*
	- regatom - the lowest level
	*
	* Optimization: gobbles an entire sequence of ordinary characters so that
	* it can turn them into a single node, which is smaller to store and
	* faster to run. Backslashed characters are exceptions, each becoming a
	* separate node; the code is simpler that way and it's not worth fixing.
	*/
	static int regatom(regex_t preg, int flagp)
	{
	int ret;
	int flags;
	int nocase = (preg->cflags & REG_ICASE);

	int ch;
	int n = reg_utf8_tounicode_case(preg->regparse, &ch, nocase);

	flagp = WORST; / Tentatively. */

	preg->regparse += n;
	switch (ch) {
	/* FIXME: these chars only have meaning at beg/end of pat? */
	case '^':
	ret = regnode(preg, BOL);
	break;
	case '$':
	ret = regnode(preg, EOL);
	break;
	case '.':
	ret = regnode(preg, ANY);
	*flagp \|= HASWIDTH\|SIMPLE;
	break;
	case '[': {
	const char *pattern = preg->regparse;

	if (pattern == '^') { / Complement of range. */
	ret = regnode(preg, ANYBUT);
	pattern++;
	} else
	ret = regnode(preg, ANYOF);

	/* Special case. If the first char is ']' or '-', it is part of the set */
	if (pattern == ']' \|\| pattern == '-') {
	reg_addrange(preg, pattern, pattern);
	pattern++;
	}

	while (pattern && pattern != ']') {
	/* Is this a range? a-z */
	int start;
	int end;

	enum {
	CC_ALPHA, CC_ALNUM, CC_SPACE, CC_BLANK, CC_UPPER, CC_LOWER,
	CC_DIGIT, CC_XDIGIT, CC_CNTRL, CC_GRAPH, CC_PRINT, CC_PUNCT,
	CC_NUM
	};
	int cc;

	pattern += reg_utf8_tounicode_case(pattern, &start, nocase);
	if (start == '\\') {
	/* First check for class shorthand escapes */
	switch (*pattern) {
	case 's':
	pattern++;
	cc = CC_SPACE;
	goto cc_switch;
	case 'd':
	pattern++;
	cc = CC_DIGIT;
	goto cc_switch;
	case 'w':
	pattern++;
	reg_addrange(preg, '_', '_');
	cc = CC_ALNUM;
	goto cc_switch;
	}
	pattern += reg_decode_escape(pattern, &start);
	if (start == 0) {
	preg->err = REG_ERR_NULL_CHAR;
	return 0;
	}
	}
	if (pattern[0] == '-' && pattern[1] && pattern[1] != ']') {
	/* skip '-' */
	pattern += utf8_tounicode(pattern, &end);
	pattern += reg_utf8_tounicode_case(pattern, &end, nocase);
	if (end == '\\') {
	pattern += reg_decode_escape(pattern, &end);
	if (end == 0) {
	preg->err = REG_ERR_NULL_CHAR;
	return 0;
	}
	}

	reg_addrange(preg, start, end);
	continue;
	}
	if (start == '[' && pattern[0] == ':') {
	static const char *character_class[] = {
	":alpha:", ":alnum:", ":space:", ":blank:", ":upper:", ":lower:",
	":digit:", ":xdigit:", ":cntrl:", ":graph:", ":print:", ":punct:",
	};

	for (cc = 0; cc < CC_NUM; cc++) {
	n = strlen(character_class[cc]);
	if (strncmp(pattern, character_class[cc], n) == 0) {
	/* Found a character class */
	pattern += n + 1;
	break;
	}
	}
	if (cc != CC_NUM) {
	cc_switch:
	switch (cc) {
	case CC_ALNUM:
	reg_addrange(preg, '0', '9');
	/* Fall through */
	case CC_ALPHA:
	if ((preg->cflags & REG_ICASE) == 0) {
	reg_addrange(preg, 'a', 'z');
	}
	reg_addrange(preg, 'A', 'Z');
	break;
	case CC_SPACE:
	reg_addrange_str(preg, " \t\r\n\f\v");
	break;
	case CC_BLANK:
	reg_addrange_str(preg, " \t");
	break;
	case CC_UPPER:
	reg_addrange(preg, 'A', 'Z');
	break;
	case CC_LOWER:
	reg_addrange(preg, 'a', 'z');
	break;
	case CC_XDIGIT:
	reg_addrange(preg, 'a', 'f');
	reg_addrange(preg, 'A', 'F');
	/* Fall through */
	case CC_DIGIT:
	reg_addrange(preg, '0', '9');
	break;
	case CC_CNTRL:
	reg_addrange(preg, 0, 31);
	reg_addrange(preg, 127, 127);
	break;
	case CC_PRINT:
	reg_addrange(preg, ' ', '~');
	break;
	case CC_GRAPH:
	reg_addrange(preg, '!', '~');
	break;
	case CC_PUNCT:
	reg_addrange(preg, '!', '/');
	reg_addrange(preg, ':', '@');
	reg_addrange(preg, '[', '`');
	reg_addrange(preg, '{', '~');
	break;
	}
	continue;
	}
	}
	/* Not a range, so just add the char */
	reg_addrange(preg, start, start);
	}
	regc(preg, '\0');

	if (*pattern) {
	pattern++;
	}
	preg->regparse = pattern;

	*flagp \|= HASWIDTH\|SIMPLE;
	}
	break;
	case '(':
	ret = reg(preg, 1, &flags);
	if (ret == 0)
	return 0;
	*flagp \|= flags&(HASWIDTH\|SPSTART);
	break;
	case '\0':
	case '\|':
	case ')':
	preg->err = REG_ERR_INTERNAL;
	return 0; /* Supposed to be caught earlier. */
	case '?':
	case '+':
	case '*':
	case '{':
	preg->err = REG_ERR_COUNT_FOLLOWS_NOTHING;
	return 0;
	case '\\':
	ch = *preg->regparse++;
	switch (ch) {
	case '\0':
	preg->err = REG_ERR_TRAILING_BACKSLASH;
	return 0;
	case 'A':
	ret = regnode(preg, BOLX);
	break;
	case 'Z':
	ret = regnode(preg, EOLX);
	break;
	case '<':
	case 'm':
	ret = regnode(preg, WORDA);
	break;
	case '>':
	case 'M':
	ret = regnode(preg, WORDZ);
	break;
	case 'd':
	case 'D':
	ret = regnode(preg, ch == 'd' ? ANYOF : ANYBUT);
	reg_addrange(preg, '0', '9');
	regc(preg, '\0');
	*flagp \|= HASWIDTH\|SIMPLE;
	break;
	case 'w':
	case 'W':
	ret = regnode(preg, ch == 'w' ? ANYOF : ANYBUT);
	if ((preg->cflags & REG_ICASE) == 0) {
	reg_addrange(preg, 'a', 'z');
	}
	reg_addrange(preg, 'A', 'Z');
	reg_addrange(preg, '0', '9');
	reg_addrange(preg, '_', '_');
	regc(preg, '\0');
	*flagp \|= HASWIDTH\|SIMPLE;
	break;
	case 's':
	case 'S':
	ret = regnode(preg, ch == 's' ? ANYOF : ANYBUT);
	reg_addrange_str(preg," \t\r\n\f\v");
	regc(preg, '\0');
	*flagp \|= HASWIDTH\|SIMPLE;
	break;
	/* FIXME: Someday handle \1, \2, ... */
	default:
	/* Handle general quoted chars in exact-match routine */
	/* Back up to include the backslash */
	preg->regparse--;
	goto de_fault;
	}
	break;
	de_fault:
	default: {
	/*
	* Encode a string of characters to be matched exactly.
	*/
	int added = 0;

	/* Back up to pick up the first char of interest */
	preg->regparse -= n;

	ret = regnode(preg, EXACTLY);

	/* Note that a META operator such as ? or * consumes the
	* preceding char.
	* Thus we must be careful to look ahead by 2 and add the
	* last char as it's own EXACTLY if necessary
	*/

	/* Until end of string or a META char is reached */
	while (preg->regparse && strchr(META, preg->regparse) == NULL) {
	n = reg_utf8_tounicode_case(preg->regparse, &ch, (preg->cflags & REG_ICASE));
	if (ch == '\\' && preg->regparse[n]) {
	/* Non-trailing backslash.
	* Is this a special escape, or a regular escape?
	*/
	if (strchr("<>mMwWdDsSAZ", preg->regparse[n])) {
	/* A special escape. All done with EXACTLY */
	break;
	}
	/* Decode it. Note that we add the length for the escape
	* sequence to the length for the backlash so we can skip
	* the entire sequence, or not as required.
	*/
	n += reg_decode_escape(preg->regparse + n, &ch);
	if (ch == 0) {
	preg->err = REG_ERR_NULL_CHAR;
	return 0;
	}
	}

	/* Now we have one char 'ch' of length 'n'.
	* Check to see if the following char is a MULT
	*/

	if (ISMULT(preg->regparse[n])) {
	/* Yes. But do we already have some EXACTLY chars? */
	if (added) {
	/* Yes, so return what we have and pick up the current char next time around */
	break;
	}
	/* No, so add this single char and finish */
	regc(preg, ch);
	added++;
	preg->regparse += n;
	break;
	}

	/* No, so just add this char normally */
	regc(preg, ch);
	added++;
	preg->regparse += n;
	}
	regc(preg, '\0');

	*flagp \|= HASWIDTH;
	if (added == 1)
	*flagp \|= SIMPLE;
	break;
	}
	break;
	}

	return(ret);
	}

	static void reg_grow(regex_t *preg, int n)
	{
	if (preg->p + n >= preg->proglen) {
	preg->proglen = (preg->p + n) * 2;
	preg->program = realloc(preg->program, preg->proglen * sizeof(int));
	}
	}

	/*
	- regnode - emit a node
	*/
	/* Location. */
	static int regnode(regex_t *preg, int op)
	{
	reg_grow(preg, 2);

	/* The OP followed by a next pointer */
	preg->program[preg->p++] = op;
	preg->program[preg->p++] = 0;

	/* Return the start of the node */
	return preg->p - 2;
	}

	/*
	- regc - emit (if appropriate) a byte of code
	*/
	static void regc(regex_t *preg, int b )
	{
	reg_grow(preg, 1);
	preg->program[preg->p++] = b;
	}

	/*
	- reginsert - insert an operator in front of already-emitted operand
	*
	* Means relocating the operand.
	* Returns the new location of the original operand.
	*/
	static int reginsert(regex_t *preg, int op, int size, int opnd )
	{
	reg_grow(preg, size);

	/* Move everything from opnd up */
	memmove(preg->program + opnd + size, preg->program + opnd, sizeof(int) * (preg->p - opnd));
	/* Zero out the new space */
	memset(preg->program + opnd, 0, sizeof(int) * size);

	preg->program[opnd] = op;

	preg->p += size;

	return opnd + size;
	}

	/*
	- regtail - set the next-pointer at the end of a node chain
	*/
	static void regtail(regex_t *preg, int p, int val)
	{
	int scan;
	int temp;
	int offset;

	/* Find last node. */
	scan = p;
	for (;;) {
	temp = regnext(preg, scan);
	if (temp == 0)
	break;
	scan = temp;
	}

	if (OP(preg, scan) == BACK)
	offset = scan - val;
	else
	offset = val - scan;

	preg->program[scan + 1] = offset;
	}

	/*
	- regoptail - regtail on operand of first argument; nop if operandless
	*/

	static void regoptail(regex_t *preg, int p, int val )
	{
	/* "Operandless" and "op != BRANCH" are synonymous in practice. */
	if (p != 0 && OP(preg, p) == BRANCH) {
	regtail(preg, OPERAND(p), val);
	}
	}

	/*
	* regexec and friends
	*/

	/*
	* Forwards.
	*/
	static int regtry(regex_t preg, const char string );
	static int regmatch(regex_t *preg, int prog);
	static int regrepeat(regex_t *preg, int p, int max);

	/*
	- regexec - match a regexp against a string
	*/
	int regexec(regex_t preg, const char string, size_t nmatch, regmatch_t pmatch[], int eflags)
	{
	const char *s;
	int scan;

	/* Be paranoid... */
	if (preg == NULL \|\| preg->program == NULL \|\| string == NULL) {
	return REG_ERR_NULL_ARGUMENT;
	}

	/* Check validity of program. */
	if (*preg->program != REG_MAGIC) {
	return REG_ERR_CORRUPTED;
	}

	#ifdef DEBUG
	fprintf(stderr, "regexec: %s\n", string);
	regdump(preg);
	#endif

	preg->eflags = eflags;
	preg->pmatch = pmatch;
	preg->nmatch = nmatch;
	preg->start = string; /* All offsets are computed from here */

	/* Must clear out the embedded repeat counts of REPX and REPXMIN opcodes */
	for (scan = OPERAND(1); scan != 0; scan += regopsize(preg, scan)) {
	int op = OP(preg, scan);
	if (op == END)
	break;
	if (op == REPX \|\| op == REPXMIN)
	preg->program[scan + 4] = 0;
	}

	/* If there is a "must appear" string, look for it. */
	if (preg->regmust != 0) {
	s = string;
	while ((s = str_find(s, preg->program[preg->regmust], preg->cflags & REG_ICASE)) != NULL) {
	if (prefix_cmp(preg->program + preg->regmust, preg->regmlen, s, preg->cflags & REG_ICASE) >= 0) {
	break;
	}
	s++;
	}
	if (s == NULL) /* Not present. */
	return REG_NOMATCH;
	}

	/* Mark beginning of line for ^ . */
	preg->regbol = string;

	/* Simplest case: anchored match need be tried only once (maybe per line). */
	if (preg->reganch) {
	if (eflags & REG_NOTBOL) {
	/* This is an anchored search, but not an BOL, so possibly skip to the next line */
	goto nextline;
	}
	while (1) {
	if (regtry(preg, string)) {
	return REG_NOERROR;
	}
	if (*string) {
	nextline:
	if (preg->cflags & REG_NEWLINE) {
	/* Try the next anchor? */
	string = strchr(string, '\n');
	if (string) {
	preg->regbol = ++string;
	continue;
	}
	}
	}
	return REG_NOMATCH;
	}
	}

	/* Messy cases: unanchored match. */
	s = string;
	if (preg->regstart != '\0') {
	/* We know what char it must start with. */
	while ((s = str_find(s, preg->regstart, preg->cflags & REG_ICASE)) != NULL) {
	if (regtry(preg, s))
	return REG_NOERROR;
	s++;
	}
	}
	else
	/* We don't -- general case. */
	while (1) {
	if (regtry(preg, s))
	return REG_NOERROR;
	if (*s == '\0') {
	break;
	}
	else {
	int c;
	s += utf8_tounicode(s, &c);
	}
	}

	/* Failure. */
	return REG_NOMATCH;
	}

	/*
	- regtry - try match at specific point
	*/
	/* 0 failure, 1 success */
	static int regtry( regex_t preg, const char string )
	{
	int i;

	preg->reginput = string;

	for (i = 0; i < preg->nmatch; i++) {
	- preg->pmatch[i].rm_so = -1;
	- preg->pmatch[i].rm_eo = -1;
	+ if (preg->pmatch) {
	+ preg->pmatch[i].rm_so = -1;
	+ preg->pmatch[i].rm_eo = -1;
	+ }
	}
	if (regmatch(preg, 1)) {
	- preg->pmatch[0].rm_so = string - preg->start;
	- preg->pmatch[0].rm_eo = preg->reginput - preg->start;
	+ if (preg->pmatch) {
	+ preg->pmatch[0].rm_so = string - preg->start;
	+ preg->pmatch[0].rm_eo = preg->reginput - preg->start;
	+ }
	return(1);
	} else
	return(0);
	}

	/**
	* Returns bytes matched if 'pattern' is a prefix of 'string'.
	*
	* If 'nocase' is non-zero, does a case-insensitive match.
	*
	* Returns -1 on not found.
	*/
	static int prefix_cmp(const int prog, int proglen, const char string, int nocase)
	{
	const char *s = string;
	while (proglen && *s) {
	int ch;
	int n = reg_utf8_tounicode_case(s, &ch, nocase);
	if (ch != *prog) {
	return -1;
	}
	prog++;
	s += n;
	proglen--;
	}
	if (proglen == 0) {
	return s - string;
	}
	return -1;
	}

	/**
	* Searchs for 'c' in the range 'range'.
	*
	* Returns 1 if found, or 0 if not.
	*/
	static int reg_range_find(const int *range, int c)
	{
	while (*range) {
	/printf("Checking %d in range [%d,%d]\n", c, range[1], (range[0] + range[1] - 1));/
	if (c >= range[1] && c <= (range[0] + range[1] - 1)) {
	return 1;
	}
	range += 2;
	}
	return 0;
	}

	/**
	* Search for the character 'c' in the utf-8 string 'string'.
	*
	* If 'nocase' is set, the 'string' is assumed to be uppercase
	* and 'c' is converted to uppercase before matching.
	*
	* Returns the byte position in the string where the 'c' was found, or
	* NULL if not found.
	*/
	static const char str_find(const char string, int c, int nocase)
	{
	if (nocase) {
	/* The "string" should already be converted to uppercase */
	c = utf8_upper(c);
	}
	while (*string) {
	int ch;
	int n = reg_utf8_tounicode_case(string, &ch, nocase);
	if (c == ch) {
	return string;
	}
	string += n;
	}
	return NULL;
	}

	/**
	* Returns true if 'ch' is an end-of-line char.
	*
	* In REG_NEWLINE mode, \n is considered EOL in
	* addition to \0
	*/
	static int reg_iseol(regex_t *preg, int ch)
	{
	if (preg->cflags & REG_NEWLINE) {
	return ch == '\0' \|\| ch == '\n';
	}
	else {
	return ch == '\0';
	}
	}

	static int regmatchsimplerepeat(regex_t *preg, int scan, int matchmin)
	{
	int nextch = '\0';
	const char *save;
	int no;
	int c;

	int max = preg->program[scan + 2];
	int min = preg->program[scan + 3];
	int next = regnext(preg, scan);

	/*
	* Lookahead to avoid useless match attempts
	* when we know what character comes next.
	*/
	if (OP(preg, next) == EXACTLY) {
	nextch = preg->program[OPERAND(next)];
	}
	save = preg->reginput;
	no = regrepeat(preg, scan + 5, max);
	if (no < min) {
	return 0;
	}
	if (matchmin) {
	/* from min up to no */
	max = no;
	no = min;
	}
	/* else from no down to min */
	while (1) {
	if (matchmin) {
	if (no > max) {
	break;
	}
	}
	else {
	if (no < min) {
	break;
	}
	}
	preg->reginput = save + utf8_index(save, no);
	reg_utf8_tounicode_case(preg->reginput, &c, (preg->cflags & REG_ICASE));
	/* If it could work, try it. */
	if (reg_iseol(preg, nextch) \|\| c == nextch) {
	if (regmatch(preg, next)) {
	return(1);
	}
	}
	if (matchmin) {
	/* Couldn't or didn't, add one more */
	no++;
	}
	else {
	/* Couldn't or didn't -- back up. */
	no--;
	}
	}
	return(0);
	}

	static int regmatchrepeat(regex_t *preg, int scan, int matchmin)
	{
	int *scanpt = preg->program + scan;

	int max = scanpt[2];
	int min = scanpt[3];

	/* Have we reached min? */
	if (scanpt[4] < min) {
	/* No, so get another one */
	scanpt[4]++;
	if (regmatch(preg, scan + 5)) {
	return 1;
	}
	scanpt[4]--;
	return 0;
	}
	if (scanpt[4] > max) {
	return 0;
	}

	if (matchmin) {
	/* minimal, so try other branch first */
	if (regmatch(preg, regnext(preg, scan))) {
	return 1;
	}
	/* No, so try one more */
	scanpt[4]++;
	if (regmatch(preg, scan + 5)) {
	return 1;
	}
	scanpt[4]--;
	return 0;
	}
	/* maximal, so try this branch again */
	if (scanpt[4] < max) {
	scanpt[4]++;
	if (regmatch(preg, scan + 5)) {
	return 1;
	}
	scanpt[4]--;
	}
	/* At this point we are at max with no match. Try the other branch */
	return regmatch(preg, regnext(preg, scan));
	}

	/*
	- regmatch - main matching routine
	*
	* Conceptually the strategy is simple: check to see whether the current
	* node matches, call self recursively to see whether the rest matches,
	* and then act accordingly. In practice we make some effort to avoid
	* recursion, in particular by going through "ordinary" nodes (that don't
	* need to know whether the rest of the match failed) by a loop instead of
	* by recursion.
	*/
	/* 0 failure, 1 success */
	static int regmatch(regex_t *preg, int prog)
	{
	int scan; /* Current node. */
	int next; /* Next node. */
	const char *save;

	scan = prog;

	#ifdef DEBUG
	if (scan != 0 && regnarrate)
	fprintf(stderr, "%s(\n", regprop(scan));
	#endif
	while (scan != 0) {
	int n;
	int c;
	#ifdef DEBUG
	if (regnarrate) {
	fprintf(stderr, "%3d: %s...\n", scan, regprop(OP(preg, scan))); /* Where, what. */
	}
	#endif
	next = regnext(preg, scan);
	n = reg_utf8_tounicode_case(preg->reginput, &c, (preg->cflags & REG_ICASE));

	switch (OP(preg, scan)) {
	case BOLX:
	if ((preg->eflags & REG_NOTBOL)) {
	return(0);
	}
	/* Fall through */
	case BOL:
	if (preg->reginput != preg->regbol) {
	return(0);
	}
	break;
	case EOLX:
	if (c != 0) {
	/* For EOLX, only match real end of line, not newline */
	return 0;
	}
	break;
	case EOL:
	if (!reg_iseol(preg, c)) {
	return(0);
	}
	break;
	case WORDA:
	/* Must be looking at a letter, digit, or _ */
	if ((!isalnum(UCHAR(c))) && c != '_')
	return(0);
	/* Prev must be BOL or nonword */
	if (preg->reginput > preg->regbol &&
	(isalnum(UCHAR(preg->reginput[-1])) \|\| preg->reginput[-1] == '_'))
	return(0);
	break;
	case WORDZ:
	/* Can't match at BOL */
	if (preg->reginput > preg->regbol) {
	/* Current must be EOL or nonword */
	if (reg_iseol(preg, c) \|\| !isalnum(UCHAR(c)) \|\| c != '_') {
	c = preg->reginput[-1];
	/* Previous must be word */
	if (isalnum(UCHAR(c)) \|\| c == '_') {
	break;
	}
	}
	}
	/* No */
	return(0);

	case ANY:
	if (reg_iseol(preg, c))
	return 0;
	preg->reginput += n;
	break;
	case EXACTLY: {
	int opnd;
	int len;
	int slen;

	opnd = OPERAND(scan);
	len = str_int_len(preg->program + opnd);

	slen = prefix_cmp(preg->program + opnd, len, preg->reginput, preg->cflags & REG_ICASE);
	if (slen < 0) {
	return(0);
	}
	preg->reginput += slen;
	}
	break;
	case ANYOF:
	if (reg_iseol(preg, c) \|\| reg_range_find(preg->program + OPERAND(scan), c) == 0) {
	return(0);
	}
	preg->reginput += n;
	break;
	case ANYBUT:
	if (reg_iseol(preg, c) \|\| reg_range_find(preg->program + OPERAND(scan), c) != 0) {
	return(0);
	}
	preg->reginput += n;
	break;
	case NOTHING:
	break;
	case BACK:
	break;
	case BRANCH:
	if (OP(preg, next) != BRANCH) /* No choice. */
	next = OPERAND(scan); /* Avoid recursion. */
	else {
	do {
	save = preg->reginput;
	if (regmatch(preg, OPERAND(scan))) {
	return(1);
	}
	preg->reginput = save;
	scan = regnext(preg, scan);
	} while (scan != 0 && OP(preg, scan) == BRANCH);
	return(0);
	/* NOTREACHED */
	}
	break;
	case REP:
	case REPMIN:
	return regmatchsimplerepeat(preg, scan, OP(preg, scan) == REPMIN);

	case REPX:
	case REPXMIN:
	return regmatchrepeat(preg, scan, OP(preg, scan) == REPXMIN);

	case END:
	return 1; /* Success! */

	case OPENNC:
	case CLOSENC:
	return regmatch(preg, next);

	default:
	if (OP(preg, scan) >= OPEN+1 && OP(preg, scan) < CLOSE_END) {
	save = preg->reginput;
	if (regmatch(preg, next)) {
	if (OP(preg, scan) < CLOSE) {
	int no = OP(preg, scan) - OPEN;
	- if (no < preg->nmatch && preg->pmatch[no].rm_so == -1) {
	+ if (no < preg->nmatch && preg->pmatch && preg->pmatch[no].rm_so == -1) {
	preg->pmatch[no].rm_so = save - preg->start;
	}
	}
	else {
	int no = OP(preg, scan) - CLOSE;
	- if (no < preg->nmatch && preg->pmatch[no].rm_eo == -1) {
	+ if (no < preg->nmatch && preg->pmatch && preg->pmatch[no].rm_eo == -1) {
	preg->pmatch[no].rm_eo = save - preg->start;
	}
	}
	return(1);
	}
	/* Restore input position after failure */
	preg->reginput = save;
	return(0);
	}
	return REG_ERR_INTERNAL;
	}

	scan = next;
	}

	/*
	* We get here only if there's trouble -- normally "case END" is
	* the terminating point.
	*/
	return REG_ERR_INTERNAL;
	}

	/*
	- regrepeat - repeatedly match something simple, report how many
	*/
	static int regrepeat(regex_t *preg, int p, int max)
	{
	int count = 0;
	const char *scan;
	int opnd;
	int ch;
	int n;

	scan = preg->reginput;
	opnd = OPERAND(p);
	switch (OP(preg, p)) {
	case ANY:
	while (!reg_iseol(preg, *scan) && count < max) {
	count++;
	scan += utf8_charlen(*scan);
	}
	break;
	case EXACTLY:
	while (count < max) {
	n = reg_utf8_tounicode_case(scan, &ch, preg->cflags & REG_ICASE);
	if (preg->program[opnd] != ch) {
	break;
	}
	count++;
	scan += n;
	}
	break;
	case ANYOF:
	while (count < max) {
	n = reg_utf8_tounicode_case(scan, &ch, preg->cflags & REG_ICASE);
	if (reg_iseol(preg, ch) \|\| reg_range_find(preg->program + opnd, ch) == 0) {
	break;
	}
	count++;
	scan += n;
	}
	break;
	case ANYBUT:
	while (count < max) {
	n = reg_utf8_tounicode_case(scan, &ch, preg->cflags & REG_ICASE);
	if (reg_iseol(preg, ch) \|\| reg_range_find(preg->program + opnd, ch) != 0) {
	break;
	}
	count++;
	scan += n;
	}
	break;
	default: /* Oh dear. Called inappropriately. */
	preg->err = REG_ERR_INTERNAL;
	count = 0; /* Best compromise. */
	break;
	}
	preg->reginput = scan;

	return(count);
	}

	/*
	- regnext - dig the "next" pointer out of a node
	*/
	static int regnext(regex_t *preg, int p )
	{
	int offset;

	offset = NEXT(preg, p);

	if (offset == 0)
	return 0;

	if (OP(preg, p) == BACK)
	return(p-offset);
	else
	return(p+offset);
	}

	/*
	- regopsize - returns the size of opcode + operands at 'p' in words
	*/
	static int regopsize(regex_t *preg, int p )
	{
	/* Almost all opcodes are 2 words, but some are more */
	switch (OP(preg, p)) {
	case REP:
	case REPMIN:
	case REPX:
	case REPXMIN:
	return 5;

	case ANYOF:
	case ANYBUT:
	case EXACTLY: {
	int s = p + 2;
	while (preg->program[s++]) {
	}
	return s - p;
	}
	}
	return 2;
	}

	#if defined(DEBUG) && !defined(JIM_BOOTSTRAP)

	/*
	- regdump - dump a regexp onto stdout in vaguely comprehensible form
	*/
	static void regdump(regex_t *preg)
	{
	int s;
	int op = EXACTLY; /* Arbitrary non-END op. */
	int next;
	char buf[MAX_UTF8_LEN + 1];

	int i;
	for (i = 1; i < preg->p; i++) {
	printf("%02x ", (unsigned char)preg->program[i]);
	if (i % 16 == 0) {
	printf("\n");
	}
	}
	printf("\n");

	s = 1;
	while (op != END && s < preg->p) { /* While that wasn't END last time... */
	op = OP(preg, s);
	printf("%3d: %s", s, regprop(op)); /* Where, what. */
	next = regnext(preg, s);
	if (next == 0) /* Next ptr. */
	printf("(0)");
	else
	printf("(%d)", next);
	s += 2;
	if (op == REP \|\| op == REPMIN \|\| op == REPX \|\| op == REPXMIN) {
	int max = preg->program[s];
	int min = preg->program[s + 1];
	if (max == 65535) {
	printf("{%d,*}", min);
	}
	else {
	printf("{%d,%d}", min, max);
	}
	printf(" %d", preg->program[s + 2]);
	s += 3;
	}
	else if (op == ANYOF \|\| op == ANYBUT) {
	/* set of ranges */

	while (preg->program[s]) {
	int len = preg->program[s++];
	int first = preg->program[s++];
	buf[utf8_getchars(buf, first)] = 0;
	printf("%s", buf);
	if (len > 1) {
	buf[utf8_getchars(buf, first + len - 1)] = 0;
	printf("-%s", buf);
	}
	}
	s++;
	}
	else if (op == EXACTLY) {
	/* Literal string, where present. */

	while (preg->program[s]) {
	buf[utf8_getchars(buf, preg->program[s])] = 0;
	printf("%s", buf);
	s++;
	}
	s++;
	}
	putchar('\n');
	}

	if (op == END) {
	/* Header fields of interest. */
	if (preg->regstart) {
	buf[utf8_getchars(buf, preg->regstart)] = 0;
	printf("start '%s' ", buf);
	}
	if (preg->reganch)
	printf("anchored ");
	if (preg->regmust != 0) {
	int i;
	printf("must have:");
	for (i = 0; i < preg->regmlen; i++) {
	putchar(preg->program[preg->regmust + i]);
	}
	putchar('\n');
	}
	}
	printf("\n");
	}

	/*
	- regprop - printable representation of opcode
	*/
	static const char *regprop( int op )
	{
	static char buf[50];

	switch (op) {
	case BOL:
	return "BOL";
	case EOL:
	return "EOL";
	case BOLX:
	return "BOLX";
	case EOLX:
	return "EOLX";
	case ANY:
	return "ANY";
	case ANYOF:
	return "ANYOF";
	case ANYBUT:
	return "ANYBUT";
	case BRANCH:
	return "BRANCH";
	case EXACTLY:
	return "EXACTLY";
	case NOTHING:
	return "NOTHING";
	case BACK:
	return "BACK";
	case END:
	return "END";
	case REP:
	return "REP";
	case REPMIN:
	return "REPMIN";
	case REPX:
	return "REPX";
	case REPXMIN:
	return "REPXMIN";
	case WORDA:
	return "WORDA";
	case WORDZ:
	return "WORDZ";
	case OPENNC:
	return "OPEN";
	case CLOSENC:
	return "CLOSE";
	default:
	if (op >= OPEN && op < CLOSE) {
	snprintf(buf, sizeof(buf), "OPEN%d", op-OPEN);
	}
	else if (op >= CLOSE && op < CLOSE_END) {
	snprintf(buf, sizeof(buf), "CLOSE%d", op-CLOSE);
	}
	else {
	snprintf(buf, sizeof(buf), "?%d?\n", op);
	}
	return(buf);
	}
	}
	#endif /* JIM_BOOTSTRAP */

	size_t regerror(int errcode, const regex_t preg, char errbuf, size_t errbuf_size)
	{
	static const char *error_strings[] = {
	"success",
	"no match",
	"bad pattern",
	"null argument",
	"unknown error",
	"too big",
	"out of memory",
	"too many ()",
	"parentheses () not balanced",
	"braces {} not balanced",
	"invalid repetition count(s)",
	"extra characters",
	"*+ of empty atom",
	"nested count",
	"internal error",
	"count follows nothing",
	"trailing backslash",
	"corrupted program",
	"contains null char",
	};
	const char *err;

	(void)preg;

	if (errcode < 0 \|\| errcode >= REG_ERR_NUM) {
	err = "Bad error code";
	}
	else {
	err = error_strings[errcode];
	}

	return snprintf(errbuf, errbuf_size, "%s", err);
	}

	void regfree(regex_t *preg)
	{
	free(preg->program);
	}

	#endif
	diff --git a/regexp/jimregexp.h b/regexp/jimregexp.h
	index b7598d4b3..581b7104c 100644
	--- a/regexp/jimregexp.h
	+++ b/regexp/jimregexp.h
	@@ -1,109 +1,109 @@
	#ifndef JIMREGEXP_H
	#define JIMREGEXP_H

	/** regexp(3)-compatible regular expression implementation for Jim.
	*
	* See jimregexp.c for details
	*/

	#ifdef __cplusplus
	extern "C" {
	#endif

	#include <stdlib.h>

	typedef struct {
	int rm_so;
	int rm_eo;
	} regmatch_t;

	/*
	* The "internal use only" fields in regexp.h are present to pass info from
	* compile to execute that permits the execute phase to run lots faster on
	* simple cases. They are:
	*
	* regstart char that must begin a match; '\0' if none obvious
	* reganch is the match anchored (at beginning-of-line only)?
	* regmust string (pointer into program) that match must include, or NULL
	* regmlen length of regmust string
	*
	* Regstart and reganch permit very fast decisions on suitable starting points
	* for a match, cutting down the work a lot. Regmust permits fast rejection
	* of lines that cannot possibly match. The regmust tests are costly enough
	* that regcomp() supplies a regmust only if the r.e. contains something
	* potentially expensive (at present, the only such thing detected is * or +
	* at the start of the r.e., which can involve a lot of backup). Regmlen is
	* supplied because the test in regexec() needs it and regcomp() is computing
	* it anyway.
	*/

	-typedef struct regexp {
	+struct regexp {
	/* -- public -- */
	int re_nsub; /* number of parenthesized subexpressions */

	/* -- private -- */
	int cflags; /* Flags used when compiling */
	int err; /* Any error which occurred during compile */
	int regstart; /* Internal use only. */
	int reganch; /* Internal use only. */
	int regmust; /* Internal use only. */
	int regmlen; /* Internal use only. */
	int program; / Allocated */

	/* working state - compile */
	const char regparse; / Input-scan pointer. */
	int p; /* Current output pos in program */
	int proglen; /* Allocated program size */

	/* working state - exec */
	int eflags; /* Flags used when executing */
	const char start; / Initial string pointer. */
	const char reginput; / Current input pointer. */
	const char regbol; / Beginning of input, for ^ check. */

	/* Input to regexec() */
	regmatch_t pmatch; / submatches will be stored here */
	int nmatch; /* size of pmatch[] */
	-} regexp;
	+};

	-typedef regexp regex_t;
	+typedef struct regexp regex_t;

	#define REG_EXTENDED 0
	#define REG_NEWLINE 1
	#define REG_ICASE 2

	#define REG_NOTBOL 16

	enum {
	REG_NOERROR, /* Success. */
	REG_NOMATCH, /* Didn't find a match (for regexec). */
	REG_BADPAT, /* >= REG_BADPAT is an error */
	REG_ERR_NULL_ARGUMENT,
	REG_ERR_UNKNOWN,
	REG_ERR_TOO_BIG,
	REG_ERR_NOMEM,
	REG_ERR_TOO_MANY_PAREN,
	REG_ERR_UNMATCHED_PAREN,
	REG_ERR_UNMATCHED_BRACES,
	REG_ERR_BAD_COUNT,
	REG_ERR_JUNK_ON_END,
	REG_ERR_OPERAND_COULD_BE_EMPTY,
	REG_ERR_NESTED_COUNT,
	REG_ERR_INTERNAL,
	REG_ERR_COUNT_FOLLOWS_NOTHING,
	REG_ERR_TRAILING_BACKSLASH,
	REG_ERR_CORRUPTED,
	REG_ERR_NULL_CHAR,
	REG_ERR_NUM
	};

	int regcomp(regex_t preg, const char regex, int cflags);
	int regexec(regex_t preg, const char string, size_t nmatch, regmatch_t pmatch[], int eflags);
	size_t regerror(int errcode, const regex_t preg, char errbuf, size_t errbuf_size);
	void regfree(regex_t *preg);

	#ifdef __cplusplus
	}
	#endif

	#endif

File Metadata

Mime Type: text/x-diff
Expires: Fri, Feb 6, 8:29 AM (11 h, 29 m)
Storage Engine: local-disk
Storage Format: Raw Data
Storage Handle: 5e/d8/dad0474003f1b7c15ae3cf9a558c

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions