package org.apache.oro.text.regex; /* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2000 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro" * must not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache" * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their * name, without prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
/* IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 * Portions of this software are based upon software originally written
 * by Daniel F. Savarese. We appreciate his contributions.
 */

/**
 * The OpCode class should not be instantiated.  It is a holder of various
 * constants and static methods pertaining to the manipulation of the
 * op-codes used in a compiled regular expression.
 *
 * <p>As read by the accessors in this class, an operator stored at index
 * {@code offset} of a compiled program occupies {@code program[offset]}
 * (the op-code itself) and {@code program[offset + 1]} (the distance to the
 * next operator); any operand data starts at {@code program[offset + 2]}.
 *
 * @author Daniel F. Savarese
 * @version $Id: OpCode.java,v 1.1 2004/01/10 00:58:23 mikedemmer Exp $
 */
final class OpCode {

  /** Not instantiable: this class only holds constants and static helpers. */
  private OpCode() { }

  // Names, values, and descriptions of operators correspond to those of
  // Perl regex bytecodes and for compatibility purposes are drawn from
  // regcomp.h in the Perl source tree by Larry Wall.
  static final char   // Has Operand  Meaning
    _END     = 0,     // no   End of program.
    _BOL     = 1,     // no   Match "" at beginning of line.
    _MBOL    = 2,     // no   Same, assuming multiline.
    _SBOL    = 3,     // no   Same, assuming singleline.
    _EOL     = 4,     // no   Match "" at end of line.
    _MEOL    = 5,     // no   Same, assuming multiline.
    _SEOL    = 6,     // no   Same, assuming singleline.
    _ANY     = 7,     // no   Match any one character (except newline).
    _SANY    = 8,     // no   Match any one character.
    _ANYOF   = 9,     // yes  Match character in (or not in) this class.
    _CURLY   = 10,    // yes  Match this simple thing {n,m} times.
    _CURLYX  = 11,    // yes  Match this complex thing {n,m} times.
    _BRANCH  = 12,    // yes  Match this alternative, or the next...
    _BACK    = 13,    // no   Match "", "next" ptr points backward.
    _EXACTLY = 14,    // yes  Match this string (preceded by length).
    _NOTHING = 15,    // no   Match empty string.
    _STAR    = 16,    // yes  Match this (simple) thing 0 or more times.
    _PLUS    = 17,    // yes  Match this (simple) thing 1 or more times.
    _ALNUM   = 18,    // no   Match any alphanumeric character
    _NALNUM  = 19,    // no   Match any non-alphanumeric character
    _BOUND   = 20,    // no   Match "" at any word boundary
    _NBOUND  = 21,    // no   Match "" at any word non-boundary
    _SPACE   = 22,    // no   Match any whitespace character
    _NSPACE  = 23,    // no   Match any non-whitespace character
    _DIGIT   = 24,    // no   Match any numeric character
    _NDIGIT  = 25,    // no   Match any non-numeric character
    _REF     = 26,    // yes  Match some already matched string
    _OPEN    = 27,    // yes  Mark this point in input as start of #n.
    _CLOSE   = 28,    // yes  Analogous to OPEN.
    _MINMOD  = 29,    // no   Next operator is not greedy.
    _GBOL    = 30,    // no   Matches where last m//g left off.
    _IFMATCH = 31,    // no   Succeeds if the following matches.
    _UNLESSM = 32,    // no   Fails if the following matches.
    _SUCCEED = 33,    // no   Return from a subroutine, basically.
    _WHILEM  = 34;    // no   Do curly processing and see if rest matches.

  // Lengths of the various operands.  The table holds one entry per
  // op-code value (0 through 34), in op-code order.
  static final int[] _operandLength = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   //  0 -  9: _END .. _ANYOF
    2, 2,                           // 10 - 11: _CURLY, _CURLYX
    0, 0, 0, 0, 0, 0, 0,            // 12 - 18: _BRANCH .. _ALNUM
    0, 0, 0, 0, 0, 0, 0,            // 19 - 25: _NALNUM .. _NDIGIT
    1, 1, 1,                        // 26 - 28: _REF, _OPEN, _CLOSE
    0, 0, 0, 0, 0, 0                // 29 - 34: _MINMOD .. _WHILEM
  };

  // One entry per op-code value, in op-code order.  Several op-codes share
  // an entry (e.g. _BOL, _MBOL, _SBOL and _GBOL all map to _BOL).
  // NOTE(review): presumably the general category of each op-code as used
  // by the compiler/matcher -- confirm against the callers.
  static final char[] _opType = {
    _END,     _BOL,     _BOL,     _BOL,     _EOL,     _EOL,     _EOL,
    _ANY,     _ANY,     _ANYOF,   _CURLY,   _CURLY,   _BRANCH,  _BACK,
    _EXACTLY, _NOTHING, _STAR,    _PLUS,    _ALNUM,   _NALNUM,  _BOUND,
    _NBOUND,  _SPACE,   _NSPACE,  _DIGIT,   _NDIGIT,  _REF,     _OPEN,
    _CLOSE,   _MINMOD,  _BOL,     _BRANCH,  _BRANCH,  _END,     _WHILEM
  };

  // Op-codes grouped by name as "length varies"; intended for membership
  // tests via _isInArray.
  static final char[] _opLengthVaries = {
    _BRANCH, _BACK, _STAR, _PLUS, _CURLY, _CURLYX, _REF, _WHILEM
  };

  // Op-codes grouped by name as "length one"; intended for membership
  // tests via _isInArray.
  static final char[] _opLengthOne = {
    _ANY, _SANY, _ANYOF, _ALNUM, _NALNUM, _SPACE, _NSPACE, _DIGIT, _NDIGIT
  };

  /** Value returned by {@link #_getNext} when there is no next operator. */
  static final int  _NULL_OFFSET  = -1;

  /** Stored "next" distance that {@link #_getNext} treats as "no next". */
  static final char _NULL_POINTER =  0;

  /**
   * Reads the raw "next" distance stored with the operator at
   * {@code offset}.
   *
   * @param program the compiled program
   * @param offset  index of an operator in {@code program}
   * @return {@code program[offset + 1]} widened to an {@code int}
   */
  static final int _getNextOffset(char[] program, int offset) {
    return program[offset + 1];
  }

  /**
   * Reads the first operand slot of the operator at {@code offset}.
   *
   * @param program the compiled program
   * @param offset  index of an operator in {@code program}
   * @return {@code program[offset + 2]}
   */
  static final char _getArg1(char[] program, int offset) {
    return program[offset + 2];
  }

  /**
   * Reads the second operand slot of the operator at {@code offset}.
   *
   * @param program the compiled program
   * @param offset  index of an operator in {@code program}
   * @return {@code program[offset + 3]}
   */
  static final char _getArg2(char[] program, int offset) {
    return program[offset + 3];
  }

  /**
   * Computes where the operand of the operator at {@code offset} starts.
   *
   * @param offset index of an operator
   * @return {@code offset + 2}
   */
  static final int _getOperand(int offset) {
    return offset + 2;
  }

  /**
   * Tests whether {@code ch} occurs in {@code array} at or after index
   * {@code start}.
   *
   * @param ch    the value to look for
   * @param array the array to scan
   * @param start index of the first element to examine; a value greater
   *              than or equal to {@code array.length} yields {@code false}
   * @return {@code true} if some element from {@code start} onward equals
   *         {@code ch}, {@code false} otherwise
   */
  static final boolean _isInArray(char ch, char[] array, int start) {
    for (int index = start; index < array.length; index++) {
      if (array[index] == ch) {
        return true;
      }
    }
    return false;
  }

  /**
   * Computes the index immediately following an operand-less operator.
   *
   * @param offset index of an operator
   * @return {@code offset + 2}
   */
  static final int _getNextOperator(int offset) {
    return offset + 2;
  }

  /**
   * Computes the index two slots before {@code offset}.
   *
   * @param offset index of an operator
   * @return {@code offset - 2}
   */
  static final int _getPrevOperator(int offset) {
    return offset - 2;
  }

  /**
   * Resolves the index of the operator that follows the one at
   * {@code offset}.
   *
   * @param program the compiled program; may be {@code null}
   * @param offset  index of an operator in {@code program}
   * @return {@link #_NULL_OFFSET} if {@code program} is {@code null} or the
   *         stored distance equals {@link #_NULL_POINTER}; otherwise
   *         {@code offset} minus the stored distance when the operator is
   *         {@link #_BACK}, or {@code offset} plus the stored distance for
   *         every other operator
   */
  static final int _getNext(char[] program, int offset) {
    if (program == null) {
      return _NULL_OFFSET;
    }

    final int distance = _getNextOffset(program, offset);
    if (distance == _NULL_POINTER) {
      return _NULL_OFFSET;
    }

    // _BACK is the only operator whose "next" pointer points backward.
    if (program[offset] == _BACK) {
      return offset - distance;
    }
    return offset + distance;
  }

  // doesn't really belong in this class, but we want Perl5Matcher not to
  // depend on Perl5Compiler
  /**
   * Tests whether {@code token} is an ASCII letter, an ASCII digit or an
   * underscore.
   *
   * @param token the character to classify
   * @return {@code true} for {@code a-z}, {@code A-Z}, {@code 0-9} and
   *         {@code _}; {@code false} for anything else
   */
  static final boolean _isWordCharacter(char token) {
    return (token >= 'a' && token <= 'z')
        || (token >= 'A' && token <= 'Z')
        || (token >= '0' && token <= '9')
        || token == '_';
  }
}