package org.apache.oro.text.regex;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
* must not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache"
* or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
* name, without prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
*
* A grep method is not included for two reasons:
* NOTE(review): the list of reasons appears to be missing from this copy
* of the source.
*
* Splits up a <code>String</code> instance into strings contained in a
* <code>Vector</code> of size not greater than a specified limit. The
* string is split with a regular expression as the delimiter.
* The limit parameter essentially says to split the
* string only on at most the first limit - 1 number of pattern
* occurrences.
* <p>
* This method is inspired by the Perl split() function and behaves
* identically to it when used in conjunction with the Perl5Matcher and
* Perl5Pattern classes except for the following difference:
* <p>
* In Perl, if the split expression contains parentheses, the split()
* method creates additional list elements from each of the matching
* subgroups in the pattern. In other words:
* <pre>split("/([,-])/", "8-12,15,18")</pre>
* produces the Vector containing:
* <pre>{ "8", "-", "12", ",", "15", ",", "18" }</pre>
* The OROMatcher split method does not follow this behavior. The
* following Vector would be produced by OROMatcher:
* <pre>{ "8", "12", "15", "18" }</pre>
* To obtain the Perl behavior, use the split method in the PerlTools
* package available from http://www.oroinc.com/ .
* <p>
* @param matcher The regular expression matcher to execute the split.
* @param pattern The regular expression to use as a split delimiter.
* @param input The <code>String</code> to split.
* @param limit The limit on the size of the returned <code>Vector</code>.
* Values &lt;= 0 produce the same behavior as using the
* SPLIT_ALL constant which causes the limit to be
* ignored and splits to be performed on all occurrences of
* the pattern. You should use the SPLIT_ALL constant
* to achieve this behavior instead of relying on the default
* behavior associated with non-positive limit values.
* @return A <code>Vector</code> containing the substrings of the input
* that occur between the regular expression delimiter occurrences.
* The input will not be split into any more substrings than the
* specified <code>limit</code>. A way of thinking of this is that
* only the first <code>limit - 1</code> matches of the delimiting
* regular expression will be used to split the input.
*/
public static Vector split(PatternMatcher matcher, Pattern pattern,
                           String input, int limit)
{
  Vector pieces = new Vector(20);
  PatternMatcherInput source = new PatternMatcherInput(input);
  int pieceStart = 0;

  // At most (limit - 1) delimiter matches are consumed, so that the
  // remainder appended after the loop brings the total to limit pieces.
  // The test is "!= 0" rather than "> 0" on purpose: a non-positive
  // limit keeps moving away from zero as it is decremented, so in
  // practice only a failed search ends the loop in that case.
  for(int remaining = limit - 1;
      remaining != 0 && matcher.contains(source, pattern);
      --remaining) {
    MatchResult delimiter = matcher.getMatch();
    // The text between the previous delimiter and this one is a piece.
    pieces.addElement(input.substring(pieceStart,
                                      delimiter.beginOffset(0)));
    pieceStart = delimiter.endOffset(0);
  }

  // Whatever follows the last consumed delimiter is the final piece.
  pieces.addElement(input.substring(pieceStart));
  return pieces;
}
/**
* Splits up a <code>String</code> instance into a <code>Vector</code>
* of all its substrings using a regular expression as the delimiter.
* This method is inspired by the Perl split() function and behaves
* identically to it when used in conjunction with the Perl5Matcher and
* Perl5Pattern classes except for the following difference:
* <p>
* In Perl, if the split expression contains parentheses, the split()
* method creates additional list elements from each of the matching
* subgroups in the pattern. In other words:
* <pre>split("/([,-])/", "8-12,15,18")</pre>
* produces the Vector containing:
* <pre>{ "8", "-", "12", ",", "15", ",", "18" }</pre>
* The OROMatcher split method does not follow this behavior. The
* following Vector would be produced by OROMatcher:
* <pre>{ "8", "12", "15", "18" }</pre>
* To obtain the Perl behavior, use the split method in the PerlTools
* package available from http://www.oroinc.com/ .
* <p>
* This method is identical to calling:
* <pre>split(matcher, pattern, input, Util.SPLIT_ALL);</pre>
*
* @param matcher The regular expression matcher to execute the split.
* @param pattern The regular expression to use as a split delimiter.
* @param input The <code>String</code> to split.
* @return A <code>Vector</code> containing all the substrings of the input
* that occur between the regular expression delimiter occurrences.
*/
public static Vector split(PatternMatcher matcher, Pattern pattern,
                           String input)
{
  // Unlimited split: hand off to the limit-taking overload with the
  // SPLIT_ALL constant as the limit.
  final Vector pieces = split(matcher, pattern, input, SPLIT_ALL);
  return pieces;
}
/**
* Searches a string for a pattern and replaces the first occurrences
* of the pattern with a Substitution up to the number of
* substitutions specified by the numSubs parameter. A
* numSubs value of SUBSTITUTE_ALL will cause all occurrences
* of the pattern to be replaced.
*
* @param matcher The regular expression matcher to execute the pattern
* search.
* @param pattern The regular expression to search for and substitute
* occurrences of.
* @param sub The Substitution used to substitute pattern occurrences.
* @param input The <code>String</code> on which to perform substitutions.
* @param numSubs The number of substitutions to perform. Only the
* first numSubs patterns encountered are
* substituted. If you want to substitute all occurrences
* set this parameter to SUBSTITUTE_ALL.
* @return A String comprising the input string with the substitutions,
* if any, made. If no substitutions are made, the returned String
* is the original input String.
*/
public static String substitute(PatternMatcher matcher, Pattern pattern,
                                Substitution sub, String input, int numSubs)
{
  StringBuffer output = new StringBuffer(input.length());
  PatternMatcherInput source = new PatternMatcherInput(input);
  int copiedUpTo = 0;    // offset in input just past the last match
  int replacements = 0;  // number of substitutions made so far

  // The countdown is compared with "!= 0" on purpose: SUBSTITUTE_ALL is
  // represented by -1, so a "> 0" test would prevent any substitution
  // from happening for it.  Do NOT change this to "remaining > 0".
  for(int remaining = numSubs;
      remaining != 0 && matcher.contains(source, pattern);
      --remaining) {
    MatchResult match = matcher.getMatch();
    ++replacements;

    // Copy the unmatched text preceding this match, then let the
    // Substitution append whatever replaces the match itself.
    output.append(input.substring(copiedUpTo, match.beginOffset(0)));
    sub.appendSubstitution(output, match, replacements,
                           input, matcher, pattern);
    copiedUpTo = match.endOffset(0);
  }

  // Nothing was replaced, so hand back the caller's own String instead
  // of building an identical copy from the buffer.
  if(replacements == 0) {
    return input;
  }

  // Append the text following the last replaced match.
  output.append(input.substring(copiedUpTo));
  return output.toString();
}
/**
* Searches a string for a pattern and substitutes only the first
* occurrence of the pattern.
* <p>
* This method is identical to calling:
* <pre>substitute(matcher, pattern, sub, input, 1);</pre>
*
* @param matcher The regular expression matcher to execute the pattern
* search.
* @param pattern The regular expression to search for and substitute
* occurrences of.
* @param sub The Substitution used to substitute pattern occurrences.
* @param input The <code>String</code> on which to perform substitutions.
* @return A String comprising the input string with the substitutions,
* if any, made. If no substitutions are made, the returned String
* is the original input String.
*/
public static String substitute(PatternMatcher matcher, Pattern pattern,
                                Substitution sub, String input)
{
  // Replace at most one occurrence: delegate to the counting overload
  // with a substitution count of 1.
  final int firstMatchOnly = 1;
  return substitute(matcher, pattern, sub, input, firstMatchOnly);
}
}