/*
* $Id: PatternMatcherInput.java 124053 2005-01-04 01:24:35Z dfs $
*
* Copyright 2000-2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.oro.text.regex;
/**
 * The PatternMatcherInput class is used to preserve state across
 * calls to the <code>contains()</code> methods of PatternMatcher instances.
 * It is also used to specify that only a subregion of a string
 * should be used as input when looking for a pattern match.  All that
 * is meant by preserving state is that the end offset of the last match
 * is remembered, so that the next match is performed from that point
 * where the last match left off.  This offset can be accessed from
 * the {@link #getCurrentOffset()} method and can be set with the
 * {@link #setCurrentOffset(int)} method.
 * <p>
 * You would use a PatternMatcherInput object when you want to search for
 * more than just the first occurrence of a pattern in a string, or when
 * you only want to search a subregion of the string for a match.  An
 * example of its most common use is:
 * <blockquote><pre>
 * PatternMatcher matcher;
 * PatternCompiler compiler;
 * Pattern pattern;
 * PatternMatcherInput input;
 * MatchResult result;
 *
 * compiler = new Perl5Compiler();
 * matcher  = new Perl5Matcher();
 *
 * try {
 *   pattern = compiler.compile(somePatternString);
 * } catch(MalformedPatternException e) {
 *   System.out.println("Bad pattern.");
 *   System.out.println(e.getMessage());
 *   return;
 * }
 *
 * input   = new PatternMatcherInput(someStringInput);
 *
 * while(matcher.contains(input, pattern)) {
 *   result = matcher.getMatch();
 *   // Perform whatever processing on the result you want.
 * }
 * // Suppose we want to start searching from the beginning again with
 * // a different pattern.
 * // Just set the current offset to the begin offset.
 * input.setCurrentOffset(input.getBeginOffset());
 *
 * // Second search omitted
 *
 * // Suppose we're done with this input, but want to search another string.
 * // There's no need to create another PatternMatcherInput instance.
 * // We can just use the setInput() method.
 * input.setInput(aNewInputString);
 * </pre></blockquote>
 *
 * @version @version@
 * @since 1.0
 * @see PatternMatcher
 */
public final class PatternMatcherInput {
  // The fields are package-private (no access modifier); they are kept that
  // way so that any same-package code relying on them continues to compile.

  // The String passed to setInput(String, ...), or null for char[] input.
  String _originalStringInput;
  // _originalCharInput: the char[] passed to setInput(char[], ...), or null
  //                     for String input.
  // _originalBuffer:    the character buffer all offsets refer to.
  // _toLowerBuffer:     cleared to null whenever the input changes; it is
  //                     not otherwise used inside this class.
  char[] _originalCharInput, _originalBuffer, _toLowerBuffer;
  // Absolute offsets into _originalBuffer.
  int _beginOffset, _endOffset, _currentOffset;
  // Absolute offsets of the last recorded match; -1 means "no match".
  int _matchBeginOffset = -1, _matchEndOffset = -1;

  /**
   * Creates a PatternMatcherInput object, associating a region of a String
   * as input to be used for pattern matching by PatternMatcher objects.
   * The characters of the string are copied into an internal buffer
   * (see {@link #getBuffer()}); the String instance itself is retained
   * and returned by {@link #getInput()}.
   * The current offset of the PatternMatcherInput is set to the begin
   * offset of the region.
   *
   * @param input  The input to associate with the PatternMatcherInput.
   * @param begin  The offset into the String to use as the beginning of
   *               the input.
   * @param length The length of the region starting from the begin offset
   *               to use as the input for pattern matching purposes.
   */
  public PatternMatcherInput(String input, int begin, int length) {
    setInput(input, begin, length);
  }

  /**
   * Like calling
   * <blockquote><pre>
   * PatternMatcherInput(input, 0, input.length());
   * </pre></blockquote>
   *
   * @param input  The input to associate with the PatternMatcherInput.
   */
  public PatternMatcherInput(String input) {
    this(input, 0, input.length());
  }

  /**
   * Creates a PatternMatcherInput object, associating a region of a string
   * (represented as a char[]) as input
   * to be used for pattern matching by PatternMatcher objects.
   * A copy of the array is not made, therefore you should not modify
   * the array unless you know what you are doing.
   * The current offset of the PatternMatcherInput is set to the begin
   * offset of the region.
   *
   * @param input  The input to associate with the PatternMatcherInput.
   * @param begin  The offset into the char[] to use as the beginning of
   *               the input.
   * @param length The length of the region starting from the begin offset
   *               to use as the input for pattern matching purposes.
   */
  public PatternMatcherInput(char[] input, int begin, int length) {
    setInput(input, begin, length);
  }

  /**
   * Like calling:
   * <blockquote><pre>
   * PatternMatcherInput(input, 0, input.length);
   * </pre></blockquote>
   *
   * @param input  The input to associate with the PatternMatcherInput.
   */
  public PatternMatcherInput(char[] input) {
    this(input, 0, input.length);
  }

  /**
   * @return The length of the region to be considered input for pattern
   *         matching purposes.  Essentially this is the end offset minus
   *         the begin offset.
   */
  public int length() {
    return (_endOffset - _beginOffset);
  }

  /**
   * Associates a region of a String as input
   * to be used for pattern matching by PatternMatcher objects.
   * The characters of the string are copied into a new internal buffer.
   * The current offset of the PatternMatcherInput is set to the begin
   * offset of the region, and any recorded match offsets are reset to -1.
   *
   * @param input  The input to associate with the PatternMatcherInput.
   * @param begin  The offset into the String to use as the beginning of
   *               the input.
   * @param length The length of the region starting from the begin offset
   *               to use as the input for pattern matching purposes.
   */
  public void setInput(String input, int begin, int length) {
    _originalStringInput = input;
    _originalCharInput   = null;
    _toLowerBuffer       = null;
    _originalBuffer      = input.toCharArray();
    // setCurrentOffset also resets the match offsets to -1.
    setCurrentOffset(begin);
    setBeginOffset(begin);
    setEndOffset(_beginOffset + length);
  }

  /**
   * This method is identical to calling:
   * <blockquote><pre>
   * setInput(input, 0, input.length());
   * </pre></blockquote>
   *
   * @param input  The input to associate with the PatternMatcherInput.
   */
  public void setInput(String input) {
    setInput(input, 0, input.length());
  }

  /**
   * Associates a region of a string (represented as a char[]) as input
   * to be used for pattern matching by PatternMatcher objects.
   * A copy of the array is not made, therefore you should not modify
   * the array unless you know what you are doing.
   * The current offset of the PatternMatcherInput is set to the begin
   * offset of the region, and any recorded match offsets are reset to -1.
   *
   * @param input  The input to associate with the PatternMatcherInput.
   * @param begin  The offset into the char[] to use as the beginning of
   *               the input.
   * @param length The length of the region starting from the begin offset
   *               to use as the input for pattern matching purposes.
   */
  public void setInput(char[] input, int begin, int length) {
    _originalStringInput = null;
    _toLowerBuffer       = null;
    // The caller's array is used directly as the working buffer.
    _originalBuffer      = _originalCharInput = input;
    // setCurrentOffset also resets the match offsets to -1.
    setCurrentOffset(begin);
    setBeginOffset(begin);
    setEndOffset(_beginOffset + length);
  }

  /**
   * This method is identical to calling:
   * <blockquote><pre>
   * setInput(input, 0, input.length);
   * </pre></blockquote>
   *
   * @param input  The input to associate with the PatternMatcherInput.
   */
  public void setInput(char[] input) {
    setInput(input, 0, input.length);
  }

  /**
   * Returns the character at a particular offset relative to the begin
   * offset of the input.
   *
   * @param offset The offset at which to fetch a character (relative to
   *               the beginning offset).
   * @return The character at a particular offset.
   * @exception ArrayIndexOutOfBoundsException If the offset does not occur
   *            within the bounds of the underlying buffer.
   */
  public char charAt(int offset) {
    return _originalBuffer[_beginOffset + offset];
  }

  /**
   * Returns a new string that is a substring of the PatternMatcherInput
   * instance. The substring begins at the specified beginOffset relative
   * to the begin offset and extends to the specified endOffset - 1
   * relative to the begin offset of the PatternMatcherInput instance.
   *
   * @param beginOffset  The offset relative to the begin offset of the
   *        PatternMatcherInput at which to start the substring (inclusive).
   * @param endOffset  The offset relative to the begin offset of the
   *        PatternMatcherInput at which to end the substring (exclusive).
   * @return The specified substring.
   * @exception IndexOutOfBoundsException If one of the offsets does
   *            not occur within the bounds of the underlying buffer.
   */
  public String substring(int beginOffset, int endOffset) {
    return new String(_originalBuffer, _beginOffset + beginOffset,
                      endOffset - beginOffset);
  }

  /**
   * Returns a new string that is a substring of the PatternMatcherInput
   * instance. The substring begins at the specified beginOffset relative
   * to the begin offset and extends to the end offset of the
   * PatternMatcherInput.
   *
   * @param beginOffset  The offset relative to the begin offset of the
   *        PatternMatcherInput at which to start the substring.
   * @return The specified substring.
   * @exception IndexOutOfBoundsException If the offset does not occur
   *            within the bounds of the underlying buffer.
   */
  public String substring(int beginOffset) {
    // Convert the relative offset into an absolute buffer offset.
    beginOffset += _beginOffset;
    return new String(_originalBuffer, beginOffset, _endOffset - beginOffset);
  }

  /**
   * Retrieves the original input used to initialize the PatternMatcherInput
   * instance.  If a String was used, the String instance will be returned.
   * If a char[] was used, the char[] instance will be returned.  This
   * violates data encapsulation and hiding principles, but it is a great
   * convenience for the programmer.
   *
   * @return The String or char[] input used to initialize the
   *         PatternMatcherInput instance.
   */
  public Object getInput() {
    if(_originalStringInput == null)
      return _originalCharInput;
    return _originalStringInput;
  }

  /**
   * Retrieves the char[] buffer to be used as input by PatternMatcher
   * implementations to look for matches.  This array should be treated
   * as read only by the programmer.
   *
   * @return The char[] buffer to be used as input by PatternMatcher
   *         implementations.
   */
  public char[] getBuffer() {
    return _originalBuffer;
  }

  /**
   * Returns whether or not the end of the input has been reached.
   *
   * @return True if the current offset is greater than or equal to the
   *         end offset.
   */
  public boolean endOfInput() {
    return (_currentOffset >= _endOffset);
  }

  /**
   * @return The offset of the input that should be considered the start
   *         of the region to be considered as input by PatternMatcher
   *         methods.
   */
  public int getBeginOffset() {
    return _beginOffset;
  }

  /**
   * @return The offset of the input that should be considered the end
   *         of the region to be considered as input by PatternMatcher
   *         methods.  This offset is actually 1 plus the last offset
   *         that is part of the input region.
   */
  public int getEndOffset() {
    return _endOffset;
  }

  /**
   * @return The offset of the input that should be considered the current
   *         offset where PatternMatcher methods should start looking for
   *         matches.
   */
  public int getCurrentOffset() {
    return _currentOffset;
  }

  /**
   * Sets the offset of the input that should be considered the start
   * of the region to be considered as input by PatternMatcher
   * methods.  In other words, everything before this offset is ignored
   * by a PatternMatcher.
   *
   * @param offset  The offset to use as the beginning of the input.
   */
  public void setBeginOffset(int offset) {
    _beginOffset = offset;
  }

  /**
   * Sets the offset of the input that should be considered the end
   * of the region to be considered as input by PatternMatcher
   * methods.  This offset is actually 1 plus the last offset
   * that is part of the input region.
   *
   * @param offset  The offset to use as the end of the input.
   */
  public void setEndOffset(int offset) {
    _endOffset = offset;
  }

  /**
   * Sets the offset of the input that should be considered the current
   * offset where PatternMatcher methods should start looking for
   * matches.  Also resets all match offset information to -1.  By calling
   * this method, you invalidate all previous match information.  Therefore
   * a PatternMatcher implementation must call this method before setting
   * match offset information.
   *
   * @param offset  The offset to use as the current offset.
   */
  public void setCurrentOffset(int offset) {
    _currentOffset = offset;
    setMatchOffsets(-1, -1);
  }

  /**
   * Returns the string representation of the input, where the input is
   * considered to start from the begin offset and end at the end offset.
   *
   * @return The string representation of the input.
   */
  public String toString() {
    return new String(_originalBuffer, _beginOffset, length());
  }

  /**
   * A convenience method returning the part of the input occurring before
   * the last match found by a call to a Perl5Matcher
   * {@link Perl5Matcher#contains contains} method.
   *
   * @return The input preceding a match.
   * @exception IndexOutOfBoundsException If no valid match offsets are
   *            currently recorded (they are -1 after the current offset
   *            has been set).
   */
  public String preMatch() {
    return new String(_originalBuffer, _beginOffset,
                      _matchBeginOffset - _beginOffset);
  }

  /**
   * A convenience method returning the part of the input occurring after
   * the last match found by a call to a Perl5Matcher
   * {@link Perl5Matcher#contains contains} method.
   *
   * @return The input succeeding a contains() match.
   * @exception IndexOutOfBoundsException If no valid match offsets are
   *            currently recorded (they are -1 after the current offset
   *            has been set).
   */
  public String postMatch() {
    return new String(_originalBuffer, _matchEndOffset,
                      _endOffset - _matchEndOffset);
  }

  /**
   * A convenience method returning the part of the input corresponding
   * to the last match found by a call to a Perl5Matcher
   * {@link Perl5Matcher#contains contains} method.
   * The method is not called getMatch() so as not to confuse it
   * with Perl5Matcher's getMatch() which returns a MatchResult instance
   * and also for consistency with preMatch() and postMatch().
   *
   * @return The input consisting of the match found by contains().
   * @exception IndexOutOfBoundsException If no valid match offsets are
   *            currently recorded (they are -1 after the current offset
   *            has been set).
   */
  public String match() {
    return new String(_originalBuffer, _matchBeginOffset,
                      _matchEndOffset - _matchBeginOffset);
  }

  /**
   * This method is intended for use by PatternMatcher implementations.
   * It is necessary to record the location of the previous match so that
   * consecutive contains() matches involving null string matches are
   * properly handled.  If you are not implementing a PatternMatcher, forget
   * this method exists.  If you use it outside of its intended context, you
   * will only disrupt the stored state.
   * <p>
   * As a note, the preMatch(), postMatch(), and match() methods are provided
   * as conveniences because PatternMatcherInput must store match offset
   * information to completely preserve state for consecutive PatternMatcher
   * contains() matches.
   *
   * @param matchBeginOffset  The begin offset of a match found by contains().
   * @param matchEndOffset    The end offset of a match found by contains().
   */
  public void setMatchOffsets(int matchBeginOffset, int matchEndOffset) {
    _matchBeginOffset = matchBeginOffset;
    _matchEndOffset   = matchEndOffset;
  }

  /**
   * Returns the offset marking the beginning of the match found by
   * contains().
   *
   * @return The begin offset of a contains() match.
   */
  public int getMatchBeginOffset() {
    return _matchBeginOffset;
  }

  /**
   * Returns the offset marking the end of the match found by contains().
   *
   * @return The end offset of a contains() match.
   */
  public int getMatchEndOffset() {
    return _matchEndOffset;
  }
}