47 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
517 int32_t destCapacity,
561 int32_t destCapacity,
587 UVector64 *fCompiledPat;
592 Regex8BitSet *fSets8;
598 int32_t fMinMatchLen;
610 UVector32 *fGroupMap;
616 Regex8BitSet *fStaticSets8;
620 int32_t fInitialStringIdx;
621 int32_t fInitialStringLen;
624 Regex8BitSet *fInitialChars8;
625 UBool fNeedsAltInput;
627 UHashtable *fNamedCaptureMap;
629 friend class RegexCompile;
631 friend class RegexCImpl;
639 void dumpOp(int32_t index)
const;
642 #ifndef U_HIDE_INTERNAL_API
1574 int32_t destCapacity,
1603 int32_t destCapacity,
1684 const void *context,
1699 const void *&context,
1717 const void *context,
1732 const void *&context,
1735 #ifndef U_HIDE_INTERNAL_API
1769 friend class RegexCImpl;
1771 #ifndef U_HIDE_INTERNAL_API
1782 inline void backTrack(int64_t &inputIdx, int32_t &patIdx);
1783 UBool isWordBoundary(int64_t pos);
1784 UBool isUWordBoundary(int64_t pos);
1785 REStackFrame *resetStack();
1786 inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx,
UErrorCode &status);
1790 inline UBool findProgressInterrupt(int64_t matchIndex,
UErrorCode &status);
1792 int64_t appendGroup(int32_t groupNum,
UText *dest,
UErrorCode &status)
const;
1796 UBool isChunkWordBoundary(int32_t pos);
1804 UText *fAltInputText;
1806 int64_t fInputLength;
1809 int64_t fRegionStart;
1810 int64_t fRegionLimit;
1812 int64_t fAnchorStart;
1813 int64_t fAnchorLimit;
1819 int64_t fActiveStart;
1820 int64_t fActiveLimit;
1824 UBool fTransparentBounds;
1825 UBool fAnchoringBounds;
1828 int64_t fMatchStart;
1832 int64_t fLastMatchEnd;
1834 int64_t fAppendPosition;
1843 REStackFrame *fFrame;
1848 int64_t fSmallData[8];
1854 int32_t fTickCounter;
1859 int32_t fStackLimit;
1864 const void *fCallbackContext;
1868 const void *fFindProgressCallbackContext;
1871 UBool fInputUniStrMaybeMutable;
class RegexMatcher bundles together a regular expression pattern and input text to which the expressi...
virtual UBool matches(int64_t startIndex, UErrorCode &status)
Resets the matcher, then attempts to match the input beginning at the specified startIndex,...
virtual UText * replaceFirst(UText *replacement, UText *dest, UErrorCode &status)
Replaces the first substring of the input that matches the pattern with the replacement string.
virtual RegexMatcher & reset()
Resets this matcher.
virtual int32_t groupCount() const
Returns the number of capturing groups in this matcher's pattern.
virtual UBool find()
Find the next pattern match in the input string.
virtual ~RegexMatcher()
Destructor.
virtual UBool hitEnd() const
Return TRUE if the most recent matching operation attempted to access additional input beyond the ava...
virtual UnicodeString & appendTail(UnicodeString &dest)
As the final step in a find-and-replace operation, append the remainder of the input string,...
virtual RegexMatcher & reset(UText *input)
Resets this matcher with a new input string.
virtual int32_t start(int32_t group, UErrorCode &status) const
Returns the index in the input string of the start of the text matched by the specified capture group...
virtual UText * group(UText *dest, int64_t &group_len, UErrorCode &status) const
Returns a shallow clone of the entire live input string with the UText current native index set to th...
virtual int32_t end(UErrorCode &status) const
Returns the index in the input string of the first character following the text matched during the pr...
virtual RegexMatcher & appendReplacement(UText *dest, UText *replacement, UErrorCode &status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
virtual int32_t end(int32_t group, UErrorCode &status) const
Returns the index in the input string of the character following the text matched by the specified ca...
virtual const RegexPattern & pattern() const
Returns the pattern that is interpreted by this matcher.
virtual int64_t regionStart64() const
Reports the start index of this matcher's region.
virtual int64_t start64(UErrorCode &status) const
Returns the index in the input string of the start of the text matched during the previous match oper...
virtual int32_t start(UErrorCode &status) const
Returns the index in the input string of the start of the text matched during the previous match oper...
virtual UBool requireEnd() const
Return TRUE if the most recent match succeeded and additional input could cause it to fail.
virtual RegexMatcher & useAnchoringBounds(UBool b)
Set whether this matcher is using Anchoring Bounds for its region.
virtual UBool matches(UErrorCode &status)
Attempts to match the entire input region against the pattern.
virtual UBool hasAnchoringBounds() const
Return true if this matcher is using anchoring bounds.
RegexMatcher(const UnicodeString &regexp, const UnicodeString &input, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual int64_t regionEnd64() const
Reports the end (limit) index (exclusive) of this matcher's region.
virtual RegexMatcher & appendReplacement(UnicodeString &dest, const UnicodeString &replacement, UErrorCode &status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
virtual void setTimeLimit(int32_t limit, UErrorCode &status)
Set a processing time limit for match operations with this Matcher.
virtual int64_t end64(UErrorCode &status) const
Returns the index in the input string of the first character following the text matched during the pr...
virtual UBool lookingAt(int64_t startIndex, UErrorCode &status)
Attempts to match the input string, starting from the specified index, against the pattern.
RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual int32_t regionEnd() const
Reports the end (limit) index (exclusive) of this matcher's region.
void resetPreserveRegion()
virtual void setFindProgressCallback(URegexFindProgressCallback *callback, const void *context, UErrorCode &status)
Set a progress callback function for use with find operations on this Matcher.
virtual int32_t split(UText *input, UText *dest[], int32_t destCapacity, UErrorCode &status)
Split a string into fields.
virtual int64_t start64(int32_t group, UErrorCode &status) const
Returns the index in the input string of the start of the text matched by the specified capture group...
virtual UText * getInput(UText *dest, UErrorCode &status) const
Returns the input string being matched, either by copying it into the provided UText parameter or by ...
virtual UBool hasTransparentBounds() const
Queries the transparency of region bounds for this matcher.
virtual int32_t getStackLimit() const
Get the size of the heap storage available for use by the back tracking stack.
virtual RegexMatcher & region(int64_t start, int64_t limit, UErrorCode &status)
Sets the limits of this matcher's region.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
virtual UText * replaceAll(UText *replacement, UText *dest, UErrorCode &status)
Replaces every substring of the input that matches the pattern with the given replacement string.
virtual RegexMatcher & region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status)
Identical to region(start, limit, status) but also allows a start position without resetting the regi...
virtual RegexMatcher & refreshInputText(UText *input, UErrorCode &status)
Set the subject text string upon which the regular expression is looking for matches without changing...
void setTrace(UBool state)
setTrace Debug function, enable/disable tracing of the matching engine.
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
virtual UText * appendTail(UText *dest, UErrorCode &status)
As the final step in a find-and-replace operation, append the remainder of the input string,...
virtual UBool find(UErrorCode &status)
Find the next pattern match in the input string.
virtual UBool find(int64_t start, UErrorCode &status)
Resets this RegexMatcher and then attempts to find the next substring of the input string that matche...
virtual int32_t getTimeLimit() const
Get the time limit, if any, for match operations made with this Matcher.
virtual int32_t regionStart() const
Reports the start index of this matcher's region.
virtual void getFindProgressCallback(URegexFindProgressCallback *&callback, const void *&context, UErrorCode &status)
Get the find progress callback function for this URegularExpression.
virtual UText * group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const
Returns a shallow clone of the entire live input string with the UText current native index set to th...
virtual RegexMatcher & reset(const UnicodeString &input)
Resets this matcher with a new input string.
virtual UBool lookingAt(UErrorCode &status)
Attempts to match the input string, starting from the beginning of the region, against the pattern.
virtual UText * inputText() const
Returns the input string being matched.
virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const
Returns a string containing the text captured by the given group during the previous match operation.
virtual const UnicodeString & input() const
Returns the input string being matched.
virtual RegexMatcher & reset(int64_t index, UErrorCode &status)
Resets this matcher, and set the current input position.
virtual RegexMatcher & useTransparentBounds(UBool b)
Sets the transparency of region bounds for this matcher.
virtual int32_t split(const UnicodeString &input, UnicodeString dest[], int32_t destCapacity, UErrorCode &status)
Split a string into fields.
virtual void setStackLimit(int32_t limit, UErrorCode &status)
Set the amount of heap storage available for use by the match backtracking stack.
virtual int64_t end64(int32_t group, UErrorCode &status) const
Returns the index in the input string of the character following the text matched by the specified ca...
virtual void getMatchCallback(URegexMatchCallback *&callback, const void *&context, UErrorCode &status)
Get the callback function for this URegularExpression.
virtual void setMatchCallback(URegexMatchCallback *callback, const void *context, UErrorCode &status)
Set a callback function for use with this Matcher.
RegexMatcher(UText *regexp, UText *input, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual UnicodeString group(UErrorCode &status) const
Returns a string containing the text matched by the previous match.
virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status)
Replaces every substring of the input that matches the pattern with the given replacement string.
virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status)
Replaces the first substring of the input that matches the pattern with the replacement string.
Class RegexPattern represents a compiled regular expression.
static UBool matches(const UnicodeString &regex, const UnicodeString &input, UParseError &pe, UErrorCode &status)
Test whether a string matches a regular expression.
static RegexPattern * compile(UText *regex, uint32_t flags, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified match m...
UBool operator!=(const RegexPattern &that) const
Comparison operator.
virtual RegexMatcher * matcher(const UnicodeString &input, UErrorCode &status) const
Creates a RegexMatcher that will match the given input against this pattern.
virtual UText * patternText(UErrorCode &status) const
Returns the regular expression from which this pattern was compiled.
static RegexPattern * compile(const UnicodeString &regex, uint32_t flags, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified match m...
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
RegexPattern(const RegexPattern &source)
Copy Constructor.
virtual int32_t split(const UnicodeString &input, UnicodeString dest[], int32_t destCapacity, UErrorCode &status) const
Split a string into fields.
virtual UnicodeString pattern() const
Returns the regular expression from which this pattern was compiled.
void dumpPattern() const
Dump a compiled pattern.
static RegexPattern * compile(UText *regex, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object.
static UBool matches(UText *regex, UText *input, UParseError &pe, UErrorCode &status)
Test whether a string matches a regular expression.
virtual RegexMatcher * matcher(UErrorCode &status) const
Creates a RegexMatcher that will match against this pattern.
virtual ~RegexPattern()
Destructor.
static RegexPattern * compile(const UnicodeString &regex, uint32_t flags, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified match m...
virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const
Get the group number corresponding to a named capture group.
static RegexPattern * compile(UText *regex, uint32_t flags, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified match m...
static RegexPattern * compile(const UnicodeString &regex, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object.
RegexPattern()
default constructor.
virtual RegexPattern * clone() const
Create an exact copy of this RegexPattern object.
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
virtual int32_t split(UText *input, UText *dest[], int32_t destCapacity, UErrorCode &status) const
Split a string into fields.
UBool operator==(const RegexPattern &that) const
Comparison operator.
virtual uint32_t flags() const
Get the match mode flags that were used when compiling this pattern.
virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const
Get the group number corresponding to a named capture group.
A subclass of BreakIterator whose behavior is specified using a list of rules.
UObject is the common ICU "boilerplate" class.
A mutable set of Unicode characters and multicharacter strings.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
C API: Parse Error Information.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
A UParseError struct is used to return detailed information about parsing errors.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
int8_t UBool
The ICU boolean type.
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
C API: Regular Expressions.
UBool URegexFindProgressCallback(const void *context, int64_t matchIndex)
Function pointer for a regular expression find callback function.
UBool URegexMatchCallback(const void *context, int32_t steps)
Function pointer for a regular expression matching callback function.
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.