ICU 57.1  57.1
regex.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 2002-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * file name: regex.h
7 * encoding: US-ASCII
8 * indentation:4
9 *
10 * created on: 2002oct22
11 * created by: Andy Heninger
12 *
13 * ICU Regular Expressions, API for C++
14 */
15 
16 #ifndef REGEX_H
17 #define REGEX_H
18 
19 //#define REGEX_DEBUG
20 
45 #include "unicode/utypes.h"
46 
47 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
48 
49 #include "unicode/uobject.h"
50 #include "unicode/unistr.h"
51 #include "unicode/utext.h"
52 #include "unicode/parseerr.h"
53 
54 #include "unicode/uregex.h"
55 
56 // Forward Declarations
57 
58 struct UHashtable;
59 
61 
62 struct Regex8BitSet;
63 class RegexCImpl;
64 class RegexMatcher;
65 class RegexPattern;
66 struct REStackFrame;
67 class RuleBasedBreakIterator; // Referenced by RegexMatcher::fWordBreakItr.
68 class UnicodeSet;
69 class UVector;
70 class UVector32;
71 class UVector64;
72 
73 
85 class U_I18N_API RegexPattern U_FINAL : public UObject {
86 public:
87 
96 
103  RegexPattern(const RegexPattern &source);
104 
110  virtual ~RegexPattern();
111 
120  UBool operator==(const RegexPattern& that) const;
121 
130  inline UBool operator!=(const RegexPattern& that) const {return ! operator ==(that);}
131 
137  RegexPattern &operator =(const RegexPattern &source);
138 
146  virtual RegexPattern *clone() const;
147 
148 
173  static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
174  UParseError &pe,
175  UErrorCode &status);
176 
203  static RegexPattern * U_EXPORT2 compile( UText *regex,
204  UParseError &pe,
205  UErrorCode &status);
206 
231  static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
232  uint32_t flags,
233  UParseError &pe,
234  UErrorCode &status);
235 
262  static RegexPattern * U_EXPORT2 compile( UText *regex,
263  uint32_t flags,
264  UParseError &pe,
265  UErrorCode &status);
266 
289  static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
290  uint32_t flags,
291  UErrorCode &status);
292 
317  static RegexPattern * U_EXPORT2 compile( UText *regex,
318  uint32_t flags,
319  UErrorCode &status);
320 
326  virtual uint32_t flags() const;
327 
345  virtual RegexMatcher *matcher(const UnicodeString &input,
346  UErrorCode &status) const;
347 
348 private:
361  RegexMatcher *matcher(const UChar *input,
362  UErrorCode &status) const;
363 public:
364 
365 
377  virtual RegexMatcher *matcher(UErrorCode &status) const;
378 
379 
394  static UBool U_EXPORT2 matches(const UnicodeString &regex,
395  const UnicodeString &input,
396  UParseError &pe,
397  UErrorCode &status);
398 
413  static UBool U_EXPORT2 matches(UText *regex,
414  UText *input,
415  UParseError &pe,
416  UErrorCode &status);
417 
426  virtual UnicodeString pattern() const;
427 
428 
439  virtual UText *patternText(UErrorCode &status) const;
440 
441 
455  virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const;
456 
457 
474  virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const;
475 
476 
515  virtual int32_t split(const UnicodeString &input,
516  UnicodeString dest[],
517  int32_t destCapacity,
518  UErrorCode &status) const;
519 
520 
559  virtual int32_t split(UText *input,
560  UText *dest[],
561  int32_t destCapacity,
562  UErrorCode &status) const;
563 
564 
570  virtual UClassID getDynamicClassID() const;
571 
577  static UClassID U_EXPORT2 getStaticClassID();
578 
579 private:
580  //
581  // Implementation Data
582  //
583  UText *fPattern; // The original pattern string.
584  UnicodeString *fPatternString; // The original pattern UncodeString if relevant
585  uint32_t fFlags; // The flags used when compiling the pattern.
586  //
587  UVector64 *fCompiledPat; // The compiled pattern p-code.
588  UnicodeString fLiteralText; // Any literal string data from the pattern,
589  // after un-escaping, for use during the match.
590 
591  UVector *fSets; // Any UnicodeSets referenced from the pattern.
592  Regex8BitSet *fSets8; // (and fast sets for latin-1 range.)
593 
594 
595  UErrorCode fDeferredStatus; // status if some prior error has left this
596  // RegexPattern in an unusable state.
597 
598  int32_t fMinMatchLen; // Minimum Match Length. All matches will have length
599  // >= this value. For some patterns, this calculated
600  // value may be less than the true shortest
601  // possible match.
602 
603  int32_t fFrameSize; // Size of a state stack frame in the
604  // execution engine.
605 
606  int32_t fDataSize; // The size of the data needed by the pattern that
607  // does not go on the state stack, but has just
608  // a single copy per matcher.
609 
610  UVector32 *fGroupMap; // Map from capture group number to position of
611  // the group's variables in the matcher stack frame.
612 
613  UnicodeSet **fStaticSets; // Ptr to static (shared) sets for predefined
614  // regex character classes, e.g. Word.
615 
616  Regex8BitSet *fStaticSets8; // Ptr to the static (shared) latin-1 only
617  // sets for predefined regex classes.
618 
619  int32_t fStartType; // Info on how a match must start.
620  int32_t fInitialStringIdx; //
621  int32_t fInitialStringLen;
622  UnicodeSet *fInitialChars;
623  UChar32 fInitialChar;
624  Regex8BitSet *fInitialChars8;
625  UBool fNeedsAltInput;
626 
627  UHashtable *fNamedCaptureMap; // Map from capture group names to numbers.
628 
629  friend class RegexCompile;
630  friend class RegexMatcher;
631  friend class RegexCImpl;
632 
633  //
634  // Implementation Methods
635  //
636  void init(); // Common initialization, for use by constructors.
637  void zap(); // Common cleanup
638 
639  void dumpOp(int32_t index) const;
640 
641  public:
642 #ifndef U_HIDE_INTERNAL_API
647  void dumpPattern() const;
648 #endif /* U_HIDE_INTERNAL_API */
649 };
650 
651 
652 
662 class U_I18N_API RegexMatcher U_FINAL : public UObject {
663 public:
664 
679  RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status);
680 
696  RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status);
697 
719  RegexMatcher(const UnicodeString &regexp, const UnicodeString &input,
720  uint32_t flags, UErrorCode &status);
721 
743  RegexMatcher(UText *regexp, UText *input,
744  uint32_t flags, UErrorCode &status);
745 
746 private:
759  RegexMatcher(const UnicodeString &regexp, const UChar *input,
760  uint32_t flags, UErrorCode &status);
761 public:
762 
763 
769  virtual ~RegexMatcher();
770 
771 
778  virtual UBool matches(UErrorCode &status);
779 
780 
791  virtual UBool matches(int64_t startIndex, UErrorCode &status);
792 
793 
807  virtual UBool lookingAt(UErrorCode &status);
808 
809 
823  virtual UBool lookingAt(int64_t startIndex, UErrorCode &status);
824 
825 
838  virtual UBool find();
839 
840 
854  virtual UBool find(UErrorCode &status);
855 
865  virtual UBool find(int64_t start, UErrorCode &status);
866 
867 
877  virtual UnicodeString group(UErrorCode &status) const;
878 
879 
897  virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
898 
904  virtual int32_t groupCount() const;
905 
906 
921  virtual UText *group(UText *dest, int64_t &group_len, UErrorCode &status) const;
922 
943  virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const;
944 
952  virtual int32_t start(UErrorCode &status) const;
953 
961  virtual int64_t start64(UErrorCode &status) const;
962 
963 
977  virtual int32_t start(int32_t group, UErrorCode &status) const;
978 
992  virtual int64_t start64(int32_t group, UErrorCode &status) const;
993 
1007  virtual int32_t end(UErrorCode &status) const;
1008 
1022  virtual int64_t end64(UErrorCode &status) const;
1023 
1024 
1042  virtual int32_t end(int32_t group, UErrorCode &status) const;
1043 
1061  virtual int64_t end64(int32_t group, UErrorCode &status) const;
1062 
1071  virtual RegexMatcher &reset();
1072 
1073 
1089  virtual RegexMatcher &reset(int64_t index, UErrorCode &status);
1090 
1091 
1109  virtual RegexMatcher &reset(const UnicodeString &input);
1110 
1111 
1125  virtual RegexMatcher &reset(UText *input);
1126 
1127 
1152  virtual RegexMatcher &refreshInputText(UText *input, UErrorCode &status);
1153 
1154 private:
1167  RegexMatcher &reset(const UChar *input);
1168 public:
1169 
1177  virtual const UnicodeString &input() const;
1178 
1187  virtual UText *inputText() const;
1188 
1199  virtual UText *getInput(UText *dest, UErrorCode &status) const;
1200 
1201 
1220  virtual RegexMatcher &region(int64_t start, int64_t limit, UErrorCode &status);
1221 
1233  virtual RegexMatcher &region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status);
1234 
1243  virtual int32_t regionStart() const;
1244 
1253  virtual int64_t regionStart64() const;
1254 
1255 
1264  virtual int32_t regionEnd() const;
1265 
1274  virtual int64_t regionEnd64() const;
1275 
1284  virtual UBool hasTransparentBounds() const;
1285 
1305 
1306 
1314  virtual UBool hasAnchoringBounds() const;
1315 
1316 
1330 
1331 
1344  virtual UBool hitEnd() const;
1345 
1355  virtual UBool requireEnd() const;
1356 
1357 
1363  virtual const RegexPattern &pattern() const;
1364 
1365 
1382  virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
1383 
1384 
1405  virtual UText *replaceAll(UText *replacement, UText *dest, UErrorCode &status);
1406 
1407 
1428  virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
1429 
1430 
1455  virtual UText *replaceFirst(UText *replacement, UText *dest, UErrorCode &status);
1456 
1457 
1486  const UnicodeString &replacement, UErrorCode &status);
1487 
1488 
1517  UText *replacement, UErrorCode &status);
1518 
1519 
1531 
1532 
1546  virtual UText *appendTail(UText *dest, UErrorCode &status);
1547 
1548 
1572  virtual int32_t split(const UnicodeString &input,
1573  UnicodeString dest[],
1574  int32_t destCapacity,
1575  UErrorCode &status);
1576 
1577 
1601  virtual int32_t split(UText *input,
1602  UText *dest[],
1603  int32_t destCapacity,
1604  UErrorCode &status);
1605 
1627  virtual void setTimeLimit(int32_t limit, UErrorCode &status);
1628 
1635  virtual int32_t getTimeLimit() const;
1636 
1658  virtual void setStackLimit(int32_t limit, UErrorCode &status);
1659 
1667  virtual int32_t getStackLimit() const;
1668 
1669 
1683  virtual void setMatchCallback(URegexMatchCallback *callback,
1684  const void *context,
1685  UErrorCode &status);
1686 
1687 
1698  virtual void getMatchCallback(URegexMatchCallback *&callback,
1699  const void *&context,
1700  UErrorCode &status);
1701 
1702 
1717  const void *context,
1718  UErrorCode &status);
1719 
1720 
1732  const void *&context,
1733  UErrorCode &status);
1734 
1735 #ifndef U_HIDE_INTERNAL_API
1741  void setTrace(UBool state);
1742 #endif /* U_HIDE_INTERNAL_API */
1743 
1749  static UClassID U_EXPORT2 getStaticClassID();
1750 
1756  virtual UClassID getDynamicClassID() const;
1757 
1758 private:
1759  // Constructors and other object boilerplate are private.
1760  // Instances of RegexMatcher can not be assigned, copied, cloned, etc.
1761  RegexMatcher(); // default constructor not implemented
1762  RegexMatcher(const RegexPattern *pat);
1763  RegexMatcher(const RegexMatcher &other);
1764  RegexMatcher &operator =(const RegexMatcher &rhs);
1765  void init(UErrorCode &status); // Common initialization
1766  void init2(UText *t, UErrorCode &e); // Common initialization, part 2.
1767 
1768  friend class RegexPattern;
1769  friend class RegexCImpl;
1770 public:
1771 #ifndef U_HIDE_INTERNAL_API
1773  void resetPreserveRegion(); // Reset matcher state, but preserve any region.
1774 #endif /* U_HIDE_INTERNAL_API */
1775 private:
1776 
1777  //
1778  // MatchAt This is the internal interface to the match engine itself.
1779  // Match status comes back in matcher member variables.
1780  //
1781  void MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status);
1782  inline void backTrack(int64_t &inputIdx, int32_t &patIdx);
1783  UBool isWordBoundary(int64_t pos); // perform Perl-like \b test
1784  UBool isUWordBoundary(int64_t pos); // perform RBBI based \b test
1785  REStackFrame *resetStack();
1786  inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status);
1787  void IncrementTime(UErrorCode &status);
1788 
1789  // Call user find callback function, if set. Return TRUE if operation should be interrupted.
1790  inline UBool findProgressInterrupt(int64_t matchIndex, UErrorCode &status);
1791 
1792  int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const;
1793 
1794  UBool findUsingChunk(UErrorCode &status);
1795  void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status);
1796  UBool isChunkWordBoundary(int32_t pos);
1797 
1798  const RegexPattern *fPattern;
1799  RegexPattern *fPatternOwned; // Non-NULL if this matcher owns the pattern, and
1800  // should delete it when through.
1801 
1802  const UnicodeString *fInput; // The string being matched. Only used for input()
1803  UText *fInputText; // The text being matched. Is never NULL.
1804  UText *fAltInputText; // A shallow copy of the text being matched.
1805  // Only created if the pattern contains backreferences.
1806  int64_t fInputLength; // Full length of the input text.
1807  int32_t fFrameSize; // The size of a frame in the backtrack stack.
1808 
1809  int64_t fRegionStart; // Start of the input region, default = 0.
1810  int64_t fRegionLimit; // End of input region, default to input.length.
1811 
1812  int64_t fAnchorStart; // Region bounds for anchoring operations (^ or $).
1813  int64_t fAnchorLimit; // See useAnchoringBounds
1814 
1815  int64_t fLookStart; // Region bounds for look-ahead/behind and
1816  int64_t fLookLimit; // and other boundary tests. See
1817  // useTransparentBounds
1818 
1819  int64_t fActiveStart; // Currently active bounds for matching.
1820  int64_t fActiveLimit; // Usually is the same as region, but
1821  // is changed to fLookStart/Limit when
1822  // entering look around regions.
1823 
1824  UBool fTransparentBounds; // True if using transparent bounds.
1825  UBool fAnchoringBounds; // True if using anchoring bounds.
1826 
1827  UBool fMatch; // True if the last attempted match was successful.
1828  int64_t fMatchStart; // Position of the start of the most recent match
1829  int64_t fMatchEnd; // First position after the end of the most recent match
1830  // Zero if no previous match, even when a region
1831  // is active.
1832  int64_t fLastMatchEnd; // First position after the end of the previous match,
1833  // or -1 if there was no previous match.
1834  int64_t fAppendPosition; // First position after the end of the previous
1835  // appendReplacement(). As described by the
1836  // JavaDoc for Java Matcher, where it is called
1837  // "append position"
1838  UBool fHitEnd; // True if the last match touched the end of input.
1839  UBool fRequireEnd; // True if the last match required end-of-input
1840  // (matched $ or Z)
1841 
1842  UVector64 *fStack;
1843  REStackFrame *fFrame; // After finding a match, the last active stack frame,
1844  // which will contain the capture group results.
1845  // NOT valid while match engine is running.
1846 
1847  int64_t *fData; // Data area for use by the compiled pattern.
1848  int64_t fSmallData[8]; // Use this for data if it's enough.
1849 
1850  int32_t fTimeLimit; // Max time (in arbitrary steps) to let the
1851  // match engine run. Zero for unlimited.
1852 
1853  int32_t fTime; // Match time, accumulates while matching.
1854  int32_t fTickCounter; // Low bits counter for time. Counts down StateSaves.
1855  // Kept separately from fTime to keep as much
1856  // code as possible out of the inline
1857  // StateSave function.
1858 
1859  int32_t fStackLimit; // Maximum memory size to use for the backtrack
1860  // stack, in bytes. Zero for unlimited.
1861 
1862  URegexMatchCallback *fCallbackFn; // Pointer to match progress callback funct.
1863  // NULL if there is no callback.
1864  const void *fCallbackContext; // User Context ptr for callback function.
1865 
1866  URegexFindProgressCallback *fFindProgressCallbackFn; // Pointer to match progress callback funct.
1867  // NULL if there is no callback.
1868  const void *fFindProgressCallbackContext; // User Context ptr for callback function.
1869 
1870 
1871  UBool fInputUniStrMaybeMutable; // Set when fInputText wraps a UnicodeString that may be mutable - compatibility.
1872 
1873  UBool fTraceDebug; // Set true for debug tracing of match engine.
1874 
1875  UErrorCode fDeferredStatus; // Save error state that cannot be immediately
1876  // reported, or that permanently disables this matcher.
1877 
1878  RuleBasedBreakIterator *fWordBreakItr;
1879 };
1880 
1882 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
1883 #endif
class RegexMatcher bundles together a regular expression pattern and input text to which the expressi...
Definition: regex.h:662
virtual UBool matches(int64_t startIndex, UErrorCode &status)
Resets the matcher, then attempts to match the input beginning at the specified startIndex,...
virtual UText * replaceFirst(UText *replacement, UText *dest, UErrorCode &status)
Replaces the first substring of the input that matches the pattern with the replacement string.
virtual RegexMatcher & reset()
Resets this matcher.
virtual int32_t groupCount() const
Returns the number of capturing groups in this matcher's pattern.
virtual UBool find()
Find the next pattern match in the input string.
virtual ~RegexMatcher()
Destructor.
virtual UBool hitEnd() const
Return TRUE if the most recent matching operation attempted to access additional input beyond the ava...
virtual UnicodeString & appendTail(UnicodeString &dest)
As the final step in a find-and-replace operation, append the remainder of the input string,...
virtual RegexMatcher & reset(UText *input)
Resets this matcher with a new input string.
virtual int32_t start(int32_t group, UErrorCode &status) const
Returns the index in the input string of the start of the text matched by the specified capture group...
virtual UText * group(UText *dest, int64_t &group_len, UErrorCode &status) const
Returns a shallow clone of the entire live input string with the UText current native index set to th...
virtual int32_t end(UErrorCode &status) const
Returns the index in the input string of the first character following the text matched during the pr...
virtual RegexMatcher & appendReplacement(UText *dest, UText *replacement, UErrorCode &status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
virtual int32_t end(int32_t group, UErrorCode &status) const
Returns the index in the input string of the character following the text matched by the specified ca...
virtual const RegexPattern & pattern() const
Returns the pattern that is interpreted by this matcher.
virtual int64_t regionStart64() const
Reports the start index of this matcher's region.
virtual int64_t start64(UErrorCode &status) const
Returns the index in the input string of the start of the text matched during the previous match oper...
virtual int32_t start(UErrorCode &status) const
Returns the index in the input string of the start of the text matched during the previous match oper...
virtual UBool requireEnd() const
Return TRUE if the most recent match succeeded and additional input could cause it to fail.
virtual RegexMatcher & useAnchoringBounds(UBool b)
Set whether this matcher is using Anchoring Bounds for its region.
virtual UBool matches(UErrorCode &status)
Attempts to match the entire input region against the pattern.
virtual UBool hasAnchoringBounds() const
Return true if this matcher is using anchoring bounds.
RegexMatcher(const UnicodeString &regexp, const UnicodeString &input, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual int64_t regionEnd64() const
Reports the end (limit) index (exclusive) of this matcher's region.
virtual RegexMatcher & appendReplacement(UnicodeString &dest, const UnicodeString &replacement, UErrorCode &status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
virtual void setTimeLimit(int32_t limit, UErrorCode &status)
Set a processing time limit for match operations with this Matcher.
virtual int64_t end64(UErrorCode &status) const
Returns the index in the input string of the first character following the text matched during the pr...
virtual UBool lookingAt(int64_t startIndex, UErrorCode &status)
Attempts to match the input string, starting from the specified index, against the pattern.
RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual int32_t regionEnd() const
Reports the end (limit) index (exclusive) of this matcher's region.
void resetPreserveRegion()
virtual void setFindProgressCallback(URegexFindProgressCallback *callback, const void *context, UErrorCode &status)
Set a progress callback function for use with find operations on this Matcher.
virtual int32_t split(UText *input, UText *dest[], int32_t destCapacity, UErrorCode &status)
Split a string into fields.
virtual int64_t start64(int32_t group, UErrorCode &status) const
Returns the index in the input string of the start of the text matched by the specified capture group...
virtual UText * getInput(UText *dest, UErrorCode &status) const
Returns the input string being matched, either by copying it into the provided UText parameter or by ...
virtual UBool hasTransparentBounds() const
Queries the transparency of region bounds for this matcher.
virtual int32_t getStackLimit() const
Get the size of the heap storage available for use by the back tracking stack.
virtual RegexMatcher & region(int64_t start, int64_t limit, UErrorCode &status)
Sets the limits of this matcher's region.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
virtual UText * replaceAll(UText *replacement, UText *dest, UErrorCode &status)
Replaces every substring of the input that matches the pattern with the given replacement string.
virtual RegexMatcher & region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status)
Identical to region(start, limit, status) but also allows a start position without resetting the regi...
virtual RegexMatcher & refreshInputText(UText *input, UErrorCode &status)
Set the subject text string upon which the regular expression is looking for matches without changing...
void setTrace(UBool state)
setTrace Debug function, enable/disable tracing of the matching engine.
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
virtual UText * appendTail(UText *dest, UErrorCode &status)
As the final step in a find-and-replace operation, append the remainder of the input string,...
virtual UBool find(UErrorCode &status)
Find the next pattern match in the input string.
virtual UBool find(int64_t start, UErrorCode &status)
Resets this RegexMatcher and then attempts to find the next substring of the input string that matche...
virtual int32_t getTimeLimit() const
Get the time limit, if any, for match operations made with this Matcher.
virtual int32_t regionStart() const
Reports the start index of this matcher's region.
virtual void getFindProgressCallback(URegexFindProgressCallback *&callback, const void *&context, UErrorCode &status)
Get the find progress callback function for this URegularExpression.
virtual UText * group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const
Returns a shallow clone of the entire live input string with the UText current native index set to th...
virtual RegexMatcher & reset(const UnicodeString &input)
Resets this matcher with a new input string.
virtual UBool lookingAt(UErrorCode &status)
Attempts to match the input string, starting from the beginning of the region, against the pattern.
virtual UText * inputText() const
Returns the input string being matched.
virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const
Returns a string containing the text captured by the given group during the previous match operation.
virtual const UnicodeString & input() const
Returns the input string being matched.
virtual RegexMatcher & reset(int64_t index, UErrorCode &status)
Resets this matcher, and set the current input position.
virtual RegexMatcher & useTransparentBounds(UBool b)
Sets the transparency of region bounds for this matcher.
virtual int32_t split(const UnicodeString &input, UnicodeString dest[], int32_t destCapacity, UErrorCode &status)
Split a string into fields.
virtual void setStackLimit(int32_t limit, UErrorCode &status)
Set the amount of heap storage available for use by the match backtracking stack.
virtual int64_t end64(int32_t group, UErrorCode &status) const
Returns the index in the input string of the character following the text matched by the specified ca...
virtual void getMatchCallback(URegexMatchCallback *&callback, const void *&context, UErrorCode &status)
Get the callback function for this URegularExpression.
virtual void setMatchCallback(URegexMatchCallback *callback, const void *context, UErrorCode &status)
Set a callback function for use with this Matcher.
RegexMatcher(UText *regexp, UText *input, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual UnicodeString group(UErrorCode &status) const
Returns a string containing the text matched by the previous match.
virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status)
Replaces every substring of the input that matches the pattern with the given replacement string.
virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status)
Replaces the first substring of the input that matches the pattern with the replacement string.
Class RegexPattern represents a compiled regular expression.
Definition: regex.h:85
static UBool matches(const UnicodeString &regex, const UnicodeString &input, UParseError &pe, UErrorCode &status)
Test whether a string matches a regular expression.
static RegexPattern * compile(UText *regex, uint32_t flags, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified match m...
UBool operator!=(const RegexPattern &that) const
Comparison operator.
Definition: regex.h:130
virtual RegexMatcher * matcher(const UnicodeString &input, UErrorCode &status) const
Creates a RegexMatcher that will match the given input against this pattern.
virtual UText * patternText(UErrorCode &status) const
Returns the regular expression from which this pattern was compiled.
static RegexPattern * compile(const UnicodeString &regex, uint32_t flags, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified match m...
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
RegexPattern(const RegexPattern &source)
Copy Constructor.
virtual int32_t split(const UnicodeString &input, UnicodeString dest[], int32_t destCapacity, UErrorCode &status) const
Split a string into fields.
virtual UnicodeString pattern() const
Returns the regular expression from which this pattern was compiled.
void dumpPattern() const
Dump a compiled pattern.
static RegexPattern * compile(UText *regex, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object.
static UBool matches(UText *regex, UText *input, UParseError &pe, UErrorCode &status)
Test whether a string matches a regular expression.
virtual RegexMatcher * matcher(UErrorCode &status) const
Creates a RegexMatcher that will match against this pattern.
virtual ~RegexPattern()
Destructor.
static RegexPattern * compile(const UnicodeString &regex, uint32_t flags, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified match m...
virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const
Get the group number corresponding to a named capture group.
static RegexPattern * compile(UText *regex, uint32_t flags, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified match m...
static RegexPattern * compile(const UnicodeString &regex, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object.
RegexPattern()
default constructor.
virtual RegexPattern * clone() const
Create an exact copy of this RegexPattern object.
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
virtual int32_t split(UText *input, UText *dest[], int32_t destCapacity, UErrorCode &status) const
Split a string into fields.
UBool operator==(const RegexPattern &that) const
Comparison operator.
virtual uint32_t flags() const
Get the match mode flags that were used when compiling this pattern.
virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const
Get the group number corresponding to a named capture group.
A subclass of BreakIterator whose behavior is specified using a list of rules.
Definition: rbbi.h:65
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:221
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:276
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:294
C API: Parse Error Information.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
UText struct.
Definition: utext.h:1343
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:332
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:312
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:91
C API: Regular Expressions.
UBool URegexFindProgressCallback(const void *context, int64_t matchIndex)
Function pointer for a regular expression find callback function.
Definition: uregex.h:1571
UBool URegexMatchCallback(const void *context, int32_t steps)
Function pointer for a regular expression matching callback function.
Definition: uregex.h:1497
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:476
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:358
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129