ICU 57.1
normlzr.h
Go to the documentation of this file.
1 /*
2  ********************************************************************
3  * COPYRIGHT:
4  * Copyright (c) 1996-2015, International Business Machines Corporation and
5  * others. All Rights Reserved.
6  ********************************************************************
7  */
8 
9 #ifndef NORMLZR_H
10 #define NORMLZR_H
11 
12 #include "unicode/utypes.h"
13 
19 #if !UCONFIG_NO_NORMALIZATION
20 
21 #include "unicode/chariter.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h"
25 #include "unicode/uobject.h"
26 
133 public:
134 #ifndef U_HIDE_DEPRECATED_API
140  enum {
141  DONE=0xffff
142  };
143 
144  // Constructors
145 
157 
169  Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
170 
182 
188  Normalizer(const Normalizer& copy);
189 #endif /* U_HIDE_DEPRECATED_API */
190 
195  virtual ~Normalizer();
196 
197 
198  //-------------------------------------------------------------------------
199  // Static utility methods
200  //-------------------------------------------------------------------------
201 
202 #ifndef U_HIDE_DEPRECATED_API
217  static void U_EXPORT2 normalize(const UnicodeString& source,
218  UNormalizationMode mode, int32_t options,
219  UnicodeString& result,
220  UErrorCode &status);
221 
239  static void U_EXPORT2 compose(const UnicodeString& source,
240  UBool compat, int32_t options,
241  UnicodeString& result,
242  UErrorCode &status);
243 
261  static void U_EXPORT2 decompose(const UnicodeString& source,
262  UBool compat, int32_t options,
263  UnicodeString& result,
264  UErrorCode &status);
265 
286  static inline UNormalizationCheckResult
287  quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
288 
303  quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
304 
325  static inline UBool
326  isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
327 
343  static UBool
344  isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
345 
375  static UnicodeString &
376  U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
377  UnicodeString &result,
378  UNormalizationMode mode, int32_t options,
379  UErrorCode &errorCode);
380 #endif /* U_HIDE_DEPRECATED_API */
381 
446  static inline int32_t
447  compare(const UnicodeString &s1, const UnicodeString &s2,
448  uint32_t options,
449  UErrorCode &errorCode);
450 
451 #ifndef U_HIDE_DEPRECATED_API
452  //-------------------------------------------------------------------------
453  // Iteration API
454  //-------------------------------------------------------------------------
455 
465 
474  UChar32 first(void);
475 
484  UChar32 last(void);
485 
500  UChar32 next(void);
501 
517 
527  void setIndexOnly(int32_t index);
528 
534  void reset(void);
535 
550  int32_t getIndex(void) const;
551 
560  int32_t startIndex(void) const;
561 
572  int32_t endIndex(void) const;
573 
582  UBool operator==(const Normalizer& that) const;
583 
592  inline UBool operator!=(const Normalizer& that) const;
593 
600  Normalizer* clone(void) const;
601 
608  int32_t hashCode(void) const;
609 
610  //-------------------------------------------------------------------------
611  // Property access methods
612  //-------------------------------------------------------------------------
613 
630 
642 
659  void setOption(int32_t option,
660  UBool value);
661 
672  UBool getOption(int32_t option) const;
673 
682  void setText(const UnicodeString& newText,
683  UErrorCode &status);
684 
693  void setText(const CharacterIterator& newText,
694  UErrorCode &status);
695 
705  void setText(const UChar* newText,
706  int32_t length,
707  UErrorCode &status);
714  void getText(UnicodeString& result);
715 
721  static UClassID U_EXPORT2 getStaticClassID();
722 #endif /* U_HIDE_DEPRECATED_API */
723 
729  virtual UClassID getDynamicClassID() const;
730 
731 private:
732  //-------------------------------------------------------------------------
733  // Private functions
734  //-------------------------------------------------------------------------
735 
736  Normalizer(); // default constructor not implemented
737  Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
738 
739  // Private utility methods for iteration
740  // For documentation, see the source code
741  UBool nextNormalize();
742  UBool previousNormalize();
743 
744  void init();
745  void clearBuffer(void);
746 
747  //-------------------------------------------------------------------------
748  // Private data
749  //-------------------------------------------------------------------------
750 
751  FilteredNormalizer2*fFilteredNorm2; // owned if not NULL
752  const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2
753 #ifndef U_HIDE_DEPRECATED_API
754  UNormalizationMode fUMode;
755 #endif /* U_HIDE_DEPRECATED_API */
756  int32_t fOptions;
757 
758  // The input text and our position in it
759  CharacterIterator *text;
760 
761  // The normalization buffer is the result of normalization
762  // of the source in [currentIndex..nextIndex[ .
763  int32_t currentIndex, nextIndex;
764 
765  // A buffer for holding intermediate results
766  UnicodeString buffer;
767  int32_t bufferPos;
768 };
769 
770 //-------------------------------------------------------------------------
771 // Inline implementations
772 //-------------------------------------------------------------------------
773 
774 #ifndef U_HIDE_DEPRECATED_API
775 inline UBool
777 { return ! operator==(other); }
778 
781  UNormalizationMode mode,
782  UErrorCode &status) {
783  return quickCheck(source, mode, 0, status);
784 }
785 
786 inline UBool
788  UNormalizationMode mode,
789  UErrorCode &status) {
790  return isNormalized(source, mode, 0, status);
791 }
792 #endif /* U_HIDE_DEPRECATED_API */
793 
794 inline int32_t
796  uint32_t options,
797  UErrorCode &errorCode) {
798  // all argument checking is done in unorm_compare
799  return unorm_compare(s1.getBuffer(), s1.length(),
800  s2.getBuffer(), s2.length(),
801  options,
802  &errorCode);
803 }
804 
806 
807 #endif /* #if !UCONFIG_NO_NORMALIZATION */
808 
809 #endif // NORMLZR_H
C++ API: Character Iterator.
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:356
Normalization filtered by a UnicodeSet.
Definition: normalizer2.h:443
Unicode normalization functionality for standard Unicode normalization or for using custom mapping tables.
Definition: normalizer2.h:78
Old Unicode normalization API.
Definition: normlzr.h:132
Normalizer(const UnicodeString &str, UNormalizationMode mode)
Creates a new Normalizer object for iterating over the normalized form of a given string.
void setText(const UChar *newText, int32_t length, UErrorCode &status)
Set the input text over which this Normalizer will iterate.
static UNormalizationCheckResult quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status)
Performs a quick check on a string to determine quickly whether the string is in a particular normalization format.
Definition: normlzr.h:780
static UnicodeString & concatenate(const UnicodeString &left, const UnicodeString &right, UnicodeString &result, UNormalizationMode mode, int32_t options, UErrorCode &errorCode)
Concatenate normalized strings, making sure that the result is normalized as well.
void setOption(int32_t option, UBool value)
Set options that affect this Normalizer's operation.
void reset(void)
Reset the index to the beginning of the text.
void setText(const UnicodeString &newText, UErrorCode &status)
Set the input text over which this Normalizer will iterate.
static UBool isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode)
Test if a string is in a given normalization form; same as the other version of isNormalized but takes an extra options parameter like most normalization functions.
Normalizer * clone(void) const
Returns a pointer to a new Normalizer that is a clone of this one.
int32_t getIndex(void) const
Retrieve the current iteration position in the input text that is being normalized.
void setText(const CharacterIterator &newText, UErrorCode &status)
Set the input text over which this Normalizer will iterate.
void setIndexOnly(int32_t index)
Set the iteration position in the input text that is being normalized, without any immediate normalization.
static void normalize(const UnicodeString &source, UNormalizationMode mode, int32_t options, UnicodeString &result, UErrorCode &status)
Normalizes a UnicodeString according to the specified normalization mode.
UChar32 last(void)
Return the last character in the normalized text.
UBool getOption(int32_t option) const
Determine whether an option is turned on or off.
UChar32 first(void)
Return the first character in the normalized text.
static UBool isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode)
Test if a string is in a given normalization form.
Definition: normlzr.h:787
Normalizer(const CharacterIterator &iter, UNormalizationMode mode)
Creates a new Normalizer object for iterating over the normalized form of the given text.
void getText(UnicodeString &result)
Copies the input text into the UnicodeString argument.
int32_t hashCode(void) const
Generates a hash code for this iterator.
static void compose(const UnicodeString &source, UBool compat, int32_t options, UnicodeString &result, UErrorCode &status)
Compose a UnicodeString.
virtual ~Normalizer()
Destructor.
void setMode(UNormalizationMode newMode)
Set the normalization mode for this object.
UNormalizationMode getUMode(void) const
Return the normalization mode for this object.
UBool operator!=(const Normalizer &that) const
Returns FALSE when both iterators refer to the same character in the same input text.
Definition: normlzr.h:776
int32_t endIndex(void) const
Retrieve the index of the end of the input text.
int32_t startIndex(void) const
Retrieve the index of the start of the input text.
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
Normalizer(const Normalizer &copy)
Copy constructor.
static int32_t compare(const UnicodeString &s1, const UnicodeString &s2, uint32_t options, UErrorCode &errorCode)
Compare two strings for canonical equivalence.
Definition: normlzr.h:795
static UNormalizationCheckResult quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status)
Performs a quick check on a string; same as the other version of quickCheck but takes an extra options parameter like most normalization functions.
Normalizer(const UChar *str, int32_t length, UNormalizationMode mode)
Creates a new Normalizer object for iterating over the normalized form of a given string.
static void decompose(const UnicodeString &source, UBool compat, int32_t options, UnicodeString &result, UErrorCode &status)
Static method to decompose a UnicodeString.
UBool operator==(const Normalizer &that) const
Returns TRUE when both iterators refer to the same character in the same input text.
UChar32 next(void)
Return the next character in the normalized text.
UChar32 current(void)
Return the current character in the normalized text.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
UChar32 previous(void)
Return the previous character in the normalized text and decrement the iteration position.
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:221
UnicodeString is a string class that stores Unicode characters directly and provides functionality similar to that of the Java String and StringBuffer/StringBuilder classes.
Definition: unistr.h:294
int32_t length(void) const
Return the length of the UnicodeString object.
Definition: unistr.h:3794
UChar * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
C++ API: New API for Unicode Normalization.
UBool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:218
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:332
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:312
C++ API: Unicode String.
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition: unorm2.h:91
int32_t unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compares two strings for canonical equivalence.
C API: Unicode Normalization.
UNormalizationMode
Constants for normalization modes.
Definition: unorm.h:136
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:91
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:476
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:357
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129