15 #ifndef __UCHARSTRIE_H__
16 #define __UCHARSTRIE_H__
65 : ownedArray_(
NULL), uchars_(trieUChars),
66 pos_(uchars_), remainingMatchLength_(-1) {}
81 : ownedArray_(
NULL), uchars_(other.uchars_),
82 pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
91 remainingMatchLength_=-1;
112 int32_t remainingMatchLength;
123 state.uchars=uchars_;
125 state.remainingMatchLength=remainingMatchLength_;
140 if(uchars_==state.uchars && uchars_!=
NULL) {
142 remainingMatchLength_=state.remainingMatchLength;
163 remainingMatchLength_=-1;
164 return nextImpl(uchars_, uchar);
221 const UChar *pos=pos_;
222 int32_t leadUnit=*pos++;
224 return leadUnit&kValueIsFinal ?
225 readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit);
238 const UChar *pos=pos_;
240 return pos!=
NULL && findUniqueValue(pos+remainingMatchLength_+1,
FALSE, uniqueValue);
331 UBool truncateAndStop() {
339 const UChar *uchars_;
341 const UChar *initialPos_;
342 int32_t remainingMatchLength_;
343 int32_t initialRemainingMatchLength_;
370 : ownedArray_(adoptUChars), uchars_(trieUChars),
371 pos_(uchars_), remainingMatchLength_(-1) {}
382 static inline int32_t readValue(
const UChar *pos, int32_t leadUnit) {
384 if(leadUnit<kMinTwoUnitValueLead) {
386 }
else if(leadUnit<kThreeUnitValueLead) {
387 value=((leadUnit-kMinTwoUnitValueLead)<<16)|*pos;
389 value=(pos[0]<<16)|pos[1];
393 static inline const UChar *skipValue(
const UChar *pos, int32_t leadUnit) {
394 if(leadUnit>=kMinTwoUnitValueLead) {
395 if(leadUnit<kThreeUnitValueLead) {
403 static inline const UChar *skipValue(
const UChar *pos) {
404 int32_t leadUnit=*pos++;
405 return skipValue(pos, leadUnit&0x7fff);
408 static inline int32_t readNodeValue(
const UChar *pos, int32_t leadUnit) {
411 if(leadUnit<kMinTwoUnitNodeValueLead) {
412 value=(leadUnit>>6)-1;
413 }
else if(leadUnit<kThreeUnitNodeValueLead) {
414 value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos;
416 value=(pos[0]<<16)|pos[1];
420 static inline const UChar *skipNodeValue(
const UChar *pos, int32_t leadUnit) {
422 if(leadUnit>=kMinTwoUnitNodeValueLead) {
423 if(leadUnit<kThreeUnitNodeValueLead) {
432 static inline const UChar *jumpByDelta(
const UChar *pos) {
433 int32_t delta=*pos++;
434 if(delta>=kMinTwoUnitDeltaLead) {
435 if(delta==kThreeUnitDeltaLead) {
436 delta=(pos[0]<<16)|pos[1];
439 delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++;
445 static const UChar *skipDelta(
const UChar *pos) {
446 int32_t delta=*pos++;
447 if(delta>=kMinTwoUnitDeltaLead) {
448 if(delta==kThreeUnitDeltaLead) {
470 static const UChar *findUniqueValueFromBranch(
const UChar *pos, int32_t length,
471 UBool haveUniqueValue, int32_t &uniqueValue);
474 static UBool findUniqueValue(
const UChar *pos,
UBool haveUniqueValue, int32_t &uniqueValue);
478 static void getNextBranchUChars(
const UChar *pos, int32_t length,
Appendable &out);
526 static const int32_t kMinLinearMatch=0x30;
527 static const int32_t kMaxLinearMatchLength=0x10;
532 static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;
533 static const int32_t kNodeTypeMask=kMinValueLead-1;
536 static const int32_t kValueIsFinal=0x8000;
539 static const int32_t kMaxOneUnitValue=0x3fff;
541 static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1;
542 static const int32_t kThreeUnitValueLead=0x7fff;
544 static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1;
547 static const int32_t kMaxOneUnitNodeValue=0xff;
548 static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6);
549 static const int32_t kThreeUnitNodeValueLead=0x7fc0;
551 static const int32_t kMaxTwoUnitNodeValue=
552 ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1;
555 static const int32_t kMaxOneUnitDelta=0xfbff;
556 static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1;
557 static const int32_t kThreeUnitDeltaLead=0xffff;
559 static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1;
564 const UChar *uchars_;
571 int32_t remainingMatchLength_;
Base class for objects to which Unicode characters and strings can be appended.
static const int32_t kMaxBranchLinearSubNodeLength
Builder class for UCharsTrie.
Iterator for all of the (string, value) pairs in a UCharsTrie.
Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode)
Iterates from the current state of the specified UCharsTrie.
const UnicodeString & getString() const
UBool next(UErrorCode &errorCode)
Finds the next (string, value) pair if there is one.
Iterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode)
Iterates from the root of a UChar-serialized UCharsTrie.
Iterator & reset()
Resets this iterator to its initial state.
UCharsTrie state object, for saving a trie's current state and resetting the trie back to this state later.
State()
Constructs an empty State.
Light-weight, non-const reader class for a UCharsTrie.
UCharsTrie(const UChar *trieUChars)
Constructs a UCharsTrie reader instance.
UStringTrieResult first(int32_t uchar)
Traverses the trie from the initial state for this input UChar.
UStringTrieResult firstForCodePoint(UChar32 cp)
Traverses the trie from the initial state for the one or two UTF-16 code units for this input code point.
UCharsTrie & reset()
Resets this trie to its initial state.
UBool hasUniqueValue(int32_t &uniqueValue) const
Determines whether all strings reachable from the current state map to the same value.
UCharsTrie & resetToState(const State &state)
Resets this trie to the saved state.
UStringTrieResult current() const
Determines whether the string so far matches, whether it has a value, and whether another input UChar can continue a matching string.
UCharsTrie(const UCharsTrie &other)
Copy constructor, copies the other trie reader object and its state, but not the UChar array which will be shared (shallow copy).
int32_t getValue() const
Returns a matching string's value if called immediately after current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
UStringTrieResult next(const UChar *s, int32_t length)
Traverses the trie from the current state for this string.
int32_t getNextUChars(Appendable &out) const
Finds each UChar which continues the string from the current state.
const UCharsTrie & saveState(State &state) const
Saves the state of this trie.
UStringTrieResult next(int32_t uchar)
Traverses the trie from the current state for this input UChar.
UStringTrieResult nextForCodePoint(UChar32 cp)
Traverses the trie from the current state for the one or two UTF-16 code units for this input code point.
UMemory is the common ICU base class.
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality to the Java String and StringBuffer/StringBuilder classes.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
int8_t UBool
The ICU boolean type.
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is 16 bits wide; always assumed to be unsigned.
#define TRUE
The TRUE value of a UBool.
#define FALSE
The FALSE value of a UBool.
C++ API: Common ICU base class UObject.
C API: Helper definitions for dictionary trie APIs.
UStringTrieResult
Return values for BytesTrie::next(), UCharsTrie::next() and similar methods.
@ USTRINGTRIE_INTERMEDIATE_VALUE
The input unit(s) continued a matching string and there is a value for the string so far.
Basic definitions for ICU, for both C and C++ APIs.
#define NULL
Define NULL if necessary, to 0 for C++ and to ((void *)0) for C.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers, and to use the same mechanism for C and C++.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.