ICU 57.1
uset.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2002-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: uset.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2002mar07
14 * created by: Markus W. Scherer
15 *
16 * C version of UnicodeSet.
17 */
18 
19 
27 #ifndef __USET_H__
28 #define __USET_H__
29 
30 #include "unicode/utypes.h"
31 #include "unicode/uchar.h"
32 #include "unicode/localpointer.h"
33 
34 #ifndef UCNV_H
35 struct USet;
41 typedef struct USet USet;
42 #endif
43 
49 enum {
55 
83 
93 };
94 
150 typedef enum USetSpanCondition {
205 
206 enum {
214 };
215 
221 typedef struct USerializedSet {
226  const uint16_t *array;
231  int32_t bmpLength;
236  int32_t length;
243 
244 /*********************************************************************
245  * USet API
246  *********************************************************************/
247 
255 U_STABLE USet* U_EXPORT2
257 
268 U_STABLE USet* U_EXPORT2
270 
280 U_STABLE USet* U_EXPORT2
281 uset_openPattern(const UChar* pattern, int32_t patternLength,
282  UErrorCode* ec);
283 
295 U_STABLE USet* U_EXPORT2
296 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
297  uint32_t options,
298  UErrorCode* ec);
299 
306 U_STABLE void U_EXPORT2
308 
309 #if U_SHOW_CPLUSPLUS_API
310 
312 
323 
325 
326 #endif
327 
337 U_STABLE USet * U_EXPORT2
338 uset_clone(const USet *set);
339 
349 U_STABLE UBool U_EXPORT2
350 uset_isFrozen(const USet *set);
351 
366 U_STABLE void U_EXPORT2
368 
379 U_STABLE USet * U_EXPORT2
381 
391 U_STABLE void U_EXPORT2
393  UChar32 start, UChar32 end);
394 
416 U_STABLE int32_t U_EXPORT2
418  const UChar *pattern, int32_t patternLength,
419  uint32_t options,
420  UErrorCode *status);
421 
444 U_STABLE void U_EXPORT2
446  UProperty prop, int32_t value, UErrorCode* ec);
447 
483 U_STABLE void U_EXPORT2
485  const UChar *prop, int32_t propLength,
486  const UChar *value, int32_t valueLength,
487  UErrorCode* ec);
488 
498 U_STABLE UBool U_EXPORT2
499 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
500  int32_t pos);
501 
517 U_STABLE int32_t U_EXPORT2
518 uset_toPattern(const USet* set,
519  UChar* result, int32_t resultCapacity,
520  UBool escapeUnprintable,
521  UErrorCode* ec);
522 
531 U_STABLE void U_EXPORT2
533 
546 U_STABLE void U_EXPORT2
547 uset_addAll(USet* set, const USet *additionalSet);
548 
558 U_STABLE void U_EXPORT2
559 uset_addRange(USet* set, UChar32 start, UChar32 end);
560 
570 U_STABLE void U_EXPORT2
571 uset_addString(USet* set, const UChar* str, int32_t strLen);
572 
582 U_STABLE void U_EXPORT2
583 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
584 
593 U_STABLE void U_EXPORT2
595 
605 U_STABLE void U_EXPORT2
607 
617 U_STABLE void U_EXPORT2
618 uset_removeString(USet* set, const UChar* str, int32_t strLen);
619 
631 U_STABLE void U_EXPORT2
632 uset_removeAll(USet* set, const USet* removeSet);
633 
648 U_STABLE void U_EXPORT2
649 uset_retain(USet* set, UChar32 start, UChar32 end);
650 
663 U_STABLE void U_EXPORT2
664 uset_retainAll(USet* set, const USet* retain);
665 
674 U_STABLE void U_EXPORT2
676 
685 U_STABLE void U_EXPORT2
687 
699 U_STABLE void U_EXPORT2
700 uset_complementAll(USet* set, const USet* complement);
701 
709 U_STABLE void U_EXPORT2
711 
738 U_STABLE void U_EXPORT2
739 uset_closeOver(USet* set, int32_t attributes);
740 
747 U_STABLE void U_EXPORT2
749 
757 U_STABLE UBool U_EXPORT2
758 uset_isEmpty(const USet* set);
759 
768 U_STABLE UBool U_EXPORT2
769 uset_contains(const USet* set, UChar32 c);
770 
780 U_STABLE UBool U_EXPORT2
781 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
782 
791 U_STABLE UBool U_EXPORT2
792 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
793 
804 U_STABLE int32_t U_EXPORT2
805 uset_indexOf(const USet* set, UChar32 c);
806 
817 U_STABLE UChar32 U_EXPORT2
818 uset_charAt(const USet* set, int32_t charIndex);
819 
828 U_STABLE int32_t U_EXPORT2
829 uset_size(const USet* set);
830 
839 U_STABLE int32_t U_EXPORT2
841 
860 U_STABLE int32_t U_EXPORT2
861 uset_getItem(const USet* set, int32_t itemIndex,
862  UChar32* start, UChar32* end,
863  UChar* str, int32_t strCapacity,
864  UErrorCode* ec);
865 
874 U_STABLE UBool U_EXPORT2
875 uset_containsAll(const USet* set1, const USet* set2);
876 
887 U_STABLE UBool U_EXPORT2
888 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
889 
898 U_STABLE UBool U_EXPORT2
899 uset_containsNone(const USet* set1, const USet* set2);
900 
909 U_STABLE UBool U_EXPORT2
910 uset_containsSome(const USet* set1, const USet* set2);
911 
931 U_STABLE int32_t U_EXPORT2
932 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
933 
952 U_STABLE int32_t U_EXPORT2
953 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
954 
974 U_STABLE int32_t U_EXPORT2
975 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
976 
995 U_STABLE int32_t U_EXPORT2
996 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
997 
1006 U_STABLE UBool U_EXPORT2
1007 uset_equals(const USet* set1, const USet* set2);
1008 
1009 /*********************************************************************
1010  * Serialized set API
1011  *********************************************************************/
1012 
1062 U_STABLE int32_t U_EXPORT2
1063 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1064 
1073 U_STABLE UBool U_EXPORT2
1074 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1075 
1083 U_STABLE void U_EXPORT2
1085 
1094 U_STABLE UBool U_EXPORT2
1096 
1106 U_STABLE int32_t U_EXPORT2
1108 
1122 U_STABLE UBool U_EXPORT2
1123 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1124  UChar32* pStart, UChar32* pEnd);
1125 
1126 #endif
"Smart pointer" class, closes a USet via uset_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:539
A serialized form of a Unicode set.
Definition: uset.h:221
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition: uset.h:241
int32_t bmpLength
The length of the array that contains BMP characters.
Definition: uset.h:231
const uint16_t * array
The serialized Unicode Set.
Definition: uset.h:226
int32_t length
The total length of the array.
Definition: uset.h:236
C API: Unicode Properties.
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:161
struct USet USet
Definition: ucnv.h:67
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:332
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:312
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:109
UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
USet * uset_openEmpty(void)
Create an empty USet object.
void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
USet * uset_clone(const USet *set)
Returns a copy of this object.
void uset_clear(USet *set)
Removes all of the elements from this set.
int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns TRUE if the given USet contains the given string.
UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
UBool uset_isEmpty(const USet *set)
Returns TRUE if the given USet contains no characters and no strings.
void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns TRUE if the given USerializedSet contains the given character.
int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set.
void uset_removeAllStrings(USet *set)
Remove all strings from this set.
int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:150
@ USET_SPAN_NOT_CONTAINED
Continues a span() while there is no set element at the current position.
Definition: uset.h:163
@ USET_SPAN_CONTAINED
Spans the longest substring that is a concatenation of set elements (characters or strings).
Definition: uset.h:178
@ USET_SPAN_CONDITION_COUNT
One more than the last span condition.
Definition: uset.h:203
@ USET_SPAN_SIMPLE
Continues a span() while there is a set element at the current position.
Definition: uset.h:198
void uset_complement(USet *set)
Inverts this set.
USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property.
void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in the given USet.
void uset_freeze(USet *set)
Freeze the set (make it immutable).
int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
struct USerializedSet USerializedSet
A serialized form of a Unicode set.
@ USET_SERIALIZED_STATIC_ARRAY_CAPACITY
Capacity of USerializedSet::staticArray.
Definition: uset.h:213
UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
@ USET_IGNORE_SPACE
Ignore white space within patterns unless quoted or escaped.
Definition: uset.h:54
@ USET_ADD_CASE_MAPPINGS
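Enable case insensitive matching. Adds the lower-, title-, and uppercase mappings as well as the case folding of each existing element in the set.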
Definition: uset.h:92
@ USET_CASE_INSENSITIVE
Enable case insensitive matching.
Definition: uset.h:82
void uset_close(USet *set)
Disposes of the storage used by a USet object.
UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns TRUE if the given USet contains all characters c where start <= c && c <= end.
UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
UBool uset_contains(const USet *set, UChar32 c)
Returns TRUE if the given USet contains the given character.
void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:476
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129