ICU 57.1  57.1
brkiter.h
Go to the documentation of this file.
1 /*
2 ********************************************************************************
3 * Copyright (C) 1997-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ********************************************************************************
6 *
7 * File brkiter.h
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 02/18/97 aliu Added typedef for TextCount. Made DONE const.
13 * 05/07/97 aliu Fixed DLL declaration.
14 * 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
15 * 08/11/98 helena Sync-up JDK1.2.
16 * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
17 ********************************************************************************
18 */
19 
20 #ifndef BRKITER_H
21 #define BRKITER_H
22 
23 #include "unicode/utypes.h"
24 
30 #if UCONFIG_NO_BREAK_ITERATION
31 
33 
34 /*
35  * Allow the declaration of APIs with pointers to BreakIterator
36  * even when break iteration is removed from the build.
37  */
38 class BreakIterator;
39 
41 
42 #else
43 
44 #include "unicode/uobject.h"
45 #include "unicode/unistr.h"
46 #include "unicode/chariter.h"
47 #include "unicode/locid.h"
48 #include "unicode/ubrk.h"
49 #include "unicode/strenum.h"
50 #include "unicode/utext.h"
51 #include "unicode/umisc.h"
52 
54 
101 public:
106  virtual ~BreakIterator();
107 
121  virtual UBool operator==(const BreakIterator&) const = 0;
122 
129  UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
130 
136  virtual BreakIterator* clone(void) const = 0;
137 
143  virtual UClassID getDynamicClassID(void) const = 0;
144 
149  virtual CharacterIterator& getText(void) const = 0;
150 
151 
166  virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
167 
174  virtual void setText(const UnicodeString &text) = 0;
175 
194  virtual void setText(UText *text, UErrorCode &status) = 0;
195 
204  virtual void adoptText(CharacterIterator* it) = 0;
205 
206  enum {
212  DONE = (int32_t)-1
213  };
214 
220  virtual int32_t first(void) = 0;
221 
227  virtual int32_t last(void) = 0;
228 
235  virtual int32_t previous(void) = 0;
236 
243  virtual int32_t next(void) = 0;
244 
250  virtual int32_t current(void) const = 0;
251 
260  virtual int32_t following(int32_t offset) = 0;
261 
270  virtual int32_t preceding(int32_t offset) = 0;
271 
280  virtual UBool isBoundary(int32_t offset) = 0;
281 
291  virtual int32_t next(int32_t n) = 0;
292 
307  virtual int32_t getRuleStatus() const;
308 
337  virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
338 
358  static BreakIterator* U_EXPORT2
359  createWordInstance(const Locale& where, UErrorCode& status);
360 
382  static BreakIterator* U_EXPORT2
383  createLineInstance(const Locale& where, UErrorCode& status);
384 
404  static BreakIterator* U_EXPORT2
405  createCharacterInstance(const Locale& where, UErrorCode& status);
406 
425  static BreakIterator* U_EXPORT2
426  createSentenceInstance(const Locale& where, UErrorCode& status);
427 
450  static BreakIterator* U_EXPORT2
451  createTitleInstance(const Locale& where, UErrorCode& status);
452 
462  static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
463 
473  static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
474  const Locale& displayLocale,
475  UnicodeString& name);
476 
485  static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
486  UnicodeString& name);
487 
507  virtual BreakIterator * createBufferClone(void *stackBuffer,
508  int32_t &BufferSize,
509  UErrorCode &status) = 0;
510 
511 #ifndef U_HIDE_DEPRECATED_API
512 
519  inline UBool isBufferClone(void);
520 
521 #endif /* U_HIDE_DEPRECATED_API */
522 
523 #if !UCONFIG_NO_SERVICE
539  static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
540  const Locale& locale,
541  UBreakIteratorType kind,
542  UErrorCode& status);
543 
556  static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
557 
564  static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
565 #endif
566 
573 
574 #ifndef U_HIDE_INTERNAL_API
581  const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
582 #endif /* U_HIDE_INTERNAL_API */
583 
609  virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
610 
611  private:
612  static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status);
613  static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
614  static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
615 
616  friend class ICUBreakIteratorFactory;
617  friend class ICUBreakIteratorService;
618 
619 protected:
620  // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
621  // or else the compiler will create public ones.
625  BreakIterator (const BreakIterator &other) : UObject(other) {}
626 #ifndef U_HIDE_INTERNAL_API
628  BreakIterator (const Locale& valid, const Locale& actual);
629 #endif /* U_HIDE_INTERNAL_API */
630 
631 private:
632 
634  char actualLocale[ULOC_FULLNAME_CAPACITY];
635  char validLocale[ULOC_FULLNAME_CAPACITY];
636 
641  BreakIterator& operator=(const BreakIterator&);
642 };
643 
644 #ifndef U_HIDE_DEPRECATED_API
645 
647 {
648  return FALSE;
649 }
650 
651 #endif /* U_HIDE_DEPRECATED_API */
652 
654 
655 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
656 
657 #endif // BRKITER_H
658 //eof
C++ API: Character Iterator.
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition: brkiter.h:100
static UnicodeString & getDisplayName(const Locale &objectLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the language of the default locale.
const char * getLocaleID(ULocDataLocaleType type, UErrorCode &status) const
Get the locale for this break iterator object.
virtual BreakIterator & refreshInputText(UText *input, UErrorCode &status)=0
Set the subject text string upon which the break iterator is operating without changing any other asp...
static UnicodeString & getDisplayName(const Locale &objectLocale, const Locale &displayLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the desired language.
static URegistryKey registerInstance(BreakIterator *toAdopt, const Locale &locale, UBreakIteratorType kind, UErrorCode &status)
Register a new break iterator of the indicated kind, to use in the given locale.
BreakIterator(const Locale &valid, const Locale &actual)
virtual int32_t current(void) const =0
Return character index of the current iterator position within the text.
virtual ~BreakIterator()
destructor
static StringEnumeration * getAvailableLocales(void)
Return a StringEnumeration over the locales available at the time of the call, including registered l...
static UBool unregister(URegistryKey key, UErrorCode &status)
Unregister a previously-registered BreakIterator using the key returned from the register call.
static const Locale * getAvailableLocales(int32_t &count)
Get the set of Locales for which TextBoundaries are installed.
virtual int32_t following(int32_t offset)=0
Advance the iterator to the first boundary following the specified offset.
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status)
For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) that determined the m...
virtual int32_t next(void)=0
Advance the iterator to the boundary following the current boundary.
Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const
Returns the locale for this break iterator.
virtual BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status)=0
Deprecated functionality.
virtual UText * getUText(UText *fillIn, UErrorCode &status) const =0
Get a UText for the text being analyzed.
virtual void adoptText(CharacterIterator *it)=0
Change the text over which this operates.
virtual void setText(UText *text, UErrorCode &status)=0
Reset the break iterator to operate over the text represented by the UText.
static BreakIterator * createLineInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for line-breaks using specified locale.
virtual int32_t preceding(int32_t offset)=0
Set the iterator position to the first boundary preceding the specified offset.
virtual UBool isBoundary(int32_t offset)=0
Return true if the specified position is a boundary position.
virtual BreakIterator * clone(void) const =0
Return a polymorphic copy of this object.
static BreakIterator * createSentenceInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for sentence-breaks using specified locale. Returns an instance of a BreakIterato...
virtual CharacterIterator & getText(void) const =0
Return a CharacterIterator over the text being analyzed.
virtual int32_t last(void)=0
Set the iterator position to the index immediately BEYOND the last character in the text being scanne...
BreakIterator(const BreakIterator &other)
Definition: brkiter.h:625
virtual int32_t next(int32_t n)=0
Set the iterator position to the nth boundary from the current boundary.
static BreakIterator * createTitleInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for title-casing breaks using the specified locale. Returns an instance of a Brea...
virtual int32_t previous(void)=0
Set the iterator position to the boundary preceding the current boundary.
virtual void setText(const UnicodeString &text)=0
Change the text over which this operates.
virtual int32_t getRuleStatus() const
For RuleBasedBreakIterators, return the status tag from the break rule that determined the most recen...
UBool operator!=(const BreakIterator &rhs) const
Returns the complement of the result of operator==.
Definition: brkiter.h:129
virtual UBool operator==(const BreakIterator &) const =0
Return true if another object is semantically equal to this one.
virtual int32_t first(void)=0
Sets the current iteration position to the beginning of the text, position zero.
static BreakIterator * createCharacterInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for character-breaks using specified locale. Returns an instance of a BreakIterat...
virtual UClassID getDynamicClassID(void) const =0
Return a polymorphic class ID for this object.
static BreakIterator * createWordInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for word-breaks using the given locale.
UBool isBufferClone(void)
Determine whether the BreakIterator was created in user memory by createBufferClone(),...
Definition: brkiter.h:646
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:356
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:185
Base class for 'pure' C++ implementations of uenum api.
Definition: strenum.h:55
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:221
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:294
C++ API: Locale ID object.
C++ API: String Enumeration.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
UText struct.
Definition: utext.h:1343
C API: BreakIterator.
UBreakIteratorType
The possible types of text boundaries.
Definition: ubrk.h:97
#define ULOC_FULLNAME_CAPACITY
Useful constant for the maximum size of the whole locale ID (including the terminating NULL and all k...
Definition: uloc.h:262
ULocDataLocaleType
Constants for *_getLocale() Allow user to select whether she wants information on requested,...
Definition: uloc.h:336
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
#define FALSE
The FALSE value of a UBool.
Definition: umachine.h:242
C API: misc definitions.
const void * URegistryKey
Opaque type returned by registerInstance, registerFactory and unregister for service registration.
Definition: umisc.h:55
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:91
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:476
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:357
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129