ICU 57.1  57.1
messagepattern.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 * Copyright (C) 2011-2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: messagepattern.h
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2011mar14
12 * created by: Markus W. Scherer
13 */
14 
15 #ifndef __MESSAGEPATTERN_H__
16 #define __MESSAGEPATTERN_H__
17 
23 #include "unicode/utypes.h"
24 
25 #if !UCONFIG_NO_FORMATTING
26 
27 #include "unicode/parseerr.h"
28 #include "unicode/unistr.h"
29 
88 };
93 
203 };
208 
256 };
261 
/**
 * Evaluates to true when argType equals UMSGPAT_ARG_TYPE_PLURAL or
 * UMSGPAT_ARG_TYPE_SELECTORDINAL, and to false for any other value.
 * The argument is expanded twice, so avoid passing an expression with side effects.
 */
268 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
269  ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
270 
271 enum {
278 
287 };
288 
/**
 * Named double constant with the value -123456789.
 * NOTE(review): judging by the name, this marks a Part that has no numeric value
 * (presumably as a result of getNumericValue()) - confirm against the implementation.
 */
295 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
296 
298 
299 class MessagePatternDoubleList;
300 class MessagePatternPartsList;
301 
359 public:
369 
380 
/**
 * Constructs a MessagePattern with the default UMessagePatternApostropheMode
 * and parses the given MessageFormat pattern string.
 * @param pattern    the MessageFormat pattern string to parse
 * @param parseError receives detailed information about a parsing error
 *                   (NOTE(review): presumably may be NULL - confirm)
 * @param errorCode  ICU error code, used in place of exception handling
 */
399  MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
400 
407 
415 
420  virtual ~MessagePattern();
421 
440  UParseError *parseError, UErrorCode &errorCode);
441 
460  UParseError *parseError, UErrorCode &errorCode);
461 
480  UParseError *parseError, UErrorCode &errorCode);
481 
500  UParseError *parseError, UErrorCode &errorCode);
501 
/**
 * Clears this MessagePattern.
 * Also invoked by clearPatternAndSetApostropheMode() before it stores the new mode.
 */
507  void clear();
508 
516  clear();
517  aposMode=mode;
518  }
519 
525  UBool operator==(const MessagePattern &other) const;
526 
/**
 * Inequality operator, defined as the logical negation of operator==.
 * @param other the MessagePattern to compare with
 * @return the negated result of operator==(other)
 */
532  inline UBool operator!=(const MessagePattern &other) const {
533  return !operator==(other);
534  }
535 
540  int32_t hashCode() const;
541 
547  return aposMode;
548  }
549 
550  // Java has package-private jdkAposMode() here.
551  // In C++, this is declared in the MessageImpl class.
552 
558  return msg;
559  }
560 
567  return hasArgNames;
568  }
569 
576  return hasArgNumbers;
577  }
578 
/**
 * Validates and parses an argument name or argument number string.
 * @param name the string to check
 * @return UMSGPAT_ARG_NAME_NOT_VALID when the string is invalid, or
 *         UMSGPAT_ARG_NAME_NOT_NUMBER when it is a valid pattern identifier;
 *         NOTE(review): otherwise presumably the parsed argument number - confirm
 */
590  static int32_t validateArgumentName(const UnicodeString &name);
591 
603 
604  class Part;
605 
/**
 * Returns the number of "parts" created by parsing the pattern string.
 * @return the current value of partsLength
 */
612  int32_t countParts() const {
613  return partsLength;
614  }
615 
/**
 * Gets the i-th pattern "part".
 * The index is used directly on the parts array without a range check,
 * so the caller is responsible for passing a valid index.
 * @param i the index of the part
 * @return a const reference to parts[i]
 */
622  const Part &getPart(int32_t i) const {
623  return parts[i];
624  }
625 
634  return getPart(i).type;
635  }
636 
/**
 * Returns the pattern index of the specified pattern "part".
 * @param partIndex the index of the part, forwarded unchanged to getPart()
 * @return the index field of that part
 */
644  int32_t getPatternIndex(int32_t partIndex) const {
645  return getPart(partIndex).index;
646  }
647 
/**
 * Returns the substring of the pattern string indicated by the Part.
 * The result is produced by UnicodeString::tempSubString(), i.e. it is a
 * temporary substring of msg covering part.index .. part.index+part.length.
 * @param part the part whose index and length select the range
 * @return the selected substring of the pattern string
 */
655  UnicodeString getSubstring(const Part &part) const {
656  return msg.tempSubString(part.index, part.length);
657  }
658 
/**
 * Compares the part's substring with the input string s.
 * @param part the part whose index and length select the range of the pattern string
 * @param s    the string to compare against
 * @return true when msg.compare(part.index, part.length, s) yields 0
 */
666  UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
667  return 0==msg.compare(part.index, part.length, s);
668  }
669 
/**
 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE part.
 * @param part the part to query
 * @return the numeric value of the part
 *         (NOTE(review): presumably UMSGPAT_NO_NUMERIC_VALUE for other part types - confirm)
 */
676  double getNumericValue(const Part &part) const;
677 
/**
 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
 * @param pluralStart NOTE(review): presumably the part index where the plural style begins - confirm
 * @return the offset value, or 0
 */
684  double getPluralOffset(int32_t pluralStart) const;
685 
/**
 * Returns the index of the ARG|MSG_LIMIT part corresponding to the
 * ARG|MSG_START part at start.
 * @param start the index of the part to query
 * @return the limitPartIndex stored in that part, or start itself when the
 *         stored value is smaller than start
 */
694  int32_t getLimitPartIndex(int32_t start) const {
695  int32_t limit=getPart(start).limitPartIndex;
// A stored limit below start is not usable as a limit index; fall back to start.
696  if(limit<start) {
697  return start;
698  }
699  return limit;
700  }
701 
/**
 * A message pattern "part", representing a pattern parsing event.
 * The public interface consists of read-only accessors; MessagePattern is
 * declared a friend in the private section below.
 */
709  class Part : public UMemory {
710  public:
/** Default constructor, do not use. The empty body assigns none of the fields declared below. */
715  Part() {}
716 
723  return type;
724  }
725 
/** Returns the pattern string index associated with this Part. */
731  int32_t getIndex() const {
732  return index;
733  }
734 
/** Returns the length of the pattern substring associated with this Part. */
741  int32_t getLength() const {
742  return length;
743  }
744 
/**
 * Returns the pattern string limit (exclusive-end) index associated with this Part,
 * computed as index+length.
 */
751  int32_t getLimit() const {
752  return index+length;
753  }
754 
/** Returns a value associated with this part (the int16_t field, widened to int32_t). */
761  int32_t getValue() const {
762  return value;
763  }
764 
772  UMessagePatternPartType type=getType();
774  return (UMessagePatternArgType)value;
775  } else {
776  return UMSGPAT_ARG_TYPE_NONE;
777  }
778  }
779 
789  }
790 
/** Equality operator; declared here, defined elsewhere. */
796  UBool operator==(const Part &other) const;
797 
/** Inequality operator, defined as the logical negation of operator==. */
803  inline UBool operator!=(const Part &other) const {
804  return !operator==(other);
805  }
806 
/**
 * Returns a hash code that combines type, index, length and value,
 * multiplying the running result by 37 before each addition.
 * NOTE(review): the arithmetic is done in signed integers; confirm that
 * overflow cannot occur for very large index values.
 */
811  int32_t hashCode() const {
812  return ((type*37+index)*37+length)*37+value;
813  }
814 
815  private:
816  friend class MessagePattern;
817 
// 0xffff is the largest value the uint16_t length field below can hold.
818  static const int32_t MAX_LENGTH=0xffff;
// 0x7fff is the largest value the int16_t value field below can hold.
819  static const int32_t MAX_VALUE=0x7fff;
820 
821  // Some fields are not final because they are modified during pattern parsing.
822  // After pattern parsing, the parts are effectively immutable.
824  int32_t index;
825  uint16_t length;
826  int16_t value;
827  int32_t limitPartIndex;
828  };
829 
830 private:
831  void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
832 
833  void postParse();
834 
835  int32_t parseMessage(int32_t index, int32_t msgStartLength,
836  int32_t nestingLevel, UMessagePatternArgType parentType,
837  UParseError *parseError, UErrorCode &errorCode);
838 
839  int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
840  UParseError *parseError, UErrorCode &errorCode);
841 
842  int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
843 
844  int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
845  UParseError *parseError, UErrorCode &errorCode);
846 
847  int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
848  UParseError *parseError, UErrorCode &errorCode);
849 
858  static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
859 
/**
 * Convenience overload: forwards to the static parseArgNumber() with this
 * object's pattern string (msg) as the string to parse.
 * @param start first argument forwarded to the static overload
 * @param limit second argument forwarded to the static overload
 * @return the result of the static overload
 */
860  int32_t parseArgNumber(int32_t start, int32_t limit) {
861  return parseArgNumber(msg, start, limit);
862  }
863 
872  void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
873  UParseError *parseError, UErrorCode &errorCode);
874 
875  // Java has package-private appendReducedApostrophes() here.
876  // In C++, this is declared in the MessageImpl class.
877 
878  int32_t skipWhiteSpace(int32_t index);
879 
880  int32_t skipIdentifier(int32_t index);
881 
886  int32_t skipDouble(int32_t index);
887 
888  static UBool isArgTypeChar(UChar32 c);
889 
890  UBool isChoice(int32_t index);
891 
892  UBool isPlural(int32_t index);
893 
894  UBool isSelect(int32_t index);
895 
896  UBool isOrdinal(int32_t index);
897 
902  UBool inMessageFormatPattern(int32_t nestingLevel);
903 
908  UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
909 
910  void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
911  int32_t value, UErrorCode &errorCode);
912 
913  void addLimitPart(int32_t start,
914  UMessagePatternPartType type, int32_t index, int32_t length,
915  int32_t value, UErrorCode &errorCode);
916 
917  void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
918 
919  void setParseError(UParseError *parseError, int32_t index);
920 
921  UBool init(UErrorCode &errorCode);
922  UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
923 
925  UnicodeString msg;
926  // ArrayList<Part> parts=new ArrayList<Part>();
927  MessagePatternPartsList *partsList;
928  Part *parts;
929  int32_t partsLength;
930  // ArrayList<Double> numericValues;
931  MessagePatternDoubleList *numericValuesList;
932  double *numericValues;
933  int32_t numericValuesLength;
934  UBool hasArgNames;
935  UBool hasArgNumbers;
936  UBool needsAutoQuoting;
937 };
938 
940 
941 #endif // !UCONFIG_NO_FORMATTING
942 
943 #endif // __MESSAGEPATTERN_H__
A message pattern "part", representing a pattern parsing event.
UBool operator==(const Part &other) const
int32_t getLimit() const
Returns the pattern string limit (exclusive-end) index associated with this Part.
int32_t hashCode() const
int32_t getValue() const
Returns a value associated with this part.
Part()
Default constructor, do not use.
int32_t getIndex() const
Returns the pattern string index associated with this Part.
UMessagePatternArgType getArgType() const
Returns the argument type if this part is of type ARG_START or ARG_LIMIT, otherwise UMSGPAT_ARG_TYPE_NONE.
int32_t getLength() const
Returns the length of the pattern substring associated with this Part.
UMessagePatternPartType getType() const
Returns the type of this part.
static UBool hasNumericValue(UMessagePatternPartType type)
Indicates whether the Part type has a numeric value.
UBool operator!=(const Part &other) const
Parses and represents ICU MessageFormat patterns.
void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode)
Clears this MessagePattern and sets the UMessagePatternApostropheMode.
MessagePattern(UErrorCode &errorCode)
Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
virtual ~MessagePattern()
Destructor.
MessagePattern & parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a MessageFormat pattern string.
MessagePattern & parsePluralStyle(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a PluralFormat pattern string.
UnicodeString autoQuoteApostropheDeep() const
Returns a version of the parsed pattern string where each ASCII apostrophe is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
MessagePattern & operator=(const MessagePattern &other)
Assignment operator.
UMessagePatternPartType getPartType(int32_t i) const
Returns the UMessagePatternPartType of the i-th pattern "part".
MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Constructs a MessagePattern with default UMessagePatternApostropheMode and parses the MessageFormat pattern string.
UBool hasNumberedArguments() const
Does the parsed pattern have numbered arguments like {2}?
MessagePattern(const MessagePattern &other)
Copy constructor.
UBool hasNamedArguments() const
Does the parsed pattern have named arguments like {first_name}?
int32_t countParts() const
Returns the number of "parts" created by parsing the pattern string.
int32_t getPatternIndex(int32_t partIndex) const
Returns the pattern index of the specified pattern "part".
const Part & getPart(int32_t i) const
Gets the i-th pattern "part".
static int32_t validateArgumentName(const UnicodeString &name)
Validates and parses an argument name or argument number string.
int32_t hashCode() const
MessagePattern & parseChoiceStyle(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a ChoiceFormat pattern string.
double getNumericValue(const Part &part) const
Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
void clear()
Clears this MessagePattern.
MessagePattern & parseSelectStyle(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a SelectFormat pattern string.
const UnicodeString & getPatternString() const
double getPluralOffset(int32_t pluralStart) const
Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
UBool operator==(const MessagePattern &other) const
UnicodeString getSubstring(const Part &part) const
Returns the substring of the pattern string indicated by the Part.
UBool operator!=(const MessagePattern &other) const
int32_t getLimitPartIndex(int32_t start) const
Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode)
Constructs an empty MessagePattern.
UMessagePatternApostropheMode getApostropheMode() const
UBool partSubstringMatches(const Part &part, const UnicodeString &s) const
Compares the part's substring with the input string s.
UMemory is the common ICU base class.
Definition: uobject.h:110
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:221
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer classes.
Definition: unistr.h:294
UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const
Create a temporary substring for the specified range.
UMessagePatternPartType
MessagePattern::Part type constants.
@ UMSGPAT_PART_TYPE_INSERT_CHAR
Indicates that a syntax character needs to be inserted for auto-quoting.
@ UMSGPAT_PART_TYPE_ARG_TYPE
The argument type.
@ UMSGPAT_PART_TYPE_MSG_START
Start of a message pattern (main or nested).
@ UMSGPAT_PART_TYPE_ARG_SELECTOR
A selector substring in a "complex" argument style.
@ UMSGPAT_PART_TYPE_ARG_LIMIT
End of an argument.
@ UMSGPAT_PART_TYPE_ARG_NUMBER
The argument number, provided by the value.
@ UMSGPAT_PART_TYPE_MSG_LIMIT
End of a message pattern (main or nested).
@ UMSGPAT_PART_TYPE_ARG_DOUBLE
A numeric value, for example the offset or an explicit selector value in a PluralFormat style.
@ UMSGPAT_PART_TYPE_ARG_NAME
The argument name.
@ UMSGPAT_PART_TYPE_ARG_START
Start of an argument.
@ UMSGPAT_PART_TYPE_REPLACE_NUMBER
Indicates a syntactic (non-escaped) # symbol in a plural variant.
@ UMSGPAT_PART_TYPE_ARG_STYLE
The argument style text.
@ UMSGPAT_PART_TYPE_ARG_INT
An integer value, for example the offset or an explicit selector value in a PluralFormat style.
@ UMSGPAT_PART_TYPE_SKIP_SYNTAX
Indicates a substring of the pattern string which is to be skipped when formatting.
UMessagePatternArgType
Argument type constants.
@ UMSGPAT_ARG_TYPE_SELECT
The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
@ UMSGPAT_ARG_TYPE_NONE
The argument has no specified type.
@ UMSGPAT_ARG_TYPE_PLURAL
The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset (e.g., offset:1) and one or more (ARG_SELECTOR [explicit-value] message) tuples.
@ UMSGPAT_ARG_TYPE_SIMPLE
The argument has a "simple" type which is provided by the ARG_TYPE part.
@ UMSGPAT_ARG_TYPE_SELECTORDINAL
The argument is an ordinal-number PluralFormat with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
@ UMSGPAT_ARG_TYPE_CHOICE
The argument is a ChoiceFormat with one or more ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
@ UMSGPAT_ARG_NAME_NOT_VALID
Return value from MessagePattern.validateArgumentName() for when the string is invalid.
@ UMSGPAT_ARG_NAME_NOT_NUMBER
Return value from MessagePattern.validateArgumentName() for when the string is a valid "pattern identifier" but not a number.
UMessagePatternApostropheMode
Mode for when an apostrophe starts quoted literal text for MessageFormat output.
@ UMSGPAT_APOS_DOUBLE_OPTIONAL
A literal apostrophe is represented by either a single or a double apostrophe pattern character.
@ UMSGPAT_APOS_DOUBLE_REQUIRED
A literal apostrophe must be represented by a double apostrophe pattern character.
C API: Parse Error Information.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:332
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
C++ API: Unicode String.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers, and to use the same mechanism for C and C++.
Definition: utypes.h:476
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:357
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129