ICU 69.1
caniter.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  * Copyright (C) 1996-2014, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 #ifndef CANITER_H
11 #define CANITER_H
12 
13 #include "unicode/utypes.h"
14 
15 #if U_SHOW_CPLUSPLUS_API
16 
17 #if !UCONFIG_NO_NORMALIZATION
18 
19 #include "unicode/uobject.h"
20 #include "unicode/unistr.h"
21 
// Provide a default value (true) for CANITER_SKIP_ZEROES unless the macro was
// already defined before this header was included.
31 #ifndef CANITER_SKIP_ZEROES
32 #define CANITER_SKIP_ZEROES true
33 #endif
34 
35 U_NAMESPACE_BEGIN
36 
37 class Hashtable;
38 class Normalizer2;
39 class Normalizer2Impl;
40 
77 public:
// Construct a CanonicalIterator object.
// source: the string to iterate over (the class iterates through the strings
//         canonically equivalent to a given string).
// status: ICU error code (UErrorCode), ICU4C's substitute for exceptions.
84  CanonicalIterator(const UnicodeString &source, UErrorCode &status);
85 
// Destructor. Cleans pieces.
90  virtual ~CanonicalIterator();
91 
98 
// Resets the iterator so that one can start again from the beginning.
103  void reset();
104 
113 
// Set a new source for this iterator.
// newSource: the replacement source string.
// status: ICU error code (UErrorCode), ICU4C's substitute for exceptions.
121  void setSource(const UnicodeString &newSource, UErrorCode &status);
122 
123 #ifndef U_HIDE_INTERNAL_API
// Dumb recursive implementation of permutation.
// Internal API: only declared when U_HIDE_INTERNAL_API is not defined.
// NOTE(review): `result` presumably receives the generated permutations and
// `skipZeros` presumably corresponds to CANITER_SKIP_ZEROES — neither is
// visible in this header; confirm against the implementation.
133  static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
134 #endif /* U_HIDE_INTERNAL_API */
135 
// ICU "poor man's RTTI", returns a UClassID for this class.
141  static UClassID U_EXPORT2 getStaticClassID();
142 
// ICU "poor man's RTTI", returns a UClassID for the actual class.
148  virtual UClassID getDynamicClassID() const;
149 
150 private:
151  // ===================== PRIVATES ==============================
152  // private default constructor
154 
155 
// Copy constructor, declared in the private section so that code outside the
// class cannot copy-construct a CanonicalIterator.
160  CanonicalIterator(const CanonicalIterator& other);
161 
// Copy assignment operator, declared in the private section so that code
// outside the class cannot assign one CanonicalIterator to another.
166  CanonicalIterator& operator=(const CanonicalIterator& other);
167 
168  // fields
169  UnicodeString source;
170  UBool done;
171 
172  // 2 dimensional array holds the pieces of the string with
173  // their different canonically equivalent representations
174  UnicodeString **pieces;
175  int32_t pieces_length;
176  int32_t *pieces_lengths;
177 
178  // current is used in iterating to combine pieces
179  int32_t *current;
180  int32_t current_length;
181 
182  // transient fields
183  UnicodeString buffer;
184 
185  const Normalizer2 &nfd;
186  const Normalizer2Impl &nfcImpl;
187 
188  // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
// result_len: output parameter (non-const reference); presumably the number of
//             strings in the returned array — TODO confirm in the implementation.
// status: ICU error code (UErrorCode), ICU4C's substitute for exceptions.
// NOTE(review): ownership of the returned UnicodeString array is not visible
// in this header.
189  UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)
190 
191  //Set getEquivalents2(String segment);
// Takes the segment as a char16_t buffer (segment) plus its length (segLen)
// and works with Hashtable objects rather than a UnicodeString array.
// NOTE(review): the relationship between fillinResult and the returned
// pointer is not visible in this header — confirm in the implementation.
192  Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status);
193  //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);
194 
200  //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);
// comp: a single Unicode code point (UChar32).
// segment/segLen: char16_t buffer and its length; segmentPos: a position
// within it (exact meaning not visible here — TODO confirm).
// NOTE(review): the relationship between fillinResult and the returned
// pointer is not visible in this header — confirm in the implementation.
201  Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
202  //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
203 
// NOTE(review): presumably releases the `pieces` / `pieces_lengths` / `current`
// arrays (the destructor is documented as cleaning pieces) — confirm in the
// implementation.
204  void cleanPieces();
205 
206 };
207 
208 U_NAMESPACE_END
209 
210 #endif /* #if !UCONFIG_NO_NORMALIZATION */
211 
212 #endif /* U_SHOW_CPLUSPLUS_API */
213 
214 #endif
This class allows one to iterate through all the strings that are canonically equivalent to a given string.
Definition: caniter.h:76
void reset()
Resets the iterator so that one can start again from the beginning.
UnicodeString next()
Get the next canonically equivalent string.
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
static void permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status)
Dumb recursive implementation of permutation.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
void setSource(const UnicodeString &newSource, UErrorCode &status)
Set a new source for this iterator.
CanonicalIterator(const UnicodeString &source, UErrorCode &status)
Construct a CanonicalIterator object.
UnicodeString getSource()
Gets the NFD form of the current source we are iterating over.
virtual ~CanonicalIterator()
Destructor. Cleans pieces.
Unicode normalization functionality for standard Unicode normalization or for using custom mapping tables.
Definition: normalizer2.h:85
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
Definition: unistr.h:296
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:467
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269
#define U_FINAL
Defined to the C++11 "final" keyword if available.
Definition: umachine.h:141
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:300