2 *******************************************************************************
3 * Copyright (C) 2006, International Business Machines Corporation and others. *
4 * All Rights Reserved. *
5 *******************************************************************************
11 #include "unicode/utypes.h"
12 #include "unicode/uobject.h"
13 #include "unicode/utext.h"
20 * <p>UDataSwapFn function for use in swapping a compact dictionary.</p>
22 * @param ds Pointer to UDataSwapper containing global data about the
23 * transformation and function pointers for handling primitive
25 * @param inData Pointer to the input data to be transformed or examined.
26 * @param length Length of the data, counting bytes. May be -1 for preflighting.
27 * If length>=0, then transform the data.
28 * If length==-1, then only determine the length of the data.
29 * The length cannot be determined from the data itself for all
30 * types of data (e.g., not for simple arrays of integers).
31 * @param outData Pointer to the output data buffer.
32 * If length>=0 (transformation), then the output buffer must
33 * have a capacity of at least length.
34 * If length==-1, then outData will not be used and can be NULL.
35 * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
36 * fulfill U_SUCCESS on input.
37 * @return The actual length of the data.
42 U_CAPI
int32_t U_EXPORT2
43 triedict_swap(const UDataSwapper
*ds
,
44 const void *inData
, int32_t length
, void *outData
,
45 UErrorCode
*pErrorCode
);
// Forward declarations: these types are only named in declarations below,
// so their full definitions are not required here.
class StringEnumeration;
struct CompactTrieHeader;
52 /*******************************************************************
57 * <p>TrieWordDictionary is an abstract class that represents a word
58 * dictionary based on a trie. The base protocol is read-only.
59 * Subclasses may allow writing.</p>
61 class U_COMMON_API TrieWordDictionary
: public UMemory
{
65 * <p>Default constructor.</p>
71 * <p>Virtual destructor.</p>
73 virtual ~TrieWordDictionary();
76 * <p>Find dictionary words that match the text.</p>
78 * @param text A UText representing the text. The
79 * iterator is left after the longest prefix match in the dictionary.
80 * @param start The current position in text.
81 * @param maxLength The maximum number of code units to match.
82 * @param lengths An array that is filled with the lengths of words that matched.
83 * @param count Filled with the number of elements output in lengths.
84 * @param limit The size of the lengths array; this limits the number of words output.
85 * @return The number of characters in text that were matched.
87 virtual int32_t matches( UText
*text
,
91 int limit
) const = 0;
94 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
96 * @param status A status code recording the success of the call.
97 * @return A StringEnumeration that will iterate through the whole dictionary.
98 * The caller is responsible for closing it. The order is unspecified.
100 virtual StringEnumeration
*openWords( UErrorCode
&status
) const = 0;
104 /*******************************************************************
105 * MutableTrieDictionary
109 * <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be
struct TernaryNode;  // Forward declaration
115 class U_COMMON_API MutableTrieDictionary
: public TrieWordDictionary
{
118 * The root node of the trie
125 * A UText for internal use
131 friend class CompactTrieDictionary
; // For fast conversion
136 * <p>Constructor.</p>
138 * @param median A UChar around which to balance the trie. Ideally, it should
139 * begin at least one word that is near the median of the set in the dictionary
140 * @param status A status code recording the success of the call.
142 MutableTrieDictionary( UChar median
, UErrorCode
&status
);
145 * <p>Virtual destructor.</p>
147 virtual ~MutableTrieDictionary();
150 * <p>Find dictionary words that match the text.</p>
152 * @param text A UText representing the text. The
153 * iterator is left after the longest prefix match in the dictionary.
154 * @param maxLength The maximum number of code units to match.
155 * @param lengths An array that is filled with the lengths of words that matched.
156 * @param count Filled with the number of elements output in lengths.
157 * @param limit The size of the lengths array; this limits the number of words output.
158 * @return The number of characters in text that were matched.
160 virtual int32_t matches( UText
*text
,
167 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
169 * @param status A status code recording the success of the call.
170 * @return A StringEnumeration that will iterate through the whole dictionary.
171 * The caller is responsible for closing it. The order is unspecified.
173 virtual StringEnumeration
*openWords( UErrorCode
&status
) const;
176 * <p>Add one word to the dictionary.</p>
178 * @param word A UChar buffer containing the word.
179 * @param length The length of the word.
180 * @param status The resultant status
182 virtual void addWord( const UChar
*word
,
188 * <p>Add all strings from a UEnumeration to the dictionary.</p>
190 * @param words A UEnumeration that will return the desired words.
191 * @param status The resultant status
193 virtual void addWords( UEnumeration
*words
, UErrorCode
&status
);
198 * <p>Search the dictionary for matches.</p>
200 * @param text A UText representing the text. The
201 * iterator is left after the longest prefix match in the dictionary.
202 * @param maxLength The maximum number of code units to match.
203 * @param lengths An array that is filled with the lengths of words that matched.
204 * @param count Filled with the number of elements output in lengths.
205 * @param limit The size of the lengths array; this limits the number of words output.
206 * @param parent The parent of the current node
207 * @param pMatched The returned parent node matched the input
208 * @return The number of characters in text that were matched.
210 virtual int32_t search( UText
*text
,
215 TernaryNode
*&parent
,
216 UBool
&pMatched
) const;
220 * <p>Private constructor. The root node it not allocated.</p>
222 * @param status A status code recording the success of the call.
224 MutableTrieDictionary( UErrorCode
&status
);
227 /*******************************************************************
228 * CompactTrieDictionary
232 * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted
235 class U_COMMON_API CompactTrieDictionary
: public TrieWordDictionary
{
238 * The root node of the trie
241 const CompactTrieHeader
*fData
;
244 * A UBool indicating whether or not we own the fData.
252 * <p>Construct a dictionary from a UDataMemory.</p>
254 * @param data A pointer to a UDataMemory, which is adopted
255 * @param status A status code giving the result of the constructor
257 CompactTrieDictionary(UDataMemory
*dataObj
, UErrorCode
&status
);
260 * <p>Construct a dictionary from raw saved data.</p>
262 * @param data A pointer to the raw data, which is still owned by the caller
263 * @param status A status code giving the result of the constructor
265 CompactTrieDictionary(const void *dataObj
, UErrorCode
&status
);
268 * <p>Construct a dictionary from a MutableTrieDictionary.</p>
270 * @param dict The dictionary to use as input.
271 * @param status A status code recording the success of the call.
273 CompactTrieDictionary( const MutableTrieDictionary
&dict
, UErrorCode
&status
);
276 * <p>Virtual destructor.</p>
278 virtual ~CompactTrieDictionary();
281 * <p>Find dictionary words that match the text.</p>
283 * @param text A UText representing the text. The
284 * iterator is left after the longest prefix match in the dictionary.
285 * @param maxLength The maximum number of code units to match.
286 * @param lengths An array that is filled with the lengths of words that matched.
287 * @param count Filled with the number of elements output in lengths.
288 * @param limit The size of the lengths array; this limits the number of words output.
289 * @return The number of characters in text that were matched.
291 virtual int32_t matches( UText
*text
,
298 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
300 * @param status A status code recording the success of the call.
301 * @return A StringEnumeration that will iterate through the whole dictionary.
302 * The caller is responsible for closing it. The order is unspecified.
304 virtual StringEnumeration
*openWords( UErrorCode
&status
) const;
307 * <p>Return the size of the compact data.</p>
309 * @return The size of the dictionary's compact data.
311 virtual uint32_t dataSize() const;
314 * <p>Return a void * pointer to the compact data, platform-endian.</p>
316 * @return The data for the compact dictionary, suitable for passing to the
319 virtual const void *data() const;
322 * <p>Return a MutableTrieDictionary clone of this dictionary.</p>
324 * @param status A status code recording the success of the call.
325 * @return A MutableTrieDictionary with the same data as this dictionary
327 virtual MutableTrieDictionary
*cloneMutable( UErrorCode
&status
) const;
332 * <p>Convert a MutableTrieDictionary into a compact data blob.</p>
334 * @param dict The dictionary to convert.
335 * @param status A status code recording the success of the call.
336 * @return A single data blob starting with a CompactTrieHeader.
338 static CompactTrieHeader
*compactMutableTrieDictionary( const MutableTrieDictionary
&dict
,
339 UErrorCode
&status
);