2 *******************************************************************************
4 * Copyright (C) 1999-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: unistr_case.cpp
10 * tab size: 8 (not used)
13 * created on: 2004aug19
14 * created by: Markus W. Scherer
16 * Case-mapping functions moved here from unistr.cpp
19 #include "unicode/utypes.h"
20 #include "unicode/putil.h"
21 #include "unicode/locid.h"
24 #include "unicode/ustring.h"
25 #include "unicode/unistr.h"
26 #include "unicode/uchar.h"
27 #include "unicode/ubrk.h"
34 //========================================
35 // Read-only implementation
36 //========================================
// Case-insensitive comparison of this string's characters against srcChars,
// delegating the actual folding comparison to u_strcmpFold().
// The visible return statements produce a nonzero int8_t whose sign follows
// the comparison (or the length difference).
// NOTE(review): the embedded listing numbers skip (39 -> 41 -> 44, 47 -> 52,
// 60 -> 65, 70 -> 73, ...), so the return type, the declarations of length /
// srcStart / srcLength, several braces and some statements are not present
// in this text. Do not treat this block as complete or compilable.
39 UnicodeString::doCaseCompare(int32_t start
,
41 const UChar
*srcChars
,
44 uint32_t options
) const
46 // compare illegal string values
47 // treat const UChar *srcChars==NULL as an empty string
52 // pin indices to legal values
53 pinIndices(start
, length
);
// A NULL source is handled as an empty range: both srcStart and srcLength become 0.
55 if(srcChars
== NULL
) {
56 srcStart
= srcLength
= 0;
59 // get the correct pointer
60 const UChar
*chars
= getArrayStart();
// The two pointers differ: compare contents with u_strcmpFold().
// U_COMPARE_IGNORE_CASE is always OR-ed into the caller-supplied options.
65 if(chars
!= srcChars
) {
66 UErrorCode errorCode
=U_ZERO_ERROR
;
67 int32_t result
=u_strcmpFold(chars
, length
, srcChars
, srcLength
,
68 options
|U_COMPARE_IGNORE_CASE
, &errorCode
);
// (result >> 24) | 1 keeps the sign of the int32_t result (with an arithmetic
// right shift), fits in int8_t, and can never be 0 because of the "| 1".
// NOTE(review): any guard for result == 0 is on a line missing from this listing.
70 return (int8_t)(result
>> 24 | 1);
73 // get the srcLength if necessary
75 srcLength
= u_strlen(srcChars
+ srcStart
);
// Lengths differ: report the sign of the difference using the same
// shift-and-OR form as above.
77 if(length
!= srcLength
) {
78 return (int8_t)((length
- srcLength
) >> 24 | 1);
84 //========================================
85 // Write implementation
86 //========================================
89 * Implement argument checking and buffer handling
90 * for string case mapping as a common function.
// Common worker behind toLower()/toUpper()/toTitle()/foldCase(): picks the
// ustr_* mapping function according to toWhichCase, writes the result into
// fArray, and retries with a regrown buffer while the mapper reports
// U_BUFFER_OVERFLOW_ERROR.
// NOTE(review): listing lines are missing here (embedded numbers skip, e.g.
// 94 -> 97 -> 103, 107 -> 112, 139 -> 142, 155 -> 160). The return type, the
// declarations of locale / options / capacity, the "do" and "else" lines, the
// source arguments of the ustr_* calls, and the bodies of the error checks
// are not visible in this text.
94 UnicodeString::caseMap(BreakIterator
*titleIter
,
97 int32_t toWhichCase
) {
103 UErrorCode errorCode
;
105 errorCode
= U_ZERO_ERROR
;
// Fetch the case-properties singleton that every ustr_* call below receives.
106 const UCaseProps
*csp
=ucase_getSingleton(&errorCode
);
107 if(U_FAILURE(errorCode
)) {
112 // We need to allocate a new buffer for the internal string case mapping function.
113 // This is very similar to how doReplace() below keeps the old array pointer
114 // and deletes the old array itself after it is done.
115 // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
116 UChar
*oldArray
= fArray
;
117 int32_t oldLength
= fLength
;
// Passed by address to cloneArrayIfNeeded() and released with uprv_free()
// once mapping is done; presumably it receives the old heap buffer (confirm
// against cloneArrayIfNeeded()).
118 int32_t *bufferToDelete
= 0;
120 // Make sure that if the string is in fStackBuffer we do not overwrite it!
// Choose the capacity to request. When fArray is fStackBuffer the request is
// twice US_STACKBUF_SIZE. The other two assignments (US_STACKBUF_SIZE and
// fLength + 20) presumably belong to the else branches, whose lines are
// missing from this listing.
122 if(fLength
<= US_STACKBUF_SIZE
) {
123 if(fArray
== fStackBuffer
) {
124 capacity
= 2 * US_STACKBUF_SIZE
; // make sure that cloneArrayIfNeeded() allocates a new buffer
126 capacity
= US_STACKBUF_SIZE
;
129 capacity
= fLength
+ 20;
131 if(!cloneArrayIfNeeded(capacity
, capacity
, FALSE
, &bufferToDelete
, TRUE
)) {
135 // Case-map, and if the result is too long, then reallocate and repeat.
// Reset the status before each attempt so an earlier overflow code is not
// carried into the next mapping call.
137 errorCode
= U_ZERO_ERROR
;
138 if(toWhichCase
==TO_LOWER
) {
139 fLength
= ustr_toLower(csp
, fArray
, fCapacity
,
142 } else if(toWhichCase
==TO_UPPER
) {
143 fLength
= ustr_toUpper(csp
, fArray
, fCapacity
,
146 } else if(toWhichCase
==TO_TITLE
) {
// Without break iteration support, title casing is reported as unsupported.
147 #if UCONFIG_NO_BREAK_ITERATION
148 errorCode
=U_UNSUPPORTED_ERROR
;
150 fLength
= ustr_toTitle(csp
, fArray
, fCapacity
,
152 (UBreakIterator
*)titleIter
, locale
, options
, &errorCode
);
// Remaining case: case folding (the "else" line itself is missing from this listing).
155 fLength
= ustr_foldCase(csp
, fArray
, fCapacity
,
// Loop again only while the mapper reported overflow AND the buffer could be
// regrown to the fLength value the mapper just stored.
160 } while(errorCode
==U_BUFFER_OVERFLOW_ERROR
&& cloneArrayIfNeeded(fLength
, fLength
, FALSE
));
// Release the buffer recorded in bufferToDelete, if one was set.
162 if (bufferToDelete
) {
163 uprv_free(bufferToDelete
);
165 if(U_FAILURE(errorCode
)) {
// Lowercases via caseMap() with TO_LOWER, using the default locale's name,
// a null break iterator (first argument) and no options (third argument).
// NOTE(review): the return-type line and closing brace are absent from this listing.
172 UnicodeString::toLower() {
173 return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER
);
// Lowercases via caseMap() with TO_LOWER, using the name of the supplied
// locale, a null break iterator and no options.
// NOTE(review): the return-type line and closing brace are absent from this listing.
177 UnicodeString::toLower(const Locale
&locale
) {
178 return caseMap(0, locale
.getName(), 0, TO_LOWER
);
// Uppercases via caseMap() with TO_UPPER, using the default locale's name,
// a null break iterator (first argument) and no options (third argument).
// NOTE(review): the return-type line and closing brace are absent from this listing.
182 UnicodeString::toUpper() {
183 return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER
);
// Uppercases via caseMap() with TO_UPPER, using the name of the supplied
// locale, a null break iterator and no options.
// NOTE(review): the return-type line and closing brace are absent from this listing.
187 UnicodeString::toUpper(const Locale
&locale
) {
188 return caseMap(0, locale
.getName(), 0, TO_UPPER
);
191 #if !UCONFIG_NO_BREAK_ITERATION
// Title-cases via caseMap() with TO_TITLE: forwards titleIter unchanged and
// uses the default locale's name with no options (third argument 0).
// NOTE(review): the return-type line and closing brace are absent from this listing.
194 UnicodeString::toTitle(BreakIterator
*titleIter
) {
195 return caseMap(titleIter
, Locale::getDefault().getName(), 0, TO_TITLE
);
// Title-cases via caseMap() with TO_TITLE: forwards titleIter unchanged and
// uses the name of the supplied locale with no options (third argument 0).
// NOTE(review): the return-type line and closing brace are absent from this listing.
199 UnicodeString::toTitle(BreakIterator
*titleIter
, const Locale
&locale
) {
200 return caseMap(titleIter
, locale
.getName(), 0, TO_TITLE
);
// Title-cases via caseMap() with TO_TITLE: forwards titleIter, the name of
// the supplied locale, and the caller's options unchanged.
// NOTE(review): the return-type line and closing brace are absent from this listing.
204 UnicodeString::toTitle(BreakIterator
*titleIter
, const Locale
&locale
, uint32_t options
) {
205 return caseMap(titleIter
, locale
.getName(), options
, TO_TITLE
);
// Case-folds via caseMap() with FOLD_CASE, forwarding the caller's options
// and passing a null break iterator and an empty locale string.
// NOTE(review): the return-type line and closing brace are absent from this listing.
211 UnicodeString::foldCase(uint32_t options
) {
212 /* The Locale parameter isn't used. Use "" instead. */
213 return caseMap(0, "", options
, FOLD_CASE
);
// Hash function over a UHashTok key whose pointer member is read as a
// const UnicodeString*: hashes a case-folded copy, so the keyed string
// itself is not modified.
// NOTE(review): listing lines 221 and 223-225 (between the cast and the copy)
// are missing from this text; confirm there whether a NULL key.pointer is
// handled before *str is dereferenced.
218 // Defined here to reduce dependencies on break iterator
219 U_CAPI
int32_t U_EXPORT2
220 uhash_hashCaselessUnicodeString(const UHashTok key
) {
222 const UnicodeString
*str
= (const UnicodeString
*) key
.pointer
;
226 // Inefficient; a better way would be to have a hash function in
227 // UnicodeString that does case folding on the fly.
// Fold a private copy because foldCase() is called on a non-const object
// while *str is const.
228 UnicodeString
copy(*str
);
229 return copy
.foldCase().hashCode();
// Equality function over two UHashTok keys whose pointer members are read as
// const UnicodeString*: reports whether caseCompare() with
// U_FOLD_CASE_DEFAULT returns 0.
// NOTE(review): listing lines 235, 238-240 and 242-243 are missing from this
// text, so any check before the NULL test and the value returned when either
// pointer is NULL are not visible here.
232 // Defined here to reduce dependencies on break iterator
233 U_CAPI UBool U_EXPORT2
234 uhash_compareCaselessUnicodeString(const UHashTok key1
, const UHashTok key2
) {
236 const UnicodeString
*str1
= (const UnicodeString
*) key1
.pointer
;
237 const UnicodeString
*str2
= (const UnicodeString
*) key2
.pointer
;
// Guard against dereferencing a NULL key below.
241 if (str1
== NULL
|| str2
== NULL
) {
244 return str1
->caseCompare(*str2
, U_FOLD_CASE_DEFAULT
) == 0;