2 *****************************************************************************
4 * Copyright (C) 1998-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *****************************************************************************
10 * Implements error behaviour functions called by T_UConverter_{from,to}Unicode
15 * 06/29/2000 helena Major rewrite of the callback APIs.
18 #include "unicode/utypes.h"
20 #if !UCONFIG_NO_CONVERSION
22 #include "unicode/ucnv_err.h"
23 #include "unicode/ucnv_cb.h"
26 #include "unicode/ucnv.h"
29 #define VALUE_STRING_LENGTH 32
30 /*Magic # 32 = 4(number of char in value string) * 8(max number of bytes per char for any converter) */
31 #define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025
32 #define UNICODE_U_CODEPOINT 0x0055
33 #define UNICODE_X_CODEPOINT 0x0058
34 #define UNICODE_RS_CODEPOINT 0x005C
35 #define UNICODE_U_LOW_CODEPOINT 0x0075
36 #define UNICODE_X_LOW_CODEPOINT 0x0078
37 #define UNICODE_AMP_CODEPOINT 0x0026
38 #define UNICODE_HASH_CODEPOINT 0x0023
39 #define UNICODE_SEMICOLON_CODEPOINT 0x003B
40 #define UNICODE_PLUS_CODEPOINT 0x002B
41 #define UNICODE_LEFT_CURLY_CODEPOINT 0x007B
42 #define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D
43 #define UCNV_PRV_ESCAPE_ICU 0
44 #define UCNV_PRV_ESCAPE_C 'C'
45 #define UCNV_PRV_ESCAPE_XML_DEC 'D'
46 #define UCNV_PRV_ESCAPE_XML_HEX 'X'
47 #define UCNV_PRV_ESCAPE_JAVA 'J'
48 #define UCNV_PRV_ESCAPE_UNICODE 'U'
49 #define UCNV_PRV_STOP_ON_ILLEGAL 'i'
51 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
53 UCNV_FROM_U_CALLBACK_STOP (
55 UConverterFromUnicodeArgs
*fromUArgs
,
56 const UChar
* codeUnits
,
59 UConverterCallbackReason reason
,
62 /* the caller must have set the error code accordingly */
67 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
69 UCNV_TO_U_CALLBACK_STOP (
71 UConverterToUnicodeArgs
*toUArgs
,
72 const char* codePoints
,
74 UConverterCallbackReason reason
,
77 /* the caller must have set the error code accordingly */
82 UCNV_FROM_U_CALLBACK_SKIP (
84 UConverterFromUnicodeArgs
*fromUArgs
,
85 const UChar
* codeUnits
,
88 UConverterCallbackReason reason
,
91 if (reason
<= UCNV_IRREGULAR
)
93 if (context
== NULL
|| (*((char*)context
) == UCNV_PRV_STOP_ON_ILLEGAL
&& reason
== UCNV_UNASSIGNED
))
97 /* else the caller must have set the error code accordingly. */
99 /* else ignore the reset, close and clone calls. */
102 U_CAPI
void U_EXPORT2
103 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
105 UConverterFromUnicodeArgs
*fromArgs
,
106 const UChar
* codeUnits
,
109 UConverterCallbackReason reason
,
112 if (reason
<= UCNV_IRREGULAR
)
114 if (context
== NULL
|| (*((char*)context
) == UCNV_PRV_STOP_ON_ILLEGAL
&& reason
== UCNV_UNASSIGNED
))
117 ucnv_cbFromUWriteSub(fromArgs
, 0, err
);
119 /* else the caller must have set the error code accordingly. */
121 /* else ignore the reset, close and clone calls. */
124 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
125 *uses a clean copy (resetted) of the converter, to convert that unicode
126 *escape sequence to the target codepage (if conversion failure happens then
127 *we revert to substituting with subchar)
129 U_CAPI
void U_EXPORT2
130 UCNV_FROM_U_CALLBACK_ESCAPE (
132 UConverterFromUnicodeArgs
*fromArgs
,
133 const UChar
*codeUnits
,
136 UConverterCallbackReason reason
,
140 UChar valueString
[VALUE_STRING_LENGTH
];
141 int32_t valueStringLength
= 0;
144 const UChar
*myValueSource
= NULL
;
145 UErrorCode err2
= U_ZERO_ERROR
;
146 UConverterFromUCallback original
= NULL
;
147 const void *originalContext
;
149 UConverterFromUCallback ignoredCallback
= NULL
;
150 const void *ignoredContext
;
152 if (reason
> UCNV_IRREGULAR
)
157 ucnv_setFromUCallBack (fromArgs
->converter
,
158 (UConverterFromUCallback
) UCNV_FROM_U_CALLBACK_SUBSTITUTE
,
164 if (U_FAILURE (err2
))
173 valueString
[valueStringLength
++] = (UChar
) UNICODE_PERCENT_SIGN_CODEPOINT
; /* adding % */
174 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_CODEPOINT
; /* adding U */
175 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[i
++], 16, 4);
180 switch(*((char*)context
))
182 case UCNV_PRV_ESCAPE_JAVA
:
185 valueString
[valueStringLength
++] = (UChar
) UNICODE_RS_CODEPOINT
; /* adding \ */
186 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_LOW_CODEPOINT
; /* adding u */
187 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[i
++], 16, 4);
191 case UCNV_PRV_ESCAPE_C
:
192 valueString
[valueStringLength
++] = (UChar
) UNICODE_RS_CODEPOINT
; /* adding \ */
195 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_CODEPOINT
; /* adding U */
196 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, codePoint
, 16, 8);
200 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_LOW_CODEPOINT
; /* adding u */
201 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[0], 16, 4);
205 case UCNV_PRV_ESCAPE_XML_DEC
:
207 valueString
[valueStringLength
++] = (UChar
) UNICODE_AMP_CODEPOINT
; /* adding & */
208 valueString
[valueStringLength
++] = (UChar
) UNICODE_HASH_CODEPOINT
; /* adding # */
210 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, codePoint
, 10, 0);
213 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[0], 10, 0);
215 valueString
[valueStringLength
++] = (UChar
) UNICODE_SEMICOLON_CODEPOINT
; /* adding ; */
218 case UCNV_PRV_ESCAPE_XML_HEX
:
220 valueString
[valueStringLength
++] = (UChar
) UNICODE_AMP_CODEPOINT
; /* adding & */
221 valueString
[valueStringLength
++] = (UChar
) UNICODE_HASH_CODEPOINT
; /* adding # */
222 valueString
[valueStringLength
++] = (UChar
) UNICODE_X_LOW_CODEPOINT
; /* adding x */
224 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, codePoint
, 16, 0);
227 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[0], 16, 0);
229 valueString
[valueStringLength
++] = (UChar
) UNICODE_SEMICOLON_CODEPOINT
; /* adding ; */
232 case UCNV_PRV_ESCAPE_UNICODE
:
233 valueString
[valueStringLength
++] = (UChar
) UNICODE_LEFT_CURLY_CODEPOINT
; /* adding { */
234 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_CODEPOINT
; /* adding U */
235 valueString
[valueStringLength
++] = (UChar
) UNICODE_PLUS_CODEPOINT
; /* adding + */
237 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, codePoint
, 16, 4);
239 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[0], 16, 4);
241 valueString
[valueStringLength
++] = (UChar
) UNICODE_RIGHT_CURLY_CODEPOINT
; /* adding } */
247 valueString
[valueStringLength
++] = (UChar
) UNICODE_PERCENT_SIGN_CODEPOINT
; /* adding % */
248 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_CODEPOINT
; /* adding U */
249 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[i
++], 16, 4);
253 myValueSource
= valueString
;
255 /* reset the error */
258 ucnv_cbFromUWriteUChars(fromArgs
, &myValueSource
, myValueSource
+valueStringLength
, 0, err
);
260 ucnv_setFromUCallBack (fromArgs
->converter
,
266 if (U_FAILURE (err2
))
277 U_CAPI
void U_EXPORT2
278 UCNV_TO_U_CALLBACK_SKIP (
280 UConverterToUnicodeArgs
*toArgs
,
281 const char* codeUnits
,
283 UConverterCallbackReason reason
,
286 if (reason
<= UCNV_IRREGULAR
)
288 if (context
== NULL
|| (*((char*)context
) == UCNV_PRV_STOP_ON_ILLEGAL
&& reason
== UCNV_UNASSIGNED
))
292 /* else the caller must have set the error code accordingly. */
294 /* else ignore the reset, close and clone calls. */
297 U_CAPI
void U_EXPORT2
298 UCNV_TO_U_CALLBACK_SUBSTITUTE (
300 UConverterToUnicodeArgs
*toArgs
,
301 const char* codeUnits
,
303 UConverterCallbackReason reason
,
306 if (reason
<= UCNV_IRREGULAR
)
308 if (context
== NULL
|| (*((char*)context
) == UCNV_PRV_STOP_ON_ILLEGAL
&& reason
== UCNV_UNASSIGNED
))
311 ucnv_cbToUWriteSub(toArgs
,0,err
);
313 /* else the caller must have set the error code accordingly. */
315 /* else ignore the reset, close and clone calls. */
318 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
319 *and uses that as the substitution sequence
321 U_CAPI
void U_EXPORT2
322 UCNV_TO_U_CALLBACK_ESCAPE (
324 UConverterToUnicodeArgs
*toArgs
,
325 const char* codeUnits
,
327 UConverterCallbackReason reason
,
330 UChar uniValueString
[VALUE_STRING_LENGTH
];
331 int32_t valueStringLength
= 0;
334 if (reason
> UCNV_IRREGULAR
)
343 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_PERCENT_SIGN_CODEPOINT
; /* adding % */
344 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_X_CODEPOINT
; /* adding X */
345 valueStringLength
+= uprv_itou (uniValueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint8_t) codeUnits
[i
++], 16, 2);
350 switch(*((char*)context
))
352 case UCNV_PRV_ESCAPE_XML_DEC
:
355 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_AMP_CODEPOINT
; /* adding & */
356 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_HASH_CODEPOINT
; /* adding # */
357 valueStringLength
+= uprv_itou (uniValueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint8_t)codeUnits
[i
++], 10, 0);
358 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_SEMICOLON_CODEPOINT
; /* adding ; */
362 case UCNV_PRV_ESCAPE_XML_HEX
:
365 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_AMP_CODEPOINT
; /* adding & */
366 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_HASH_CODEPOINT
; /* adding # */
367 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_X_LOW_CODEPOINT
; /* adding x */
368 valueStringLength
+= uprv_itou (uniValueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint8_t)codeUnits
[i
++], 16, 0);
369 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_SEMICOLON_CODEPOINT
; /* adding ; */
372 case UCNV_PRV_ESCAPE_C
:
375 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_RS_CODEPOINT
; /* adding \ */
376 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_X_LOW_CODEPOINT
; /* adding x */
377 valueStringLength
+= uprv_itou (uniValueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint8_t)codeUnits
[i
++], 16, 2);
383 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_PERCENT_SIGN_CODEPOINT
; /* adding % */
384 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_X_CODEPOINT
; /* adding X */
385 uprv_itou (uniValueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint8_t) codeUnits
[i
++], 16, 2);
386 valueStringLength
+= 2;
390 /* reset the error */
393 ucnv_cbToUWriteUChars(toArgs
, uniValueString
, valueStringLength
, 0, err
);