- support of [Strings.LanguageID]-sections for inf-files added in setupapi
[reactos.git] / reactos / lib / 3rdparty / icu4ros / icu / source / common / ucnvhz.c
1 /*
2 **********************************************************************
3 * Copyright (C) 2000-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * file name: ucnvhz.c
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2000oct16
12 * created by: Ram Viswanadha
13 * 10/31/2000 Ram Implemented offsets logic function
14 *
15 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
20
21 #include "cmemory.h"
22 #include "unicode/ucnv.h"
23 #include "unicode/ucnv_cb.h"
24 #include "unicode/uset.h"
25 #include "ucnv_bld.h"
26 #include "ucnv_cnv.h"
27
/* Single bytes that make up the HZ escape sequences. */
#define UCNV_TILDE       0x7E /* ~ */
#define UCNV_OPEN_BRACE  0x7B /* { */
#define UCNV_CLOSE_BRACE 0x7D /* } */

/* Two-byte escape sequences written by the from-Unicode direction. */
#define SB_ESCAPE    "\x7E\x7D" /* "~}" : switch to single-byte mode */
#define DB_ESCAPE    "\x7E\x7B" /* "~{" : switch to double-byte mode */
#define TILDE_ESCAPE "\x7E\x7E" /* "~~" : a literal tilde */

/* Number of bytes in each of the escape sequences above. */
#define ESC_LEN 2
35
36
/*
 * Appends `len` bytes starting at `strToAppend` to the output of a
 * from-Unicode conversion.
 *
 * Bytes are stored in args->target[targetIndex] while targetIndex is below
 * targetLength; any remaining bytes are stored in the converter's
 * charErrorBuffer and *err is set to U_BUFFER_OVERFLOW_ERROR.
 *
 * Side effects on the arguments (all must be modifiable lvalues where noted):
 *   targetIndex  - incremented once per byte stored in the target
 *   strToAppend  - advanced past every byte consumed
 *   len          - counted down; it is -1 when the macro finishes
 *
 * NOTE: when args->offsets is non-NULL the macro writes (sourceIndex - 1)
 * through a variable named `offsets` that must exist in the caller's scope,
 * and post-increments it. That variable is not a macro parameter.
 *
 * The body is wrapped in do { } while(0) so that an invocation followed by
 * a semicolon forms exactly one statement (safe inside unbraced if/else).
 */
#define CONCAT_ESCAPE_MACRO(args, targetIndex, targetLength, strToAppend, err, len, sourceIndex) \
    do { \
        while((len)-- > 0) { \
            if((targetIndex) < (targetLength)) { \
                (args)->target[(targetIndex)] = (unsigned char) *(strToAppend); \
                if((args)->offsets != NULL) { \
                    *(offsets++) = (sourceIndex) - 1; \
                } \
                (targetIndex)++; \
            } \
            else { \
                (args)->converter->charErrorBuffer[(int)(args)->converter->charErrorBufferLength++] = (unsigned char) *(strToAppend); \
                *(err) = U_BUFFER_OVERFLOW_ERROR; \
            } \
            (strToAppend)++; \
        } \
    } while(0)
53
54
55 typedef struct{
56 UConverter* gbConverter;
57 int32_t targetIndex;
58 int32_t sourceIndex;
59 UBool isEscapeAppended;
60 UBool isStateDBCS;
61 UBool isTargetUCharDBCS;
62 }UConverterDataHZ;
63
64
65
66 static void
67 _HZOpen(UConverter *cnv, const char *name,const char *locale,uint32_t options, UErrorCode *errorCode){
68 cnv->toUnicodeStatus = 0;
69 cnv->fromUnicodeStatus= 0;
70 cnv->mode=0;
71 cnv->fromUChar32=0x0000;
72 cnv->extraInfo = uprv_malloc(sizeof(UConverterDataHZ));
73 if(cnv->extraInfo != NULL){
74 uprv_memset(cnv->extraInfo, 0, sizeof(UConverterDataHZ));
75 ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("ibm-1386",errorCode);
76 }
77 else {
78 *errorCode = U_MEMORY_ALLOCATION_ERROR;
79 return;
80 }
81 }
82
83 static void
84 _HZClose(UConverter *cnv){
85 if(cnv->extraInfo != NULL) {
86 ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter);
87 if(!cnv->isExtraLocal) {
88 uprv_free(cnv->extraInfo);
89 }
90 cnv->extraInfo = NULL;
91 }
92 }
93
94 static void
95 _HZReset(UConverter *cnv, UConverterResetChoice choice){
96 if(choice<=UCNV_RESET_TO_UNICODE) {
97 cnv->toUnicodeStatus = 0;
98 cnv->mode=0;
99 if(cnv->extraInfo != NULL){
100 ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE;
101 }
102 }
103 if(choice!=UCNV_RESET_TO_UNICODE) {
104 cnv->fromUnicodeStatus= 0;
105 cnv->fromUChar32=0x0000;
106 if(cnv->extraInfo != NULL){
107 ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE;
108 ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0;
109 ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0;
110 ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE;
111 }
112 }
113 }
114
115 /**************************************HZ Encoding*************************************************
116 * Rules for HZ encoding
117 *
118 * In ASCII mode, a byte is interpreted as an ASCII character, unless a
119 * '~' is encountered. The character '~' is an escape character. By
120 * convention, it must be immediately followed ONLY by '~', '{' or '\n'
121 * (<LF>), with the following special meaning.
122
123 * 1. The escape sequence '~~' is interpreted as a '~'.
124 * 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
125 * 3. The escape sequence '~\n' is a line-continuation marker to be
126 * consumed with no output produced.
127 * In GB mode, characters are interpreted two bytes at a time as (pure)
128 * GB codes until the escape-from-GB code '~}' is read. This code
129 * switches the mode from GB back to ASCII. (Note that the escape-
130 * from-GB code '~}' ($7E7D) is outside the defined GB range.)
131 *
132 * Source: RFC 1842
133 */
134
135
136 static void
137 UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
138 UErrorCode* err){
139 char tempBuf[2];
140 const char *mySource = ( char *) args->source;
141 UChar *myTarget = args->target;
142 const char *mySourceLimit = args->sourceLimit;
143 UChar32 targetUniChar = 0x0000;
144 UChar mySourceChar = 0x0000;
145 UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo);
146 tempBuf[0]=0;
147 tempBuf[1]=0;
148 if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){
149 *err = U_ILLEGAL_ARGUMENT_ERROR;
150 return;
151 }
152
153 while(mySource< mySourceLimit){
154
155 if(myTarget < args->targetLimit){
156
157 mySourceChar= (unsigned char) *mySource++;
158
159 switch(mySourceChar){
160 case 0x0A:
161 if(args->converter->mode ==UCNV_TILDE){
162 args->converter->mode=0;
163
164 }
165 *(myTarget++)=(UChar)mySourceChar;
166 continue;
167
168 case UCNV_TILDE:
169 if(args->converter->mode ==UCNV_TILDE){
170 *(myTarget++)=(UChar)mySourceChar;
171 args->converter->mode=0;
172 continue;
173
174 }
175 else if(args->converter->toUnicodeStatus !=0){
176 args->converter->mode=0;
177 break;
178 }
179 else{
180 args->converter->mode = UCNV_TILDE;
181 continue;
182 }
183
184
185 case UCNV_OPEN_BRACE:
186 if(args->converter->mode == UCNV_TILDE){
187 args->converter->mode=0;
188 myData->isStateDBCS = TRUE;
189 continue;
190 }
191 else{
192 break;
193 }
194
195
196 case UCNV_CLOSE_BRACE:
197 if(args->converter->mode == UCNV_TILDE){
198 args->converter->mode=0;
199 myData->isStateDBCS = FALSE;
200 continue;
201 }
202 else{
203 break;
204 }
205
206 default:
207 /* if the first byte is equal to TILDE and the trail byte
208 * is not a valid byte then it is an error condition
209 */
210 if(args->converter->mode == UCNV_TILDE){
211 args->converter->mode=0;
212 mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
213 goto SAVE_STATE;
214 }
215
216 break;
217
218 }
219
220 if(myData->isStateDBCS){
221 if(args->converter->toUnicodeStatus == 0x00){
222 args->converter->toUnicodeStatus = (UChar) mySourceChar;
223 continue;
224 }
225 else{
226 tempBuf[0] = (char) (args->converter->toUnicodeStatus+0x80) ;
227 tempBuf[1] = (char) (mySourceChar+0x80);
228 mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
229 args->converter->toUnicodeStatus =0x00;
230 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
231 tempBuf, 2, args->converter->useFallback);
232 }
233 }
234 else{
235 if(args->converter->fromUnicodeStatus == 0x00){
236 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
237 mySource - 1, 1, args->converter->useFallback);
238 }
239 else{
240 goto SAVE_STATE;
241 }
242
243 }
244 if(targetUniChar < 0xfffe){
245 if(args->offsets) {
246 args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS));
247 }
248
249 *(myTarget++)=(UChar)targetUniChar;
250 }
251 else if(targetUniChar>=0xfffe){
252 SAVE_STATE:
253 if(targetUniChar == 0xfffe){
254 *err = U_INVALID_CHAR_FOUND;
255 }
256 else{
257 *err = U_ILLEGAL_CHAR_FOUND;
258 }
259 if(myData->isStateDBCS){
260 /* this should never occur since isStateDBCS is set to true
261 * only after tempBuf[0] and tempBuf[1]
262 * are set to the input .. just to please BEAM
263 */
264 if(tempBuf[0]==0 || tempBuf[1]==0){
265 *err = U_INTERNAL_PROGRAM_ERROR;
266 }else{
267 args->converter->toUBytes[0] = (uint8_t)(tempBuf[0]-0x80);
268 args->converter->toUBytes[1] = (uint8_t)(tempBuf[1]-0x80);
269 args->converter->toULength=2;
270 }
271 }
272 else{
273 args->converter->toUBytes[0] = (uint8_t)mySourceChar;
274 args->converter->toULength=1;
275 }
276 break;
277 }
278 }
279 else{
280 *err =U_BUFFER_OVERFLOW_ERROR;
281 break;
282 }
283 }
284
285 args->target = myTarget;
286 args->source = mySource;
287 }
288
289
290 static void
291 UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
292 UErrorCode * err){
293 const UChar *mySource = args->source;
294 char *myTarget = args->target;
295 int32_t* offsets = args->offsets;
296 int32_t mySourceIndex = 0;
297 int32_t myTargetIndex = 0;
298 int32_t targetLength = (int32_t)(args->targetLimit - myTarget);
299 int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source);
300 int32_t length=0;
301 uint32_t targetUniChar = 0x0000;
302 UChar32 mySourceChar = 0x0000;
303 UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo;
304 UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;
305 UBool oldIsTargetUCharDBCS = isTargetUCharDBCS;
306 int len =0;
307 const char* escSeq=NULL;
308
309 if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){
310 *err = U_ILLEGAL_ARGUMENT_ERROR;
311 return;
312 }
313 if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) {
314 goto getTrail;
315 }
316 /*writing the char to the output stream */
317 while (mySourceIndex < mySourceLength){
318 targetUniChar = missingCharMarker;
319 if (myTargetIndex < targetLength){
320
321 mySourceChar = (UChar) mySource[mySourceIndex++];
322
323
324 oldIsTargetUCharDBCS = isTargetUCharDBCS;
325 if(mySourceChar ==UCNV_TILDE){
326 /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/
327 len = ESC_LEN;
328 escSeq = TILDE_ESCAPE;
329 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
330 continue;
331 }
332 else{
333 length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData,
334 mySourceChar,&targetUniChar,args->converter->useFallback);
335
336 }
337 /* only DBCS or SBCS characters are expected*/
338 /* DB haracters with high bit set to 1 are expected */
339 if(length > 2 || length==0 ||(((targetUniChar & 0x8080) != 0x8080)&& length==2)){
340 targetUniChar= missingCharMarker;
341 }
342 if (targetUniChar != missingCharMarker){
343 myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
344 if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){
345 /*Shifting from a double byte to single byte mode*/
346 if(!isTargetUCharDBCS){
347 len =ESC_LEN;
348 escSeq = SB_ESCAPE;
349 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
350 myConverterData->isEscapeAppended = TRUE;
351 }
352 else{ /* Shifting from a single byte to double byte mode*/
353 len =ESC_LEN;
354 escSeq = DB_ESCAPE;
355 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
356 myConverterData->isEscapeAppended = TRUE;
357
358 }
359 }
360
361 if(isTargetUCharDBCS){
362 if( myTargetIndex <targetLength){
363 myTarget[myTargetIndex++] =(char) ((targetUniChar >> 8) -0x80);
364 if(offsets){
365 *(offsets++) = mySourceIndex-1;
366 }
367 if(myTargetIndex < targetLength){
368 myTarget[myTargetIndex++] =(char) ((targetUniChar & 0x00FF) -0x80);
369 if(offsets){
370 *(offsets++) = mySourceIndex-1;
371 }
372 }else{
373 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80);
374 *err = U_BUFFER_OVERFLOW_ERROR;
375 }
376 }else{
377 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) ((targetUniChar >> 8) -0x80);
378 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80);
379 *err = U_BUFFER_OVERFLOW_ERROR;
380 }
381
382 }else{
383 if( myTargetIndex <targetLength){
384 myTarget[myTargetIndex++] = (char) (targetUniChar );
385 if(offsets){
386 *(offsets++) = mySourceIndex-1;
387 }
388
389 }else{
390 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
391 *err = U_BUFFER_OVERFLOW_ERROR;
392 }
393 }
394
395 }
396 else{
397 /* oops.. the code point is unassigned */
398 /*Handle surrogates */
399 /*check if the char is a First surrogate*/
400 if(UTF_IS_SURROGATE(mySourceChar)) {
401 if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
402 args->converter->fromUChar32=mySourceChar;
403 getTrail:
404 /*look ahead to find the trail surrogate*/
405 if(mySourceIndex < mySourceLength) {
406 /* test the following code unit */
407 UChar trail=(UChar) args->source[mySourceIndex];
408 if(UTF_IS_SECOND_SURROGATE(trail)) {
409 ++mySourceIndex;
410 mySourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUChar32, trail);
411 args->converter->fromUChar32=0x00;
412 /* there are no surrogates in GB2312*/
413 *err = U_INVALID_CHAR_FOUND;
414 /* exit this condition tree */
415 } else {
416 /* this is an unmatched lead code unit (1st surrogate) */
417 /* callback(illegal) */
418 *err=U_ILLEGAL_CHAR_FOUND;
419 }
420 } else {
421 /* no more input */
422 *err = U_ZERO_ERROR;
423 }
424 } else {
425 /* this is an unmatched trail code unit (2nd surrogate) */
426 /* callback(illegal) */
427 *err=U_ILLEGAL_CHAR_FOUND;
428 }
429 } else {
430 /* callback(unassigned) for a BMP code point */
431 *err = U_INVALID_CHAR_FOUND;
432 }
433
434 args->converter->fromUChar32=mySourceChar;
435 break;
436 }
437 }
438 else{
439 *err = U_BUFFER_OVERFLOW_ERROR;
440 break;
441 }
442 targetUniChar=missingCharMarker;
443 }
444
445 args->target += myTargetIndex;
446 args->source += mySourceIndex;
447 myConverterData->isTargetUCharDBCS = isTargetUCharDBCS;
448 }
449
450 static void
451 _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
452 UConverter *cnv = args->converter;
453 UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo;
454 char *p;
455 char buffer[4];
456 p = buffer;
457
458 if( convData->isTargetUCharDBCS){
459 *p++= UCNV_TILDE;
460 *p++= UCNV_CLOSE_BRACE;
461 convData->isTargetUCharDBCS=FALSE;
462 }
463 *p++= (char)cnv->subChars[0];
464
465 ucnv_cbFromUWriteBytes(args,
466 buffer, (int32_t)(p - buffer),
467 offsetIndex, err);
468 }
469
470 /*
471 * Structure for cloning an HZ converter into a single memory block.
472 * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct,
473 * and then ucnv_safeClone() of the sub-converter may additionally align
474 * subCnv inside the cloneHZStruct, for which we need the deadSpace after
475 * subCnv. This is because UAlignedMemory may be larger than the actually
476 * necessary alignment size for the platform.
477 * The other cloneHZStruct fields will not be moved around,
478 * and are aligned properly with cloneHZStruct's alignment.
479 */
480 struct cloneHZStruct
481 {
482 UConverter cnv;
483 UConverter subCnv;
484 UAlignedMemory deadSpace;
485 UConverterDataHZ mydata;
486 };
487
488
489 static UConverter *
490 _HZ_SafeClone(const UConverter *cnv,
491 void *stackBuffer,
492 int32_t *pBufferSize,
493 UErrorCode *status)
494 {
495 struct cloneHZStruct * localClone;
496 int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct);
497
498 if (U_FAILURE(*status)){
499 return 0;
500 }
501
502 if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
503 *pBufferSize = bufferSizeNeeded;
504 return 0;
505 }
506
507 localClone = (struct cloneHZStruct *)stackBuffer;
508 /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
509
510 uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ));
511 localClone->cnv.extraInfo = &localClone->mydata;
512 localClone->cnv.isExtraLocal = TRUE;
513
514 /* deep-clone the sub-converter */
515 size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
516 ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter =
517 ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status);
518
519 return &localClone->cnv;
520 }
521
522 static void
523 _HZ_GetUnicodeSet(const UConverter *cnv,
524 const USetAdder *sa,
525 UConverterUnicodeSet which,
526 UErrorCode *pErrorCode) {
527 /* the tilde '~' is hardcoded in the converter */
528 sa->add(sa->set, 0x7e);
529
530 /* add all of the code points that the sub-converter handles */
531 ((UConverterDataHZ*)cnv->extraInfo)->
532 gbConverter->sharedData->impl->
533 getUnicodeSet(((UConverterDataHZ*)cnv->extraInfo)->gbConverter,
534 sa, which, pErrorCode);
535 }
536
537 static const UConverterImpl _HZImpl={
538
539 UCNV_HZ,
540
541 NULL,
542 NULL,
543
544 _HZOpen,
545 _HZClose,
546 _HZReset,
547
548 UConverter_toUnicode_HZ_OFFSETS_LOGIC,
549 UConverter_toUnicode_HZ_OFFSETS_LOGIC,
550 UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
551 UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
552 NULL,
553
554 NULL,
555 NULL,
556 _HZ_WriteSub,
557 _HZ_SafeClone,
558 _HZ_GetUnicodeSet
559 };
560
561 static const UConverterStaticData _HZStaticData={
562 sizeof(UConverterStaticData),
563 "HZ",
564 0,
565 UCNV_IBM,
566 UCNV_HZ,
567 1,
568 4,
569 { 0x1a, 0, 0, 0 },
570 1,
571 FALSE,
572 FALSE,
573 0,
574 0,
575 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
576
577 };
578
579
580 const UConverterSharedData _HZData={
581 sizeof(UConverterSharedData),
582 ~((uint32_t) 0),
583 NULL,
584 NULL,
585 &_HZStaticData,
586 FALSE,
587 &_HZImpl,
588 0
589 };
590
591 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */