sdk/lib/3rdparty/libxml2/encoding.c

   1 /*
   2  * encoding.c : implements the encoding conversion functions needed for XML
   3  *
   4  * Related specs:
   5  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
   6  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
   7  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
   8  * [ISO-8859-1]   ISO Latin-1 characters codes.
   9  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
  10  *                Worldwide Character Encoding -- Version 1.0", Addison-
  11  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
  12  *                described in Unicode Technical Report #4.
  13  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
  14  *                Information Interchange, ANSI X3.4-1986.
  15  *
  16  * See Copyright for the status of this software.
  17  *
  18  * daniel@veillard.com
  19  *
  20  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
  21  */
  22
  23 #define IN_LIBXML
  24 #include "libxml.h"
  25
  26 #include <string.h>
  27 #include <limits.h>
  28
  29 #ifdef HAVE_CTYPE_H
  30 #include <ctype.h>
  31 #endif
  32 #ifdef HAVE_STDLIB_H
  33 #include <stdlib.h>
  34 #endif
  35 #ifdef LIBXML_ICONV_ENABLED
  36 #ifdef HAVE_ERRNO_H
  37 #include <errno.h>
  38 #endif
  39 #endif
  40 #include <libxml/encoding.h>
  41 #include <libxml/xmlmemory.h>
  42 #ifdef LIBXML_HTML_ENABLED
  43 #include <libxml/HTMLparser.h>
  44 #endif
  45 #include <libxml/globals.h>
  46 #include <libxml/xmlerror.h>
  47
  48 #include "buf.h"
  49 #include "enc.h"
  50
  51 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
  52 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
  53
  54 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
  55 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
  56 struct _xmlCharEncodingAlias {
  57     const char *name;
  58     const char *alias;
  59 };
  60
  61 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
  62 static int xmlCharEncodingAliasesNb = 0;
  63 static int xmlCharEncodingAliasesMax = 0;
  64
  65 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
  66 #if 0
  67 #define DEBUG_ENCODING  /* Define this to get encoding traces */
  68 #endif
  69 #else
  70 #ifdef LIBXML_ISO8859X_ENABLED
  71 static void xmlRegisterCharEncodingHandlersISO8859x (void);
  72 #endif
  73 #endif
  74
  75 static int xmlLittleEndian = 1;
  76
  77 /**
  78  * xmlEncodingErrMemory:
  79  * @extra:  extra informations
  80  *
  81  * Handle an out of memory condition
  82  */
  83 static void
  84 xmlEncodingErrMemory(const char *extra)
  85 {
  86     __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
  87 }
  88
  89 /**
  90  * xmlErrEncoding:
  91  * @error:  the error number
  92  * @msg:  the error message
  93  *
  94  * n encoding error
  95  */
  96 static void LIBXML_ATTR_FORMAT(2,0)
  97 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
  98 {
  99     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
 100                     XML_FROM_I18N, error, XML_ERR_FATAL,
 101                     NULL, 0, val, NULL, NULL, 0, 0, msg, val);
 102 }
 103
 104 #ifdef LIBXML_ICU_ENABLED
 105 static uconv_t*
 106 openIcuConverter(const char* name, int toUnicode)
 107 {
 108   UErrorCode status = U_ZERO_ERROR;
 109   uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
 110   if (conv == NULL)
 111     return NULL;
 112
 113   conv->uconv = ucnv_open(name, &status);
 114   if (U_FAILURE(status))
 115     goto error;
 116
 117   status = U_ZERO_ERROR;
 118   if (toUnicode) {
 119     ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
 120                         NULL, NULL, NULL, &status);
 121   }
 122   else {
 123     ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
 124                         NULL, NULL, NULL, &status);
 125   }
 126   if (U_FAILURE(status))
 127     goto error;
 128
 129   status = U_ZERO_ERROR;
 130   conv->utf8 = ucnv_open("UTF-8", &status);
 131   if (U_SUCCESS(status))
 132     return conv;
 133
 134 error:
 135   if (conv->uconv)
 136     ucnv_close(conv->uconv);
 137   xmlFree(conv);
 138   return NULL;
 139 }
 140
 141 static void
 142 closeIcuConverter(uconv_t *conv)
 143 {
 144   if (conv != NULL) {
 145     ucnv_close(conv->uconv);
 146     ucnv_close(conv->utf8);
 147     xmlFree(conv);
 148   }
 149 }
 150 #endif /* LIBXML_ICU_ENABLED */
 151
 152 /************************************************************************
 153  *                                                                      *
 154  *              Conversions To/From UTF8 encoding                       *
 155  *                                                                      *
 156  ************************************************************************/
 157
 158 /**
 159  * asciiToUTF8:
 160  * @out:  a pointer to an array of bytes to store the result
 161  * @outlen:  the length of @out
 162  * @in:  a pointer to an array of ASCII chars
 163  * @inlen:  the length of @in
 164  *
 165  * Take a block of ASCII chars in and try to convert it to an UTF-8
 166  * block of chars out.
 167  * Returns 0 if success, or -1 otherwise
 168  * The value of @inlen after return is the number of octets consumed
 169  *     if the return value is positive, else unpredictable.
 170  * The value of @outlen after return is the number of octets consumed.
 171  */
 172 static int
 173 asciiToUTF8(unsigned char* out, int *outlen,
 174               const unsigned char* in, int *inlen) {
 175     unsigned char* outstart = out;
 176     const unsigned char* base = in;
 177     const unsigned char* processed = in;
 178     unsigned char* outend = out + *outlen;
 179     const unsigned char* inend;
 180     unsigned int c;
 181
 182     inend = in + (*inlen);
 183     while ((in < inend) && (out - outstart + 5 < *outlen)) {
 184         c= *in++;
 185
 186         if (out >= outend)
 187             break;
 188         if (c < 0x80) {
 189             *out++ = c;
 190         } else {
 191             *outlen = out - outstart;
 192             *inlen = processed - base;
 193             return(-1);
 194         }
 195
 196         processed = (const unsigned char*) in;
 197     }
 198     *outlen = out - outstart;
 199     *inlen = processed - base;
 200     return(*outlen);
 201 }
 202
 203 #ifdef LIBXML_OUTPUT_ENABLED
 204 /**
 205  * UTF8Toascii:
 206  * @out:  a pointer to an array of bytes to store the result
 207  * @outlen:  the length of @out
 208  * @in:  a pointer to an array of UTF-8 chars
 209  * @inlen:  the length of @in
 210  *
 211  * Take a block of UTF-8 chars in and try to convert it to an ASCII
 212  * block of chars out.
 213  *
 214  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
 215  * The value of @inlen after return is the number of octets consumed
 216  *     if the return value is positive, else unpredictable.
 217  * The value of @outlen after return is the number of octets consumed.
 218  */
 219 static int
 220 UTF8Toascii(unsigned char* out, int *outlen,
 221               const unsigned char* in, int *inlen) {
 222     const unsigned char* processed = in;
 223     const unsigned char* outend;
 224     const unsigned char* outstart = out;
 225     const unsigned char* instart = in;
 226     const unsigned char* inend;
 227     unsigned int c, d;
 228     int trailing;
 229
 230     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 231     if (in == NULL) {
 232         /*
 233          * initialization nothing to do
 234          */
 235         *outlen = 0;
 236         *inlen = 0;
 237         return(0);
 238     }
 239     inend = in + (*inlen);
 240     outend = out + (*outlen);
 241     while (in < inend) {
 242         d = *in++;
 243         if      (d < 0x80)  { c= d; trailing= 0; }
 244         else if (d < 0xC0) {
 245             /* trailing byte in leading position */
 246             *outlen = out - outstart;
 247             *inlen = processed - instart;
 248             return(-2);
 249         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 250         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 251         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 252         else {
 253             /* no chance for this in Ascii */
 254             *outlen = out - outstart;
 255             *inlen = processed - instart;
 256             return(-2);
 257         }
 258
 259         if (inend - in < trailing) {
 260             break;
 261         }
 262
 263         for ( ; trailing; trailing--) {
 264             if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
 265                 break;
 266             c <<= 6;
 267             c |= d & 0x3F;
 268         }
 269
 270         /* assertion: c is a single UTF-4 value */
 271         if (c < 0x80) {
 272             if (out >= outend)
 273                 break;
 274             *out++ = c;
 275         } else {
 276             /* no chance for this in Ascii */
 277             *outlen = out - outstart;
 278             *inlen = processed - instart;
 279             return(-2);
 280         }
 281         processed = in;
 282     }
 283     *outlen = out - outstart;
 284     *inlen = processed - instart;
 285     return(*outlen);
 286 }
 287 #endif /* LIBXML_OUTPUT_ENABLED */
 288
 289 /**
 290  * isolat1ToUTF8:
 291  * @out:  a pointer to an array of bytes to store the result
 292  * @outlen:  the length of @out
 293  * @in:  a pointer to an array of ISO Latin 1 chars
 294  * @inlen:  the length of @in
 295  *
 296  * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 297  * block of chars out.
 298  * Returns the number of bytes written if success, or -1 otherwise
 299  * The value of @inlen after return is the number of octets consumed
 300  *     if the return value is positive, else unpredictable.
 301  * The value of @outlen after return is the number of octets consumed.
 302  */
 303 int
 304 isolat1ToUTF8(unsigned char* out, int *outlen,
 305               const unsigned char* in, int *inlen) {
 306     unsigned char* outstart = out;
 307     const unsigned char* base = in;
 308     unsigned char* outend;
 309     const unsigned char* inend;
 310     const unsigned char* instop;
 311
 312     if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
 313         return(-1);
 314
 315     outend = out + *outlen;
 316     inend = in + (*inlen);
 317     instop = inend;
 318
 319     while ((in < inend) && (out < outend - 1)) {
 320         if (*in >= 0x80) {
 321             *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
 322             *out++ = ((*in) & 0x3F) | 0x80;
 323             ++in;
 324         }
 325         if ((instop - in) > (outend - out)) instop = in + (outend - out);
 326         while ((in < instop) && (*in < 0x80)) {
 327             *out++ = *in++;
 328         }
 329     }
 330     if ((in < inend) && (out < outend) && (*in < 0x80)) {
 331         *out++ = *in++;
 332     }
 333     *outlen = out - outstart;
 334     *inlen = in - base;
 335     return(*outlen);
 336 }
 337
 338 /**
 339  * UTF8ToUTF8:
 340  * @out:  a pointer to an array of bytes to store the result
 341  * @outlen:  the length of @out
 342  * @inb:  a pointer to an array of UTF-8 chars
 343  * @inlenb:  the length of @in in UTF-8 chars
 344  *
 345  * No op copy operation for UTF8 handling.
 346  *
 347  * Returns the number of bytes written, or -1 if lack of space.
 348  *     The value of *inlen after return is the number of octets consumed
 349  *     if the return value is positive, else unpredictable.
 350  */
 351 static int
 352 UTF8ToUTF8(unsigned char* out, int *outlen,
 353            const unsigned char* inb, int *inlenb)
 354 {
 355     int len;
 356
 357     if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
 358         return(-1);
 359     if (inb == NULL) {
 360         /* inb == NULL means output is initialized. */
 361         *outlen = 0;
 362         *inlenb = 0;
 363         return(0);
 364     }
 365     if (*outlen > *inlenb) {
 366         len = *inlenb;
 367     } else {
 368         len = *outlen;
 369     }
 370     if (len < 0)
 371         return(-1);
 372
 373     memcpy(out, inb, len);
 374
 375     *outlen = len;
 376     *inlenb = len;
 377     return(*outlen);
 378 }
 379
 380
 381 #ifdef LIBXML_OUTPUT_ENABLED
 382 /**
 383  * UTF8Toisolat1:
 384  * @out:  a pointer to an array of bytes to store the result
 385  * @outlen:  the length of @out
 386  * @in:  a pointer to an array of UTF-8 chars
 387  * @inlen:  the length of @in
 388  *
 389  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 390  * block of chars out.
 391  *
 392  * Returns the number of bytes written if success, -2 if the transcoding fails,
 393            or -1 otherwise
 394  * The value of @inlen after return is the number of octets consumed
 395  *     if the return value is positive, else unpredictable.
 396  * The value of @outlen after return is the number of octets consumed.
 397  */
 398 int
 399 UTF8Toisolat1(unsigned char* out, int *outlen,
 400               const unsigned char* in, int *inlen) {
 401     const unsigned char* processed = in;
 402     const unsigned char* outend;
 403     const unsigned char* outstart = out;
 404     const unsigned char* instart = in;
 405     const unsigned char* inend;
 406     unsigned int c, d;
 407     int trailing;
 408
 409     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 410     if (in == NULL) {
 411         /*
 412          * initialization nothing to do
 413          */
 414         *outlen = 0;
 415         *inlen = 0;
 416         return(0);
 417     }
 418     inend = in + (*inlen);
 419     outend = out + (*outlen);
 420     while (in < inend) {
 421         d = *in++;
 422         if      (d < 0x80)  { c= d; trailing= 0; }
 423         else if (d < 0xC0) {
 424             /* trailing byte in leading position */
 425             *outlen = out - outstart;
 426             *inlen = processed - instart;
 427             return(-2);
 428         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 429         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 430         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 431         else {
 432             /* no chance for this in IsoLat1 */
 433             *outlen = out - outstart;
 434             *inlen = processed - instart;
 435             return(-2);
 436         }
 437
 438         if (inend - in < trailing) {
 439             break;
 440         }
 441
 442         for ( ; trailing; trailing--) {
 443             if (in >= inend)
 444                 break;
 445             if (((d= *in++) & 0xC0) != 0x80) {
 446                 *outlen = out - outstart;
 447                 *inlen = processed - instart;
 448                 return(-2);
 449             }
 450             c <<= 6;
 451             c |= d & 0x3F;
 452         }
 453
 454         /* assertion: c is a single UTF-4 value */
 455         if (c <= 0xFF) {
 456             if (out >= outend)
 457                 break;
 458             *out++ = c;
 459         } else {
 460             /* no chance for this in IsoLat1 */
 461             *outlen = out - outstart;
 462             *inlen = processed - instart;
 463             return(-2);
 464         }
 465         processed = in;
 466     }
 467     *outlen = out - outstart;
 468     *inlen = processed - instart;
 469     return(*outlen);
 470 }
 471 #endif /* LIBXML_OUTPUT_ENABLED */
 472
 473 /**
 474  * UTF16LEToUTF8:
 475  * @out:  a pointer to an array of bytes to store the result
 476  * @outlen:  the length of @out
 477  * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
 478  * @inlenb:  the length of @in in UTF-16LE chars
 479  *
 480  * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
 481  * block of chars out. This function assumes the endian property
 482  * is the same between the native type of this machine and the
 483  * inputed one.
 484  *
 485  * Returns the number of bytes written, or -1 if lack of space, or -2
 486  *     if the transcoding fails (if *in is not a valid utf16 string)
 487  *     The value of *inlen after return is the number of octets consumed
 488  *     if the return value is positive, else unpredictable.
 489  */
 490 static int
 491 UTF16LEToUTF8(unsigned char* out, int *outlen,
 492             const unsigned char* inb, int *inlenb)
 493 {
 494     unsigned char* outstart = out;
 495     const unsigned char* processed = inb;
 496     unsigned char* outend = out + *outlen;
 497     unsigned short* in = (unsigned short*) inb;
 498     unsigned short* inend;
 499     unsigned int c, d, inlen;
 500     unsigned char *tmp;
 501     int bits;
 502
 503     if ((*inlenb % 2) == 1)
 504         (*inlenb)--;
 505     inlen = *inlenb / 2;
 506     inend = in + inlen;
 507     while ((in < inend) && (out - outstart + 5 < *outlen)) {
 508         if (xmlLittleEndian) {
 509             c= *in++;
 510         } else {
 511             tmp = (unsigned char *) in;
 512             c = *tmp++;
 513             c = c | (((unsigned int)*tmp) << 8);
 514             in++;
 515         }
 516         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
 517             if (in >= inend) {           /* (in > inend) shouldn't happens */
 518                 break;
 519             }
 520             if (xmlLittleEndian) {
 521                 d = *in++;
 522             } else {
 523                 tmp = (unsigned char *) in;
 524                 d = *tmp++;
 525                 d = d | (((unsigned int)*tmp) << 8);
 526                 in++;
 527             }
 528             if ((d & 0xFC00) == 0xDC00) {
 529                 c &= 0x03FF;
 530                 c <<= 10;
 531                 c |= d & 0x03FF;
 532                 c += 0x10000;
 533             }
 534             else {
 535                 *outlen = out - outstart;
 536                 *inlenb = processed - inb;
 537                 return(-2);
 538             }
 539         }
 540
 541         /* assertion: c is a single UTF-4 value */
 542         if (out >= outend)
 543             break;
 544         if      (c <    0x80) {  *out++=  c;                bits= -6; }
 545         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
 546         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
 547         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
 548
 549         for ( ; bits >= 0; bits-= 6) {
 550             if (out >= outend)
 551                 break;
 552             *out++= ((c >> bits) & 0x3F) | 0x80;
 553         }
 554         processed = (const unsigned char*) in;
 555     }
 556     *outlen = out - outstart;
 557     *inlenb = processed - inb;
 558     return(*outlen);
 559 }
 560
 561 #ifdef LIBXML_OUTPUT_ENABLED
 562 /**
 563  * UTF8ToUTF16LE:
 564  * @outb:  a pointer to an array of bytes to store the result
 565  * @outlen:  the length of @outb
 566  * @in:  a pointer to an array of UTF-8 chars
 567  * @inlen:  the length of @in
 568  *
 569  * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 570  * block of chars out.
 571  *
 572  * Returns the number of bytes written, or -1 if lack of space, or -2
 573  *     if the transcoding failed.
 574  */
 575 static int
 576 UTF8ToUTF16LE(unsigned char* outb, int *outlen,
 577             const unsigned char* in, int *inlen)
 578 {
 579     unsigned short* out = (unsigned short*) outb;
 580     const unsigned char* processed = in;
 581     const unsigned char *const instart = in;
 582     unsigned short* outstart= out;
 583     unsigned short* outend;
 584     const unsigned char* inend;
 585     unsigned int c, d;
 586     int trailing;
 587     unsigned char *tmp;
 588     unsigned short tmp1, tmp2;
 589
 590     /* UTF16LE encoding has no BOM */
 591     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 592     if (in == NULL) {
 593         *outlen = 0;
 594         *inlen = 0;
 595         return(0);
 596     }
 597     inend= in + *inlen;
 598     outend = out + (*outlen / 2);
 599     while (in < inend) {
 600       d= *in++;
 601       if      (d < 0x80)  { c= d; trailing= 0; }
 602       else if (d < 0xC0) {
 603           /* trailing byte in leading position */
 604           *outlen = (out - outstart) * 2;
 605           *inlen = processed - instart;
 606           return(-2);
 607       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 608       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 609       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 610       else {
 611         /* no chance for this in UTF-16 */
 612         *outlen = (out - outstart) * 2;
 613         *inlen = processed - instart;
 614         return(-2);
 615       }
 616
 617       if (inend - in < trailing) {
 618           break;
 619       }
 620
 621       for ( ; trailing; trailing--) {
 622           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
 623               break;
 624           c <<= 6;
 625           c |= d & 0x3F;
 626       }
 627
 628       /* assertion: c is a single UTF-4 value */
 629         if (c < 0x10000) {
 630             if (out >= outend)
 631                 break;
 632             if (xmlLittleEndian) {
 633                 *out++ = c;
 634             } else {
 635                 tmp = (unsigned char *) out;
 636                 *tmp = c ;
 637                 *(tmp + 1) = c >> 8 ;
 638                 out++;
 639             }
 640         }
 641         else if (c < 0x110000) {
 642             if (out+1 >= outend)
 643                 break;
 644             c -= 0x10000;
 645             if (xmlLittleEndian) {
 646                 *out++ = 0xD800 | (c >> 10);
 647                 *out++ = 0xDC00 | (c & 0x03FF);
 648             } else {
 649                 tmp1 = 0xD800 | (c >> 10);
 650                 tmp = (unsigned char *) out;
 651                 *tmp = (unsigned char) tmp1;
 652                 *(tmp + 1) = tmp1 >> 8;
 653                 out++;
 654
 655                 tmp2 = 0xDC00 | (c & 0x03FF);
 656                 tmp = (unsigned char *) out;
 657                 *tmp  = (unsigned char) tmp2;
 658                 *(tmp + 1) = tmp2 >> 8;
 659                 out++;
 660             }
 661         }
 662         else
 663             break;
 664         processed = in;
 665     }
 666     *outlen = (out - outstart) * 2;
 667     *inlen = processed - instart;
 668     return(*outlen);
 669 }
 670
 671 /**
 672  * UTF8ToUTF16:
 673  * @outb:  a pointer to an array of bytes to store the result
 674  * @outlen:  the length of @outb
 675  * @in:  a pointer to an array of UTF-8 chars
 676  * @inlen:  the length of @in
 677  *
 678  * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 679  * block of chars out.
 680  *
 681  * Returns the number of bytes written, or -1 if lack of space, or -2
 682  *     if the transcoding failed.
 683  */
 684 static int
 685 UTF8ToUTF16(unsigned char* outb, int *outlen,
 686             const unsigned char* in, int *inlen)
 687 {
 688     if (in == NULL) {
 689         /*
 690          * initialization, add the Byte Order Mark for UTF-16LE
 691          */
 692         if (*outlen >= 2) {
 693             outb[0] = 0xFF;
 694             outb[1] = 0xFE;
 695             *outlen = 2;
 696             *inlen = 0;
 697 #ifdef DEBUG_ENCODING
 698             xmlGenericError(xmlGenericErrorContext,
 699                     "Added FFFE Byte Order Mark\n");
 700 #endif
 701             return(2);
 702         }
 703         *outlen = 0;
 704         *inlen = 0;
 705         return(0);
 706     }
 707     return (UTF8ToUTF16LE(outb, outlen, in, inlen));
 708 }
 709 #endif /* LIBXML_OUTPUT_ENABLED */
 710
 711 /**
 712  * UTF16BEToUTF8:
 713  * @out:  a pointer to an array of bytes to store the result
 714  * @outlen:  the length of @out
 715  * @inb:  a pointer to an array of UTF-16 passed as a byte array
 716  * @inlenb:  the length of @in in UTF-16 chars
 717  *
 718  * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
 719  * block of chars out. This function assumes the endian property
 720  * is the same between the native type of this machine and the
 721  * inputed one.
 722  *
 723  * Returns the number of bytes written, or -1 if lack of space, or -2
 724  *     if the transcoding fails (if *in is not a valid utf16 string)
 725  * The value of *inlen after return is the number of octets consumed
 726  *     if the return value is positive, else unpredictable.
 727  */
 728 static int
 729 UTF16BEToUTF8(unsigned char* out, int *outlen,
 730             const unsigned char* inb, int *inlenb)
 731 {
 732     unsigned char* outstart = out;
 733     const unsigned char* processed = inb;
 734     unsigned char* outend = out + *outlen;
 735     unsigned short* in = (unsigned short*) inb;
 736     unsigned short* inend;
 737     unsigned int c, d, inlen;
 738     unsigned char *tmp;
 739     int bits;
 740
 741     if ((*inlenb % 2) == 1)
 742         (*inlenb)--;
 743     inlen = *inlenb / 2;
 744     inend= in + inlen;
 745     while (in < inend) {
 746         if (xmlLittleEndian) {
 747             tmp = (unsigned char *) in;
 748             c = *tmp++;
 749             c = c << 8;
 750             c = c | (unsigned int) *tmp;
 751             in++;
 752         } else {
 753             c= *in++;
 754         }
 755         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
 756             if (in >= inend) {           /* (in > inend) shouldn't happens */
 757                 *outlen = out - outstart;
 758                 *inlenb = processed - inb;
 759                 return(-2);
 760             }
 761             if (xmlLittleEndian) {
 762                 tmp = (unsigned char *) in;
 763                 d = *tmp++;
 764                 d = d << 8;
 765                 d = d | (unsigned int) *tmp;
 766                 in++;
 767             } else {
 768                 d= *in++;
 769             }
 770             if ((d & 0xFC00) == 0xDC00) {
 771                 c &= 0x03FF;
 772                 c <<= 10;
 773                 c |= d & 0x03FF;
 774                 c += 0x10000;
 775             }
 776             else {
 777                 *outlen = out - outstart;
 778                 *inlenb = processed - inb;
 779                 return(-2);
 780             }
 781         }
 782
 783         /* assertion: c is a single UTF-4 value */
 784         if (out >= outend)
 785             break;
 786         if      (c <    0x80) {  *out++=  c;                bits= -6; }
 787         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
 788         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
 789         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
 790
 791         for ( ; bits >= 0; bits-= 6) {
 792             if (out >= outend)
 793                 break;
 794             *out++= ((c >> bits) & 0x3F) | 0x80;
 795         }
 796         processed = (const unsigned char*) in;
 797     }
 798     *outlen = out - outstart;
 799     *inlenb = processed - inb;
 800     return(*outlen);
 801 }
 802
 803 #ifdef LIBXML_OUTPUT_ENABLED
 804 /**
 805  * UTF8ToUTF16BE:
 806  * @outb:  a pointer to an array of bytes to store the result
 807  * @outlen:  the length of @outb
 808  * @in:  a pointer to an array of UTF-8 chars
 809  * @inlen:  the length of @in
 810  *
 811  * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 812  * block of chars out.
 813  *
 814  * Returns the number of byte written, or -1 by lack of space, or -2
 815  *     if the transcoding failed.
 816  */
 817 static int
 818 UTF8ToUTF16BE(unsigned char* outb, int *outlen,
 819             const unsigned char* in, int *inlen)
 820 {
 821     unsigned short* out = (unsigned short*) outb;
 822     const unsigned char* processed = in;
 823     const unsigned char *const instart = in;
 824     unsigned short* outstart= out;
 825     unsigned short* outend;
 826     const unsigned char* inend;
 827     unsigned int c, d;
 828     int trailing;
 829     unsigned char *tmp;
 830     unsigned short tmp1, tmp2;
 831
 832     /* UTF-16BE has no BOM */
 833     if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 834     if (in == NULL) {
 835         *outlen = 0;
 836         *inlen = 0;
 837         return(0);
 838     }
 839     inend= in + *inlen;
 840     outend = out + (*outlen / 2);
 841     while (in < inend) {
 842       d= *in++;
 843       if      (d < 0x80)  { c= d; trailing= 0; }
 844       else if (d < 0xC0)  {
 845           /* trailing byte in leading position */
 846           *outlen = out - outstart;
 847           *inlen = processed - instart;
 848           return(-2);
 849       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 850       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 851       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 852       else {
 853           /* no chance for this in UTF-16 */
 854           *outlen = out - outstart;
 855           *inlen = processed - instart;
 856           return(-2);
 857       }
 858
 859       if (inend - in < trailing) {
 860           break;
 861       }
 862
 863       for ( ; trailing; trailing--) {
 864           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
 865           c <<= 6;
 866           c |= d & 0x3F;
 867       }
 868
 869       /* assertion: c is a single UTF-4 value */
 870         if (c < 0x10000) {
 871             if (out >= outend)  break;
 872             if (xmlLittleEndian) {
 873                 tmp = (unsigned char *) out;
 874                 *tmp = c >> 8;
 875                 *(tmp + 1) = c;
 876                 out++;
 877             } else {
 878                 *out++ = c;
 879             }
 880         }
 881         else if (c < 0x110000) {
 882             if (out+1 >= outend)  break;
 883             c -= 0x10000;
 884             if (xmlLittleEndian) {
 885                 tmp1 = 0xD800 | (c >> 10);
 886                 tmp = (unsigned char *) out;
 887                 *tmp = tmp1 >> 8;
 888                 *(tmp + 1) = (unsigned char) tmp1;
 889                 out++;
 890
 891                 tmp2 = 0xDC00 | (c & 0x03FF);
 892                 tmp = (unsigned char *) out;
 893                 *tmp = tmp2 >> 8;
 894                 *(tmp + 1) = (unsigned char) tmp2;
 895                 out++;
 896             } else {
 897                 *out++ = 0xD800 | (c >> 10);
 898                 *out++ = 0xDC00 | (c & 0x03FF);
 899             }
 900         }
 901         else
 902             break;
 903         processed = in;
 904     }
 905     *outlen = (out - outstart) * 2;
 906     *inlen = processed - instart;
 907     return(*outlen);
 908 }
 909 #endif /* LIBXML_OUTPUT_ENABLED */
 910
 911 /************************************************************************
 912  *                                                                      *
 913  *              Generic encoding handling routines                      *
 914  *                                                                      *
 915  ************************************************************************/
 916
 917 /**
 918  * xmlDetectCharEncoding:
 919  * @in:  a pointer to the first bytes of the XML entity, must be at least
 920  *       2 bytes long (at least 4 if encoding is UTF4 variant).
 921  * @len:  pointer to the length of the buffer
 922  *
 923  * Guess the encoding of the entity using the first bytes of the entity content
 924  * according to the non-normative appendix F of the XML-1.0 recommendation.
 925  *
 926  * Returns one of the XML_CHAR_ENCODING_... values.
 927  */
 928 xmlCharEncoding
 929 xmlDetectCharEncoding(const unsigned char* in, int len)
 930 {
 931     if (in == NULL)
 932         return(XML_CHAR_ENCODING_NONE);
 933     if (len >= 4) {
 934         if ((in[0] == 0x00) && (in[1] == 0x00) &&
 935             (in[2] == 0x00) && (in[3] == 0x3C))
 936             return(XML_CHAR_ENCODING_UCS4BE);
 937         if ((in[0] == 0x3C) && (in[1] == 0x00) &&
 938             (in[2] == 0x00) && (in[3] == 0x00))
 939             return(XML_CHAR_ENCODING_UCS4LE);
 940         if ((in[0] == 0x00) && (in[1] == 0x00) &&
 941             (in[2] == 0x3C) && (in[3] == 0x00))
 942             return(XML_CHAR_ENCODING_UCS4_2143);
 943         if ((in[0] == 0x00) && (in[1] == 0x3C) &&
 944             (in[2] == 0x00) && (in[3] == 0x00))
 945             return(XML_CHAR_ENCODING_UCS4_3412);
 946         if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
 947             (in[2] == 0xA7) && (in[3] == 0x94))
 948             return(XML_CHAR_ENCODING_EBCDIC);
 949         if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
 950             (in[2] == 0x78) && (in[3] == 0x6D))
 951             return(XML_CHAR_ENCODING_UTF8);
 952         /*
 953          * Although not part of the recommendation, we also
 954          * attempt an "auto-recognition" of UTF-16LE and
 955          * UTF-16BE encodings.
 956          */
 957         if ((in[0] == 0x3C) && (in[1] == 0x00) &&
 958             (in[2] == 0x3F) && (in[3] == 0x00))
 959             return(XML_CHAR_ENCODING_UTF16LE);
 960         if ((in[0] == 0x00) && (in[1] == 0x3C) &&
 961             (in[2] == 0x00) && (in[3] == 0x3F))
 962             return(XML_CHAR_ENCODING_UTF16BE);
 963     }
 964     if (len >= 3) {
 965         /*
 966          * Errata on XML-1.0 June 20 2001
 967          * We now allow an UTF8 encoded BOM
 968          */
 969         if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
 970             (in[2] == 0xBF))
 971             return(XML_CHAR_ENCODING_UTF8);
 972     }
 973     /* For UTF-16 we can recognize by the BOM */
 974     if (len >= 2) {
 975         if ((in[0] == 0xFE) && (in[1] == 0xFF))
 976             return(XML_CHAR_ENCODING_UTF16BE);
 977         if ((in[0] == 0xFF) && (in[1] == 0xFE))
 978             return(XML_CHAR_ENCODING_UTF16LE);
 979     }
 980     return(XML_CHAR_ENCODING_NONE);
 981 }
 982
 983 /**
 984  * xmlCleanupEncodingAliases:
 985  *
 986  * Unregisters all aliases
 987  */
 988 void
 989 xmlCleanupEncodingAliases(void) {
 990     int i;
 991
 992     if (xmlCharEncodingAliases == NULL)
 993         return;
 994
 995     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
 996         if (xmlCharEncodingAliases[i].name != NULL)
 997             xmlFree((char *) xmlCharEncodingAliases[i].name);
 998         if (xmlCharEncodingAliases[i].alias != NULL)
 999             xmlFree((char *) xmlCharEncodingAliases[i].alias);
1000     }
1001     xmlCharEncodingAliasesNb = 0;
1002     xmlCharEncodingAliasesMax = 0;
1003     xmlFree(xmlCharEncodingAliases);
1004     xmlCharEncodingAliases = NULL;
1005 }
1006
1007 /**
1008  * xmlGetEncodingAlias:
1009  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1010  *
1011  * Lookup an encoding name for the given alias.
1012  *
1013  * Returns NULL if not found, otherwise the original name
1014  */
1015 const char *
1016 xmlGetEncodingAlias(const char *alias) {
1017     int i;
1018     char upper[100];
1019
1020     if (alias == NULL)
1021         return(NULL);
1022
1023     if (xmlCharEncodingAliases == NULL)
1024         return(NULL);
1025
1026     for (i = 0;i < 99;i++) {
1027         upper[i] = toupper(alias[i]);
1028         if (upper[i] == 0) break;
1029     }
1030     upper[i] = 0;
1031
1032     /*
1033      * Walk down the list looking for a definition of the alias
1034      */
1035     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1036         if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1037             return(xmlCharEncodingAliases[i].name);
1038         }
1039     }
1040     return(NULL);
1041 }
1042
1043 /**
1044  * xmlAddEncodingAlias:
1045  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1046  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1047  *
1048  * Registers an alias @alias for an encoding named @name. Existing alias
1049  * will be overwritten.
1050  *
1051  * Returns 0 in case of success, -1 in case of error
1052  */
1053 int
1054 xmlAddEncodingAlias(const char *name, const char *alias) {
1055     int i;
1056     char upper[100];
1057
1058     if ((name == NULL) || (alias == NULL))
1059         return(-1);
1060
1061     for (i = 0;i < 99;i++) {
1062         upper[i] = toupper(alias[i]);
1063         if (upper[i] == 0) break;
1064     }
1065     upper[i] = 0;
1066
1067     if (xmlCharEncodingAliases == NULL) {
1068         xmlCharEncodingAliasesNb = 0;
1069         xmlCharEncodingAliasesMax = 20;
1070         xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1071               xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1072         if (xmlCharEncodingAliases == NULL)
1073             return(-1);
1074     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1075         xmlCharEncodingAliasesMax *= 2;
1076         xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1077               xmlRealloc(xmlCharEncodingAliases,
1078                          xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1079     }
1080     /*
1081      * Walk down the list looking for a definition of the alias
1082      */
1083     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1084         if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1085             /*
1086              * Replace the definition.
1087              */
1088             xmlFree((char *) xmlCharEncodingAliases[i].name);
1089             xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1090             return(0);
1091         }
1092     }
1093     /*
1094      * Add the definition
1095      */
1096     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1097     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1098     xmlCharEncodingAliasesNb++;
1099     return(0);
1100 }
1101
1102 /**
1103  * xmlDelEncodingAlias:
1104  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1105  *
1106  * Unregisters an encoding alias @alias
1107  *
1108  * Returns 0 in case of success, -1 in case of error
1109  */
1110 int
1111 xmlDelEncodingAlias(const char *alias) {
1112     int i;
1113
1114     if (alias == NULL)
1115         return(-1);
1116
1117     if (xmlCharEncodingAliases == NULL)
1118         return(-1);
1119     /*
1120      * Walk down the list looking for a definition of the alias
1121      */
1122     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1123         if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1124             xmlFree((char *) xmlCharEncodingAliases[i].name);
1125             xmlFree((char *) xmlCharEncodingAliases[i].alias);
1126             xmlCharEncodingAliasesNb--;
1127             memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1128                     sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1129             return(0);
1130         }
1131     }
1132     return(-1);
1133 }
1134
1135 /**
1136  * xmlParseCharEncoding:
1137  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1138  *
1139  * Compare the string to the encoding schemes already known. Note
1140  * that the comparison is case insensitive accordingly to the section
1141  * [XML] 4.3.3 Character Encoding in Entities.
1142  *
1143  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1144  * if not recognized.
1145  */
1146 xmlCharEncoding
1147 xmlParseCharEncoding(const char* name)
1148 {
1149     const char *alias;
1150     char upper[500];
1151     int i;
1152
1153     if (name == NULL)
1154         return(XML_CHAR_ENCODING_NONE);
1155
1156     /*
1157      * Do the alias resolution
1158      */
1159     alias = xmlGetEncodingAlias(name);
1160     if (alias != NULL)
1161         name = alias;
1162
1163     for (i = 0;i < 499;i++) {
1164         upper[i] = toupper(name[i]);
1165         if (upper[i] == 0) break;
1166     }
1167     upper[i] = 0;
1168
1169     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1170     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1171     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1172
1173     /*
1174      * NOTE: if we were able to parse this, the endianness of UTF16 is
1175      *       already found and in use
1176      */
1177     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1178     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1179
1180     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1181     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1182     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1183
1184     /*
1185      * NOTE: if we were able to parse this, the endianness of UCS4 is
1186      *       already found and in use
1187      */
1188     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1189     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1190     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1191
1192
1193     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1194     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1195     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1196
1197     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1198     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1199     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1200
1201     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1202     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1203     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1204     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1205     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1206     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1207     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1208
1209     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1210     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1211     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1212
1213 #ifdef DEBUG_ENCODING
1214     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1215 #endif
1216     return(XML_CHAR_ENCODING_ERROR);
1217 }
1218
1219 /**
1220  * xmlGetCharEncodingName:
1221  * @enc:  the encoding
1222  *
1223  * The "canonical" name for XML encoding.
1224  * C.f. http://www.w3.org/TR/REC-xml#charencoding
1225  * Section 4.3.3  Character Encoding in Entities
1226  *
1227  * Returns the canonical name for the given encoding
1228  */
1229
1230 const char*
1231 xmlGetCharEncodingName(xmlCharEncoding enc) {
1232     switch (enc) {
1233         case XML_CHAR_ENCODING_ERROR:
1234             return(NULL);
1235         case XML_CHAR_ENCODING_NONE:
1236             return(NULL);
1237         case XML_CHAR_ENCODING_UTF8:
1238             return("UTF-8");
1239         case XML_CHAR_ENCODING_UTF16LE:
1240             return("UTF-16");
1241         case XML_CHAR_ENCODING_UTF16BE:
1242             return("UTF-16");
1243         case XML_CHAR_ENCODING_EBCDIC:
1244             return("EBCDIC");
1245         case XML_CHAR_ENCODING_UCS4LE:
1246             return("ISO-10646-UCS-4");
1247         case XML_CHAR_ENCODING_UCS4BE:
1248             return("ISO-10646-UCS-4");
1249         case XML_CHAR_ENCODING_UCS4_2143:
1250             return("ISO-10646-UCS-4");
1251         case XML_CHAR_ENCODING_UCS4_3412:
1252             return("ISO-10646-UCS-4");
1253         case XML_CHAR_ENCODING_UCS2:
1254             return("ISO-10646-UCS-2");
1255         case XML_CHAR_ENCODING_8859_1:
1256             return("ISO-8859-1");
1257         case XML_CHAR_ENCODING_8859_2:
1258             return("ISO-8859-2");
1259         case XML_CHAR_ENCODING_8859_3:
1260             return("ISO-8859-3");
1261         case XML_CHAR_ENCODING_8859_4:
1262             return("ISO-8859-4");
1263         case XML_CHAR_ENCODING_8859_5:
1264             return("ISO-8859-5");
1265         case XML_CHAR_ENCODING_8859_6:
1266             return("ISO-8859-6");
1267         case XML_CHAR_ENCODING_8859_7:
1268             return("ISO-8859-7");
1269         case XML_CHAR_ENCODING_8859_8:
1270             return("ISO-8859-8");
1271         case XML_CHAR_ENCODING_8859_9:
1272             return("ISO-8859-9");
1273         case XML_CHAR_ENCODING_2022_JP:
1274             return("ISO-2022-JP");
1275         case XML_CHAR_ENCODING_SHIFT_JIS:
1276             return("Shift-JIS");
1277         case XML_CHAR_ENCODING_EUC_JP:
1278             return("EUC-JP");
1279         case XML_CHAR_ENCODING_ASCII:
1280             return(NULL);
1281     }
1282     return(NULL);
1283 }
1284
1285 /************************************************************************
1286  *                                                                      *
1287  *                      Char encoding handlers                          *
1288  *                                                                      *
1289  ************************************************************************/
1290
1291
/*
 * Registry of char encoding handlers.
 *
 * MAX_ENCODING_HANDLERS is the fixed number of slots allocated for the
 * registry by xmlInitCharEncodingHandlers(). The size should be growable,
 * but it's not a big deal: xmlRegisterCharEncodingHandler() simply refuses
 * further handlers once nbCharEncodingHandler reaches this limit.
 */
#define MAX_ENCODING_HANDLERS 50
/* Handler table; NULL until xmlInitCharEncodingHandlers() allocates it. */
static xmlCharEncodingHandlerPtr *handlers = NULL;
/* Number of entries of handlers[] currently in use. */
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, which is also the encoding used for the
 * parser internals, so the default encoding handler is NULL.
 * xmlFindCharEncodingHandler() returns this value when asked for a NULL
 * or empty encoding name.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1303
1304 /**
1305  * xmlNewCharEncodingHandler:
1306  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1307  * @input:  the xmlCharEncodingInputFunc to read that encoding
1308  * @output:  the xmlCharEncodingOutputFunc to write that encoding
1309  *
1310  * Create and registers an xmlCharEncodingHandler.
1311  *
1312  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1313  */
1314 xmlCharEncodingHandlerPtr
1315 xmlNewCharEncodingHandler(const char *name,
1316                           xmlCharEncodingInputFunc input,
1317                           xmlCharEncodingOutputFunc output) {
1318     xmlCharEncodingHandlerPtr handler;
1319     const char *alias;
1320     char upper[500];
1321     int i;
1322     char *up = NULL;
1323
1324     /*
1325      * Do the alias resolution
1326      */
1327     alias = xmlGetEncodingAlias(name);
1328     if (alias != NULL)
1329         name = alias;
1330
1331     /*
1332      * Keep only the uppercase version of the encoding.
1333      */
1334     if (name == NULL) {
1335         xmlEncodingErr(XML_I18N_NO_NAME,
1336                        "xmlNewCharEncodingHandler : no name !\n", NULL);
1337         return(NULL);
1338     }
1339     for (i = 0;i < 499;i++) {
1340         upper[i] = toupper(name[i]);
1341         if (upper[i] == 0) break;
1342     }
1343     upper[i] = 0;
1344     up = xmlMemStrdup(upper);
1345     if (up == NULL) {
1346         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1347         return(NULL);
1348     }
1349
1350     /*
1351      * allocate and fill-up an handler block.
1352      */
1353     handler = (xmlCharEncodingHandlerPtr)
1354               xmlMalloc(sizeof(xmlCharEncodingHandler));
1355     if (handler == NULL) {
1356         xmlFree(up);
1357         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1358         return(NULL);
1359     }
1360     memset(handler, 0, sizeof(xmlCharEncodingHandler));
1361     handler->input = input;
1362     handler->output = output;
1363     handler->name = up;
1364
1365 #ifdef LIBXML_ICONV_ENABLED
1366     handler->iconv_in = NULL;
1367     handler->iconv_out = NULL;
1368 #endif
1369 #ifdef LIBXML_ICU_ENABLED
1370     handler->uconv_in = NULL;
1371     handler->uconv_out = NULL;
1372 #endif
1373
1374     /*
1375      * registers and returns the handler.
1376      */
1377     xmlRegisterCharEncodingHandler(handler);
1378 #ifdef DEBUG_ENCODING
1379     xmlGenericError(xmlGenericErrorContext,
1380             "Registered encoding handler for %s\n", name);
1381 #endif
1382     return(handler);
1383 }
1384
1385 /**
1386  * xmlInitCharEncodingHandlers:
1387  *
1388  * Initialize the char encoding support, it registers the default
1389  * encoding supported.
1390  * NOTE: while public, this function usually doesn't need to be called
1391  *       in normal processing.
1392  */
1393 void
1394 xmlInitCharEncodingHandlers(void) {
1395     unsigned short int tst = 0x1234;
1396     unsigned char *ptr = (unsigned char *) &tst;
1397
1398     if (handlers != NULL) return;
1399
1400     handlers = (xmlCharEncodingHandlerPtr *)
1401         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1402
1403     if (*ptr == 0x12) xmlLittleEndian = 0;
1404     else if (*ptr == 0x34) xmlLittleEndian = 1;
1405     else {
1406         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1407                        "Odd problem at endianness detection\n", NULL);
1408     }
1409
1410     if (handlers == NULL) {
1411         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1412         return;
1413     }
1414     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1415 #ifdef LIBXML_OUTPUT_ENABLED
1416     xmlUTF16LEHandler =
1417           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1418     xmlUTF16BEHandler =
1419           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1420     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1421     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1422     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1423     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1424 #ifdef LIBXML_HTML_ENABLED
1425     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1426 #endif
1427 #else
1428     xmlUTF16LEHandler =
1429           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1430     xmlUTF16BEHandler =
1431           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1432     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1433     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1434     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1435     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1436 #endif /* LIBXML_OUTPUT_ENABLED */
1437 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1438 #ifdef LIBXML_ISO8859X_ENABLED
1439     xmlRegisterCharEncodingHandlersISO8859x ();
1440 #endif
1441 #endif
1442
1443 }
1444
1445 /**
1446  * xmlCleanupCharEncodingHandlers:
1447  *
1448  * Cleanup the memory allocated for the char encoding support, it
1449  * unregisters all the encoding handlers and the aliases.
1450  */
1451 void
1452 xmlCleanupCharEncodingHandlers(void) {
1453     xmlCleanupEncodingAliases();
1454
1455     if (handlers == NULL) return;
1456
1457     for (;nbCharEncodingHandler > 0;) {
1458         nbCharEncodingHandler--;
1459         if (handlers[nbCharEncodingHandler] != NULL) {
1460             if (handlers[nbCharEncodingHandler]->name != NULL)
1461                 xmlFree(handlers[nbCharEncodingHandler]->name);
1462             xmlFree(handlers[nbCharEncodingHandler]);
1463         }
1464     }
1465     xmlFree(handlers);
1466     handlers = NULL;
1467     nbCharEncodingHandler = 0;
1468     xmlDefaultCharEncodingHandler = NULL;
1469 }
1470
1471 /**
1472  * xmlRegisterCharEncodingHandler:
1473  * @handler:  the xmlCharEncodingHandlerPtr handler block
1474  *
1475  * Register the char encoding handler, surprising, isn't it ?
1476  */
1477 void
1478 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1479     if (handlers == NULL) xmlInitCharEncodingHandlers();
1480     if ((handler == NULL) || (handlers == NULL)) {
1481         xmlEncodingErr(XML_I18N_NO_HANDLER,
1482                 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1483         return;
1484     }
1485
1486     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1487         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1488         "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1489                        "MAX_ENCODING_HANDLERS");
1490         return;
1491     }
1492     handlers[nbCharEncodingHandler++] = handler;
1493 }
1494
1495 /**
1496  * xmlGetCharEncodingHandler:
1497  * @enc:  an xmlCharEncoding value.
1498  *
1499  * Search in the registered set the handler able to read/write that encoding.
1500  *
1501  * Returns the handler or NULL if not found
1502  */
1503 xmlCharEncodingHandlerPtr
1504 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1505     xmlCharEncodingHandlerPtr handler;
1506
1507     if (handlers == NULL) xmlInitCharEncodingHandlers();
1508     switch (enc) {
1509         case XML_CHAR_ENCODING_ERROR:
1510             return(NULL);
1511         case XML_CHAR_ENCODING_NONE:
1512             return(NULL);
1513         case XML_CHAR_ENCODING_UTF8:
1514             return(NULL);
1515         case XML_CHAR_ENCODING_UTF16LE:
1516             return(xmlUTF16LEHandler);
1517         case XML_CHAR_ENCODING_UTF16BE:
1518             return(xmlUTF16BEHandler);
1519         case XML_CHAR_ENCODING_EBCDIC:
1520             handler = xmlFindCharEncodingHandler("EBCDIC");
1521             if (handler != NULL) return(handler);
1522             handler = xmlFindCharEncodingHandler("ebcdic");
1523             if (handler != NULL) return(handler);
1524             handler = xmlFindCharEncodingHandler("EBCDIC-US");
1525             if (handler != NULL) return(handler);
1526             handler = xmlFindCharEncodingHandler("IBM-037");
1527             if (handler != NULL) return(handler);
1528             break;
1529         case XML_CHAR_ENCODING_UCS4BE:
1530             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1531             if (handler != NULL) return(handler);
1532             handler = xmlFindCharEncodingHandler("UCS-4");
1533             if (handler != NULL) return(handler);
1534             handler = xmlFindCharEncodingHandler("UCS4");
1535             if (handler != NULL) return(handler);
1536             break;
1537         case XML_CHAR_ENCODING_UCS4LE:
1538             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1539             if (handler != NULL) return(handler);
1540             handler = xmlFindCharEncodingHandler("UCS-4");
1541             if (handler != NULL) return(handler);
1542             handler = xmlFindCharEncodingHandler("UCS4");
1543             if (handler != NULL) return(handler);
1544             break;
1545         case XML_CHAR_ENCODING_UCS4_2143:
1546             break;
1547         case XML_CHAR_ENCODING_UCS4_3412:
1548             break;
1549         case XML_CHAR_ENCODING_UCS2:
1550             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1551             if (handler != NULL) return(handler);
1552             handler = xmlFindCharEncodingHandler("UCS-2");
1553             if (handler != NULL) return(handler);
1554             handler = xmlFindCharEncodingHandler("UCS2");
1555             if (handler != NULL) return(handler);
1556             break;
1557
1558             /*
1559              * We used to keep ISO Latin encodings native in the
1560              * generated data. This led to so many problems that
1561              * this has been removed. One can still change this
1562              * back by registering no-ops encoders for those
1563              */
1564         case XML_CHAR_ENCODING_8859_1:
1565             handler = xmlFindCharEncodingHandler("ISO-8859-1");
1566             if (handler != NULL) return(handler);
1567             break;
1568         case XML_CHAR_ENCODING_8859_2:
1569             handler = xmlFindCharEncodingHandler("ISO-8859-2");
1570             if (handler != NULL) return(handler);
1571             break;
1572         case XML_CHAR_ENCODING_8859_3:
1573             handler = xmlFindCharEncodingHandler("ISO-8859-3");
1574             if (handler != NULL) return(handler);
1575             break;
1576         case XML_CHAR_ENCODING_8859_4:
1577             handler = xmlFindCharEncodingHandler("ISO-8859-4");
1578             if (handler != NULL) return(handler);
1579             break;
1580         case XML_CHAR_ENCODING_8859_5:
1581             handler = xmlFindCharEncodingHandler("ISO-8859-5");
1582             if (handler != NULL) return(handler);
1583             break;
1584         case XML_CHAR_ENCODING_8859_6:
1585             handler = xmlFindCharEncodingHandler("ISO-8859-6");
1586             if (handler != NULL) return(handler);
1587             break;
1588         case XML_CHAR_ENCODING_8859_7:
1589             handler = xmlFindCharEncodingHandler("ISO-8859-7");
1590             if (handler != NULL) return(handler);
1591             break;
1592         case XML_CHAR_ENCODING_8859_8:
1593             handler = xmlFindCharEncodingHandler("ISO-8859-8");
1594             if (handler != NULL) return(handler);
1595             break;
1596         case XML_CHAR_ENCODING_8859_9:
1597             handler = xmlFindCharEncodingHandler("ISO-8859-9");
1598             if (handler != NULL) return(handler);
1599             break;
1600
1601
1602         case XML_CHAR_ENCODING_2022_JP:
1603             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1604             if (handler != NULL) return(handler);
1605             break;
1606         case XML_CHAR_ENCODING_SHIFT_JIS:
1607             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1608             if (handler != NULL) return(handler);
1609             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1610             if (handler != NULL) return(handler);
1611             handler = xmlFindCharEncodingHandler("Shift_JIS");
1612             if (handler != NULL) return(handler);
1613             break;
1614         case XML_CHAR_ENCODING_EUC_JP:
1615             handler = xmlFindCharEncodingHandler("EUC-JP");
1616             if (handler != NULL) return(handler);
1617             break;
1618         default:
1619             break;
1620     }
1621
1622 #ifdef DEBUG_ENCODING
1623     xmlGenericError(xmlGenericErrorContext,
1624             "No handler found for encoding %d\n", enc);
1625 #endif
1626     return(NULL);
1627 }
1628
1629 /**
1630  * xmlFindCharEncodingHandler:
1631  * @name:  a string describing the char encoding.
1632  *
1633  * Search in the registered set the handler able to read/write that encoding.
1634  *
1635  * Returns the handler or NULL if not found
1636  */
1637 xmlCharEncodingHandlerPtr
1638 xmlFindCharEncodingHandler(const char *name) {
1639     const char *nalias;
1640     const char *norig;
1641     xmlCharEncoding alias;
1642 #ifdef LIBXML_ICONV_ENABLED
1643     xmlCharEncodingHandlerPtr enc;
1644     iconv_t icv_in, icv_out;
1645 #endif /* LIBXML_ICONV_ENABLED */
1646 #ifdef LIBXML_ICU_ENABLED
1647     xmlCharEncodingHandlerPtr encu;
1648     uconv_t *ucv_in, *ucv_out;
1649 #endif /* LIBXML_ICU_ENABLED */
1650     char upper[100];
1651     int i;
1652
1653     if (handlers == NULL) xmlInitCharEncodingHandlers();
1654     if (name == NULL) return(xmlDefaultCharEncodingHandler);
1655     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1656
1657     /*
1658      * Do the alias resolution
1659      */
1660     norig = name;
1661     nalias = xmlGetEncodingAlias(name);
1662     if (nalias != NULL)
1663         name = nalias;
1664
1665     /*
1666      * Check first for directly registered encoding names
1667      */
1668     for (i = 0;i < 99;i++) {
1669         upper[i] = toupper(name[i]);
1670         if (upper[i] == 0) break;
1671     }
1672     upper[i] = 0;
1673
1674     if (handlers != NULL) {
1675         for (i = 0;i < nbCharEncodingHandler; i++) {
1676             if (!strcmp(upper, handlers[i]->name)) {
1677 #ifdef DEBUG_ENCODING
1678                 xmlGenericError(xmlGenericErrorContext,
1679                         "Found registered handler for encoding %s\n", name);
1680 #endif
1681                 return(handlers[i]);
1682             }
1683         }
1684     }
1685
1686 #ifdef LIBXML_ICONV_ENABLED
1687     /* check whether iconv can handle this */
1688     icv_in = iconv_open("UTF-8", name);
1689     icv_out = iconv_open(name, "UTF-8");
1690     if (icv_in == (iconv_t) -1) {
1691         icv_in = iconv_open("UTF-8", upper);
1692     }
1693     if (icv_out == (iconv_t) -1) {
1694         icv_out = iconv_open(upper, "UTF-8");
1695     }
1696     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1697             enc = (xmlCharEncodingHandlerPtr)
1698                   xmlMalloc(sizeof(xmlCharEncodingHandler));
1699             if (enc == NULL) {
1700                 iconv_close(icv_in);
1701                 iconv_close(icv_out);
1702                 return(NULL);
1703             }
1704             memset(enc, 0, sizeof(xmlCharEncodingHandler));
1705             enc->name = xmlMemStrdup(name);
1706             enc->input = NULL;
1707             enc->output = NULL;
1708             enc->iconv_in = icv_in;
1709             enc->iconv_out = icv_out;
1710 #ifdef DEBUG_ENCODING
1711             xmlGenericError(xmlGenericErrorContext,
1712                     "Found iconv handler for encoding %s\n", name);
1713 #endif
1714             return enc;
1715     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1716             xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1717                     "iconv : problems with filters for '%s'\n", name);
1718     }
1719 #endif /* LIBXML_ICONV_ENABLED */
1720 #ifdef LIBXML_ICU_ENABLED
1721     /* check whether icu can handle this */
1722     ucv_in = openIcuConverter(name, 1);
1723     ucv_out = openIcuConverter(name, 0);
1724     if (ucv_in != NULL && ucv_out != NULL) {
1725             encu = (xmlCharEncodingHandlerPtr)
1726                    xmlMalloc(sizeof(xmlCharEncodingHandler));
1727             if (encu == NULL) {
1728                 closeIcuConverter(ucv_in);
1729                 closeIcuConverter(ucv_out);
1730                 return(NULL);
1731             }
1732             memset(encu, 0, sizeof(xmlCharEncodingHandler));
1733             encu->name = xmlMemStrdup(name);
1734             encu->input = NULL;
1735             encu->output = NULL;
1736             encu->uconv_in = ucv_in;
1737             encu->uconv_out = ucv_out;
1738 #ifdef DEBUG_ENCODING
1739             xmlGenericError(xmlGenericErrorContext,
1740                     "Found ICU converter handler for encoding %s\n", name);
1741 #endif
1742             return encu;
1743     } else if (ucv_in != NULL || ucv_out != NULL) {
1744             closeIcuConverter(ucv_in);
1745             closeIcuConverter(ucv_out);
1746             xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1747                     "ICU converter : problems with filters for '%s'\n", name);
1748     }
1749 #endif /* LIBXML_ICU_ENABLED */
1750
1751 #ifdef DEBUG_ENCODING
1752     xmlGenericError(xmlGenericErrorContext,
1753             "No handler found for encoding %s\n", name);
1754 #endif
1755
1756     /*
1757      * Fallback using the canonical names
1758      */
1759     alias = xmlParseCharEncoding(norig);
1760     if (alias != XML_CHAR_ENCODING_ERROR) {
1761         const char* canon;
1762         canon = xmlGetCharEncodingName(alias);
1763         if ((canon != NULL) && (strcmp(name, canon))) {
1764             return(xmlFindCharEncodingHandler(canon));
1765         }
1766     }
1767
1768     /* If "none of the above", give up */
1769     return(NULL);
1770 }
1771
1772 /************************************************************************
1773  *                                                                      *
1774  *              ICONV based generic conversion functions                *
1775  *                                                                      *
1776  ************************************************************************/
1777
1778 #ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv conversion descriptor
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes in the source encoding of @cd
 * @inlen:  the length of @in
 *
 * Runs a single iconv() call over @in and maps its outcome onto the
 * return codes used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (E2BIG) or if one of the arguments is NULL, or
 *     -2 if the transcoding fails (EILSEQ: the input is not valid for the
 *        source encoding or cannot be represented in the target one), or
 *     -3 if the last bytes can't form a single output char (EINVAL) or
 *        for any other iconv failure.
 *
 * When iconv() was called, the value of @inlen after return is the number
 * of octets consumed and the value of @outlen after return is the number
 * of octets produced. When an argument is NULL, @outlen is set to 0 and
 * @inlen is left untouched.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret; /* iconv() returns size_t; keep it unnarrowed */

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }
    icv_inlen = *inlen;
    icv_outlen = *outlen;
    /*
     * Some iconv() prototypes take "char **", others "const char **" for
     * the input; going through "void *" is accepted by both.
     */
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    /* iconv() leaves the remaining byte counts behind: turn them into
     * consumed / produced counts. */
    *inlen -= icv_inlen;
    *outlen -= icv_outlen;
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (errno == EILSEQ)
            return(-2);
#endif
#ifdef E2BIG
        if (errno == E2BIG)
            return(-1);
#endif
        /* EINVAL (truncated multibyte sequence) and anything else */
        return(-3);
    }
    return(0);
}
1836 #endif /* LIBXML_ICONV_ENABLED */
1837
1838 /************************************************************************
1839  *                                                                      *
1840  *              ICU based generic conversion functions          *
1841  *                                                                      *
1842  ************************************************************************/
1843
1844 #ifdef LIBXML_ICU_ENABLED
1845 /**
1846  * xmlUconvWrapper:
1847  * @cd: ICU uconverter data structure
1848  * @toUnicode : non-zero if toUnicode. 0 otherwise.
1849  * @out:  a pointer to an array of bytes to store the result
1850  * @outlen:  the length of @out
1851  * @in:  a pointer to an array of ISO Latin 1 chars
1852  * @inlen:  the length of @in
1853  *
1854  * Returns 0 if success, or
1855  *     -1 by lack of space, or
1856  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1857  *        the result of transformation can't fit into the encoding we want), or
1858  *     -3 if there the last byte can't form a single output char.
1859  *
1860  * The value of @inlen after return is the number of octets consumed
1861  *     as the return value is positive, else unpredictable.
1862  * The value of @outlen after return is the number of ocetes consumed.
1863  */
1864 static int
1865 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1866                 const unsigned char *in, int *inlen) {
1867     const char *ucv_in = (const char *) in;
1868     char *ucv_out = (char *) out;
1869     UErrorCode err = U_ZERO_ERROR;
1870
1871     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1872         if (outlen != NULL) *outlen = 0;
1873         return(-1);
1874     }
1875
1876     /*
1877      * TODO(jungshik)
1878      * 1. is ucnv_convert(To|From)Algorithmic better?
1879      * 2. had we better use an explicit pivot buffer?
1880      * 3. error returned comes from 'fromUnicode' only even
1881      *    when toUnicode is true !
1882      */
1883     if (toUnicode) {
1884         /* encoding => UTF-16 => UTF-8 */
1885         ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1886                        &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1887                        0, TRUE, &err);
1888     } else {
1889         /* UTF-8 => UTF-16 => encoding */
1890         ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1891                        &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1892                        0, TRUE, &err);
1893     }
1894     *inlen = ucv_in - (const char*) in;
1895     *outlen = ucv_out - (char *) out;
1896     if (U_SUCCESS(err))
1897         return 0;
1898     if (err == U_BUFFER_OVERFLOW_ERROR)
1899         return -1;
1900     if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1901         return -2;
1902     /* if (err == U_TRUNCATED_CHAR_FOUND) */
1903     return -3;
1904 }
1905 #endif /* LIBXML_ICU_ENABLED */
1906
1907 /************************************************************************
1908  *                                                                      *
1909  *              The real API used by libxml for on-the-fly conversion   *
1910  *                                                                      *
1911  ************************************************************************/
1912
1913 static int
1914 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1915                  int *outlen, const unsigned char *in, int *inlen) {
1916     int ret;
1917
1918     if (handler->input != NULL) {
1919         ret = handler->input(out, outlen, in, inlen);
1920     }
1921 #ifdef LIBXML_ICONV_ENABLED
1922     else if (handler->iconv_in != NULL) {
1923         ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1924     }
1925 #endif /* LIBXML_ICONV_ENABLED */
1926 #ifdef LIBXML_ICU_ENABLED
1927     else if (handler->uconv_in != NULL) {
1928         ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen);
1929     }
1930 #endif /* LIBXML_ICU_ENABLED */
1931     else {
1932         *outlen = 0;
1933         *inlen = 0;
1934         ret = -2;
1935     }
1936
1937     return(ret);
1938 }
1939
1940 /* Returns -4 if no output function was found. */
1941 static int
1942 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1943                   int *outlen, const unsigned char *in, int *inlen) {
1944     int ret;
1945
1946     if (handler->output != NULL) {
1947         ret = handler->output(out, outlen, in, inlen);
1948     }
1949 #ifdef LIBXML_ICONV_ENABLED
1950     else if (handler->iconv_out != NULL) {
1951         ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
1952     }
1953 #endif /* LIBXML_ICONV_ENABLED */
1954 #ifdef LIBXML_ICU_ENABLED
1955     else if (handler->uconv_out != NULL) {
1956         ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen);
1957     }
1958 #endif /* LIBXML_ICU_ENABLED */
1959     else {
1960         *outlen = 0;
1961         *inlen = 0;
1962         ret = -4;
1963     }
1964
1965     return(ret);
1966 }
1967
1968 /**
1969  * xmlCharEncFirstLineInt:
1970  * @handler:    char enconding transformation data structure
1971  * @out:  an xmlBuffer for the output.
1972  * @in:  an xmlBuffer for the input
1973  * @len:  number of bytes to convert for the first line, or -1
1974  *
1975  * Front-end for the encoding handler input function, but handle only
1976  * the very first line, i.e. limit itself to 45 chars.
1977  *
1978  * Returns the number of byte written if success, or
1979  *     -1 general error
1980  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1981  *        the result of transformation can't fit into the encoding we want), or
1982  */
1983 int
1984 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1985                        xmlBufferPtr in, int len) {
1986     int ret;
1987     int written;
1988     int toconv;
1989
1990     if (handler == NULL) return(-1);
1991     if (out == NULL) return(-1);
1992     if (in == NULL) return(-1);
1993
1994     /* calculate space available */
1995     written = out->size - out->use - 1; /* count '\0' */
1996     toconv = in->use;
1997     /*
1998      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1999      * 45 chars should be sufficient to reach the end of the encoding
2000      * declaration without going too far inside the document content.
2001      * on UTF-16 this means 90bytes, on UCS4 this means 180
2002      * The actual value depending on guessed encoding is passed as @len
2003      * if provided
2004      */
2005     if (len >= 0) {
2006         if (toconv > len)
2007             toconv = len;
2008     } else {
2009         if (toconv > 180)
2010             toconv = 180;
2011     }
2012     if (toconv * 2 >= written) {
2013         xmlBufferGrow(out, toconv * 2);
2014         written = out->size - out->use - 1;
2015     }
2016
2017     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2018                            in->content, &toconv);
2019     xmlBufferShrink(in, toconv);
2020     out->use += written;
2021     out->content[out->use] = 0;
2022     if (ret == -1) ret = -3;
2023
2024 #ifdef DEBUG_ENCODING
2025     switch (ret) {
2026         case 0:
2027             xmlGenericError(xmlGenericErrorContext,
2028                     "converted %d bytes to %d bytes of input\n",
2029                     toconv, written);
2030             break;
2031         case -1:
2032             xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2033                     toconv, written, in->use);
2034             break;
2035         case -2:
2036             xmlGenericError(xmlGenericErrorContext,
2037                     "input conversion failed due to input error\n");
2038             break;
2039         case -3:
2040             xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2041                     toconv, written, in->use);
2042             break;
2043         default:
2044             xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2045     }
2046 #endif /* DEBUG_ENCODING */
2047     /*
2048      * Ignore when input buffer is not on a boundary
2049      */
2050     if (ret == -3) ret = 0;
2051     if (ret == -1) ret = 0;
2052     return(ret);
2053 }
2054
2055 /**
2056  * xmlCharEncFirstLine:
2057  * @handler:    char enconding transformation data structure
2058  * @out:  an xmlBuffer for the output.
2059  * @in:  an xmlBuffer for the input
2060  *
2061  * Front-end for the encoding handler input function, but handle only
2062  * the very first line, i.e. limit itself to 45 chars.
2063  *
2064  * Returns the number of byte written if success, or
2065  *     -1 general error
2066  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2067  *        the result of transformation can't fit into the encoding we want), or
2068  */
2069 int
2070 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2071                  xmlBufferPtr in) {
2072     return(xmlCharEncFirstLineInt(handler, out, in, -1));
2073 }
2074
2075 /**
2076  * xmlCharEncFirstLineInput:
2077  * @input: a parser input buffer
2078  * @len:  number of bytes to convert for the first line, or -1
2079  *
2080  * Front-end for the encoding handler input function, but handle only
2081  * the very first line. Point is that this is based on autodetection
2082  * of the encoding and once that first line is converted we may find
2083  * out that a different decoder is needed to process the input.
2084  *
2085  * Returns the number of byte written if success, or
2086  *     -1 general error
2087  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2088  *        the result of transformation can't fit into the encoding we want), or
2089  */
2090 int
2091 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2092 {
2093     int ret;
2094     size_t written;
2095     size_t toconv;
2096     int c_in;
2097     int c_out;
2098     xmlBufPtr in;
2099     xmlBufPtr out;
2100
2101     if ((input == NULL) || (input->encoder == NULL) ||
2102         (input->buffer == NULL) || (input->raw == NULL))
2103         return (-1);
2104     out = input->buffer;
2105     in = input->raw;
2106
2107     toconv = xmlBufUse(in);
2108     if (toconv == 0)
2109         return (0);
2110     written = xmlBufAvail(out) - 1; /* count '\0' */
2111     /*
2112      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2113      * 45 chars should be sufficient to reach the end of the encoding
2114      * declaration without going too far inside the document content.
2115      * on UTF-16 this means 90bytes, on UCS4 this means 180
2116      * The actual value depending on guessed encoding is passed as @len
2117      * if provided
2118      */
2119     if (len >= 0) {
2120         if (toconv > (unsigned int) len)
2121             toconv = len;
2122     } else {
2123         if (toconv > 180)
2124             toconv = 180;
2125     }
2126     if (toconv * 2 >= written) {
2127         xmlBufGrow(out, toconv * 2);
2128         written = xmlBufAvail(out) - 1;
2129     }
2130     if (written > 360)
2131         written = 360;
2132
2133     c_in = toconv;
2134     c_out = written;
2135     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2136                            xmlBufContent(in), &c_in);
2137     xmlBufShrink(in, c_in);
2138     xmlBufAddLen(out, c_out);
2139     if (ret == -1)
2140         ret = -3;
2141
2142     switch (ret) {
2143         case 0:
2144 #ifdef DEBUG_ENCODING
2145             xmlGenericError(xmlGenericErrorContext,
2146                             "converted %d bytes to %d bytes of input\n",
2147                             c_in, c_out);
2148 #endif
2149             break;
2150         case -1:
2151 #ifdef DEBUG_ENCODING
2152             xmlGenericError(xmlGenericErrorContext,
2153                          "converted %d bytes to %d bytes of input, %d left\n",
2154                             c_in, c_out, (int)xmlBufUse(in));
2155 #endif
2156             break;
2157         case -3:
2158 #ifdef DEBUG_ENCODING
2159             xmlGenericError(xmlGenericErrorContext,
2160                         "converted %d bytes to %d bytes of input, %d left\n",
2161                             c_in, c_out, (int)xmlBufUse(in));
2162 #endif
2163             break;
2164         case -2: {
2165             char buf[50];
2166             const xmlChar *content = xmlBufContent(in);
2167
2168             snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2169                      content[0], content[1],
2170                      content[2], content[3]);
2171             buf[49] = 0;
2172             xmlEncodingErr(XML_I18N_CONV_FAILED,
2173                     "input conversion failed due to input error, bytes %s\n",
2174                            buf);
2175         }
2176     }
2177     /*
2178      * Ignore when input buffer is not on a boundary
2179      */
2180     if (ret == -3) ret = 0;
2181     if (ret == -1) ret = 0;
2182     return(ret);
2183 }
2184
2185 /**
2186  * xmlCharEncInput:
2187  * @input: a parser input buffer
2188  * @flush: try to flush all the raw buffer
2189  *
2190  * Generic front-end for the encoding handler on parser input
2191  *
2192  * Returns the number of byte written if success, or
2193  *     -1 general error
2194  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2195  *        the result of transformation can't fit into the encoding we want), or
2196  */
2197 int
2198 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2199 {
2200     int ret;
2201     size_t written;
2202     size_t toconv;
2203     int c_in;
2204     int c_out;
2205     xmlBufPtr in;
2206     xmlBufPtr out;
2207
2208     if ((input == NULL) || (input->encoder == NULL) ||
2209         (input->buffer == NULL) || (input->raw == NULL))
2210         return (-1);
2211     out = input->buffer;
2212     in = input->raw;
2213
2214     toconv = xmlBufUse(in);
2215     if (toconv == 0)
2216         return (0);
2217     if ((toconv > 64 * 1024) && (flush == 0))
2218         toconv = 64 * 1024;
2219     written = xmlBufAvail(out);
2220     if (written > 0)
2221         written--; /* count '\0' */
2222     if (toconv * 2 >= written) {
2223         xmlBufGrow(out, toconv * 2);
2224         written = xmlBufAvail(out);
2225         if (written > 0)
2226             written--; /* count '\0' */
2227     }
2228     if ((written > 128 * 1024) && (flush == 0))
2229         written = 128 * 1024;
2230
2231     c_in = toconv;
2232     c_out = written;
2233     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2234                            xmlBufContent(in), &c_in);
2235     xmlBufShrink(in, c_in);
2236     xmlBufAddLen(out, c_out);
2237     if (ret == -1)
2238         ret = -3;
2239
2240     switch (ret) {
2241         case 0:
2242 #ifdef DEBUG_ENCODING
2243             xmlGenericError(xmlGenericErrorContext,
2244                             "converted %d bytes to %d bytes of input\n",
2245                             c_in, c_out);
2246 #endif
2247             break;
2248         case -1:
2249 #ifdef DEBUG_ENCODING
2250             xmlGenericError(xmlGenericErrorContext,
2251                          "converted %d bytes to %d bytes of input, %d left\n",
2252                             c_in, c_out, (int)xmlBufUse(in));
2253 #endif
2254             break;
2255         case -3:
2256 #ifdef DEBUG_ENCODING
2257             xmlGenericError(xmlGenericErrorContext,
2258                         "converted %d bytes to %d bytes of input, %d left\n",
2259                             c_in, c_out, (int)xmlBufUse(in));
2260 #endif
2261             break;
2262         case -2: {
2263             char buf[50];
2264             const xmlChar *content = xmlBufContent(in);
2265
2266             snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2267                      content[0], content[1],
2268                      content[2], content[3]);
2269             buf[49] = 0;
2270             xmlEncodingErr(XML_I18N_CONV_FAILED,
2271                     "input conversion failed due to input error, bytes %s\n",
2272                            buf);
2273         }
2274     }
2275     /*
2276      * Ignore when input buffer is not on a boundary
2277      */
2278     if (ret == -3)
2279         ret = 0;
2280     return (c_out? c_out : ret);
2281 }
2282
2283 /**
2284  * xmlCharEncInFunc:
2285  * @handler:    char encoding transformation data structure
2286  * @out:  an xmlBuffer for the output.
2287  * @in:  an xmlBuffer for the input
2288  *
2289  * Generic front-end for the encoding handler input function
2290  *
2291  * Returns the number of byte written if success, or
2292  *     -1 general error
2293  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2294  *        the result of transformation can't fit into the encoding we want), or
2295  */
2296 int
2297 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2298                  xmlBufferPtr in)
2299 {
2300     int ret;
2301     int written;
2302     int toconv;
2303
2304     if (handler == NULL)
2305         return (-1);
2306     if (out == NULL)
2307         return (-1);
2308     if (in == NULL)
2309         return (-1);
2310
2311     toconv = in->use;
2312     if (toconv == 0)
2313         return (0);
2314     written = out->size - out->use -1; /* count '\0' */
2315     if (toconv * 2 >= written) {
2316         xmlBufferGrow(out, out->size + toconv * 2);
2317         written = out->size - out->use - 1;
2318     }
2319     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2320                            in->content, &toconv);
2321     xmlBufferShrink(in, toconv);
2322     out->use += written;
2323     out->content[out->use] = 0;
2324     if (ret == -1)
2325         ret = -3;
2326
2327     switch (ret) {
2328         case 0:
2329 #ifdef DEBUG_ENCODING
2330             xmlGenericError(xmlGenericErrorContext,
2331                             "converted %d bytes to %d bytes of input\n",
2332                             toconv, written);
2333 #endif
2334             break;
2335         case -1:
2336 #ifdef DEBUG_ENCODING
2337             xmlGenericError(xmlGenericErrorContext,
2338                          "converted %d bytes to %d bytes of input, %d left\n",
2339                             toconv, written, in->use);
2340 #endif
2341             break;
2342         case -3:
2343 #ifdef DEBUG_ENCODING
2344             xmlGenericError(xmlGenericErrorContext,
2345                         "converted %d bytes to %d bytes of input, %d left\n",
2346                             toconv, written, in->use);
2347 #endif
2348             break;
2349         case -2: {
2350             char buf[50];
2351
2352             snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2353                      in->content[0], in->content[1],
2354                      in->content[2], in->content[3]);
2355             buf[49] = 0;
2356             xmlEncodingErr(XML_I18N_CONV_FAILED,
2357                     "input conversion failed due to input error, bytes %s\n",
2358                            buf);
2359         }
2360     }
2361     /*
2362      * Ignore when input buffer is not on a boundary
2363      */
2364     if (ret == -3)
2365         ret = 0;
2366     return (written? written : ret);
2367 }
2368
2369 #ifdef LIBXML_OUTPUT_ENABLED
2370 /**
2371  * xmlCharEncOutput:
2372  * @output: a parser output buffer
2373  * @init: is this an initialization call without data
2374  *
2375  * Generic front-end for the encoding handler on parser output
2376  * a first call with @init == 1 has to be made first to initiate the
2377  * output in case of non-stateless encoding needing to initiate their
2378  * state or the output (like the BOM in UTF16).
2379  * In case of UTF8 sequence conversion errors for the given encoder,
2380  * the content will be automatically remapped to a CharRef sequence.
2381  *
2382  * Returns the number of byte written if success, or
2383  *     -1 general error
2384  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2385  *        the result of transformation can't fit into the encoding we want), or
2386  */
2387 int
2388 xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2389 {
2390     int ret;
2391     size_t written;
2392     size_t writtentot = 0;
2393     size_t toconv;
2394     int c_in;
2395     int c_out;
2396     xmlBufPtr in;
2397     xmlBufPtr out;
2398
2399     if ((output == NULL) || (output->encoder == NULL) ||
2400         (output->buffer == NULL) || (output->conv == NULL))
2401         return (-1);
2402     out = output->conv;
2403     in = output->buffer;
2404
2405 retry:
2406
2407     written = xmlBufAvail(out);
2408     if (written > 0)
2409         written--; /* count '\0' */
2410
2411     /*
2412      * First specific handling of the initialization call
2413      */
2414     if (init) {
2415         c_in = 0;
2416         c_out = written;
2417         /* TODO: Check return value. */
2418         xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2419                           NULL, &c_in);
2420         xmlBufAddLen(out, c_out);
2421 #ifdef DEBUG_ENCODING
2422         xmlGenericError(xmlGenericErrorContext,
2423                 "initialized encoder\n");
2424 #endif
2425         return(0);
2426     }
2427
2428     /*
2429      * Conversion itself.
2430      */
2431     toconv = xmlBufUse(in);
2432     if (toconv == 0)
2433         return (0);
2434     if (toconv > 64 * 1024)
2435         toconv = 64 * 1024;
2436     if (toconv * 4 >= written) {
2437         xmlBufGrow(out, toconv * 4);
2438         written = xmlBufAvail(out) - 1;
2439     }
2440     if (written > 256 * 1024)
2441         written = 256 * 1024;
2442
2443     c_in = toconv;
2444     c_out = written;
2445     ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2446                             xmlBufContent(in), &c_in);
2447     xmlBufShrink(in, c_in);
2448     xmlBufAddLen(out, c_out);
2449     writtentot += c_out;
2450     if (ret == -1) {
2451         if (c_out > 0) {
2452             /* Can be a limitation of iconv or uconv */
2453             goto retry;
2454         }
2455         ret = -3;
2456     }
2457
2458     if (ret >= 0) output += ret;
2459
2460     /*
2461      * Attempt to handle error cases
2462      */
2463     switch (ret) {
2464         case 0:
2465 #ifdef DEBUG_ENCODING
2466             xmlGenericError(xmlGenericErrorContext,
2467                     "converted %d bytes to %d bytes of output\n",
2468                     c_in, c_out);
2469 #endif
2470             break;
2471         case -1:
2472 #ifdef DEBUG_ENCODING
2473             xmlGenericError(xmlGenericErrorContext,
2474                     "output conversion failed by lack of space\n");
2475 #endif
2476             break;
2477         case -3:
2478 #ifdef DEBUG_ENCODING
2479             xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2480                     c_in, c_out, (int) xmlBufUse(in));
2481 #endif
2482             break;
2483         case -4:
2484             xmlEncodingErr(XML_I18N_NO_OUTPUT,
2485                            "xmlCharEncOutFunc: no output function !\n", NULL);
2486             ret = -1;
2487             break;
2488         case -2: {
2489             xmlChar charref[20];
2490             int len = (int) xmlBufUse(in);
2491             xmlChar *content = xmlBufContent(in);
2492             int cur, charrefLen;
2493
2494             cur = xmlGetUTF8Char(content, &len);
2495             if (cur <= 0)
2496                 break;
2497
2498 #ifdef DEBUG_ENCODING
2499             xmlGenericError(xmlGenericErrorContext,
2500                     "handling output conversion error\n");
2501             xmlGenericError(xmlGenericErrorContext,
2502                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2503                     content[0], content[1],
2504                     content[2], content[3]);
2505 #endif
2506             /*
2507              * Removes the UTF8 sequence, and replace it by a charref
2508              * and continue the transcoding phase, hoping the error
2509              * did not mangle the encoder state.
2510              */
2511             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2512                              "&#%d;", cur);
2513             xmlBufShrink(in, len);
2514             xmlBufGrow(out, charrefLen * 4);
2515             c_out = xmlBufAvail(out) - 1;
2516             c_in = charrefLen;
2517             ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2518                                     charref, &c_in);
2519
2520             if ((ret < 0) || (c_in != charrefLen)) {
2521                 char buf[50];
2522
2523                 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2524                          content[0], content[1],
2525                          content[2], content[3]);
2526                 buf[49] = 0;
2527                 xmlEncodingErr(XML_I18N_CONV_FAILED,
2528                     "output conversion failed due to conv error, bytes %s\n",
2529                                buf);
2530                 if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2531                     content[0] = ' ';
2532                 break;
2533             }
2534
2535             xmlBufAddLen(out, c_out);
2536             writtentot += c_out;
2537             goto retry;
2538         }
2539     }
2540     return(ret);
2541 }
2542 #endif
2543
2544 /**
2545  * xmlCharEncOutFunc:
2546  * @handler:    char enconding transformation data structure
2547  * @out:  an xmlBuffer for the output.
2548  * @in:  an xmlBuffer for the input
2549  *
2550  * Generic front-end for the encoding handler output function
2551  * a first call with @in == NULL has to be made firs to initiate the
2552  * output in case of non-stateless encoding needing to initiate their
2553  * state or the output (like the BOM in UTF16).
2554  * In case of UTF8 sequence conversion errors for the given encoder,
2555  * the content will be automatically remapped to a CharRef sequence.
2556  *
2557  * Returns the number of byte written if success, or
2558  *     -1 general error
2559  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2560  *        the result of transformation can't fit into the encoding we want), or
2561  */
2562 int
2563 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2564                   xmlBufferPtr in) {
2565     int ret;
2566     int written;
2567     int writtentot = 0;
2568     int toconv;
2569     int output = 0;
2570
2571     if (handler == NULL) return(-1);
2572     if (out == NULL) return(-1);
2573
2574 retry:
2575
2576     written = out->size - out->use;
2577
2578     if (written > 0)
2579         written--; /* Gennady: count '/0' */
2580
2581     /*
2582      * First specific handling of in = NULL, i.e. the initialization call
2583      */
2584     if (in == NULL) {
2585         toconv = 0;
2586         /* TODO: Check return value. */
2587         xmlEncOutputChunk(handler, &out->content[out->use], &written,
2588                           NULL, &toconv);
2589         out->use += written;
2590         out->content[out->use] = 0;
2591 #ifdef DEBUG_ENCODING
2592         xmlGenericError(xmlGenericErrorContext,
2593                 "initialized encoder\n");
2594 #endif
2595         return(0);
2596     }
2597
2598     /*
2599      * Conversion itself.
2600      */
2601     toconv = in->use;
2602     if (toconv == 0)
2603         return(0);
2604     if (toconv * 4 >= written) {
2605         xmlBufferGrow(out, toconv * 4);
2606         written = out->size - out->use - 1;
2607     }
2608     ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2609                             in->content, &toconv);
2610     xmlBufferShrink(in, toconv);
2611     out->use += written;
2612     writtentot += written;
2613     out->content[out->use] = 0;
2614     if (ret == -1) {
2615         if (written > 0) {
2616             /* Can be a limitation of iconv or uconv */
2617             goto retry;
2618         }
2619         ret = -3;
2620     }
2621
2622     if (ret >= 0) output += ret;
2623
2624     /*
2625      * Attempt to handle error cases
2626      */
2627     switch (ret) {
2628         case 0:
2629 #ifdef DEBUG_ENCODING
2630             xmlGenericError(xmlGenericErrorContext,
2631                     "converted %d bytes to %d bytes of output\n",
2632                     toconv, written);
2633 #endif
2634             break;
2635         case -1:
2636 #ifdef DEBUG_ENCODING
2637             xmlGenericError(xmlGenericErrorContext,
2638                     "output conversion failed by lack of space\n");
2639 #endif
2640             break;
2641         case -3:
2642 #ifdef DEBUG_ENCODING
2643             xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2644                     toconv, written, in->use);
2645 #endif
2646             break;
2647         case -4:
2648             xmlEncodingErr(XML_I18N_NO_OUTPUT,
2649                            "xmlCharEncOutFunc: no output function !\n", NULL);
2650             ret = -1;
2651             break;
2652         case -2: {
2653             xmlChar charref[20];
2654             int len = in->use;
2655             const xmlChar *utf = (const xmlChar *) in->content;
2656             int cur, charrefLen;
2657
2658             cur = xmlGetUTF8Char(utf, &len);
2659             if (cur <= 0)
2660                 break;
2661
2662 #ifdef DEBUG_ENCODING
2663             xmlGenericError(xmlGenericErrorContext,
2664                     "handling output conversion error\n");
2665             xmlGenericError(xmlGenericErrorContext,
2666                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2667                     in->content[0], in->content[1],
2668                     in->content[2], in->content[3]);
2669 #endif
2670             /*
2671              * Removes the UTF8 sequence, and replace it by a charref
2672              * and continue the transcoding phase, hoping the error
2673              * did not mangle the encoder state.
2674              */
2675             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2676                              "&#%d;", cur);
2677             xmlBufferShrink(in, len);
2678             xmlBufferGrow(out, charrefLen * 4);
2679             written = out->size - out->use - 1;
2680             toconv = charrefLen;
2681             ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2682                                     charref, &toconv);
2683
2684             if ((ret < 0) || (toconv != charrefLen)) {
2685                 char buf[50];
2686
2687                 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2688                          in->content[0], in->content[1],
2689                          in->content[2], in->content[3]);
2690                 buf[49] = 0;
2691                 xmlEncodingErr(XML_I18N_CONV_FAILED,
2692                     "output conversion failed due to conv error, bytes %s\n",
2693                                buf);
2694                 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2695                     in->content[0] = ' ';
2696                 break;
2697             }
2698
2699             out->use += written;
2700             writtentot += written;
2701             out->content[out->use] = 0;
2702             goto retry;
2703         }
2704     }
2705     return(ret);
2706 }
2707
2708 /**
2709  * xmlCharEncCloseFunc:
2710  * @handler:    char enconding transformation data structure
2711  *
2712  * Generic front-end for encoding handler close function
2713  *
2714  * Returns 0 if success, or -1 in case of error
2715  */
2716 int
2717 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2718     int ret = 0;
2719     int tofree = 0;
2720     int i, handler_in_list = 0;
2721
2722     if (handler == NULL) return(-1);
2723     if (handler->name == NULL) return(-1);
2724     if (handlers != NULL) {
2725         for (i = 0;i < nbCharEncodingHandler; i++) {
2726             if (handler == handlers[i]) {
2727                 handler_in_list = 1;
2728                 break;
2729             }
2730         }
2731     }
2732 #ifdef LIBXML_ICONV_ENABLED
2733     /*
2734      * Iconv handlers can be used only once, free the whole block.
2735      * and the associated icon resources.
2736      */
2737     if ((handler_in_list == 0) &&
2738         ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2739         tofree = 1;
2740         if (handler->iconv_out != NULL) {
2741             if (iconv_close(handler->iconv_out))
2742                 ret = -1;
2743             handler->iconv_out = NULL;
2744         }
2745         if (handler->iconv_in != NULL) {
2746             if (iconv_close(handler->iconv_in))
2747                 ret = -1;
2748             handler->iconv_in = NULL;
2749         }
2750     }
2751 #endif /* LIBXML_ICONV_ENABLED */
2752 #ifdef LIBXML_ICU_ENABLED
2753     if ((handler_in_list == 0) &&
2754         ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2755         tofree = 1;
2756         if (handler->uconv_out != NULL) {
2757             closeIcuConverter(handler->uconv_out);
2758             handler->uconv_out = NULL;
2759         }
2760         if (handler->uconv_in != NULL) {
2761             closeIcuConverter(handler->uconv_in);
2762             handler->uconv_in = NULL;
2763         }
2764     }
2765 #endif
2766     if (tofree) {
2767         /* free up only dynamic handlers iconv/uconv */
2768         if (handler->name != NULL)
2769             xmlFree(handler->name);
2770         handler->name = NULL;
2771         xmlFree(handler);
2772     }
2773 #ifdef DEBUG_ENCODING
2774     if (ret)
2775         xmlGenericError(xmlGenericErrorContext,
2776                 "failed to close the encoding handler\n");
2777     else
2778         xmlGenericError(xmlGenericErrorContext,
2779                 "closed the encoding handler\n");
2780 #endif
2781
2782     return(ret);
2783 }
2784
2785 /**
2786  * xmlByteConsumed:
2787  * @ctxt: an XML parser context
2788  *
2789  * This function provides the current index of the parser relative
2790  * to the start of the current entity. This function is computed in
2791  * bytes from the beginning starting at zero and finishing at the
2792  * size in byte of the file if parsing a file. The function is
2793  * of constant cost if the input is UTF-8 but can be costly if run
2794  * on non-UTF-8 input.
2795  *
2796  * Returns the index in bytes from the beginning of the entity or -1
2797  *         in case the index could not be computed.
2798  */
2799 long
2800 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2801     xmlParserInputPtr in;
2802
2803     if (ctxt == NULL) return(-1);
2804     in = ctxt->input;
2805     if (in == NULL)  return(-1);
2806     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2807         unsigned int unused = 0;
2808         xmlCharEncodingHandler * handler = in->buf->encoder;
2809         /*
2810          * Encoding conversion, compute the number of unused original
2811          * bytes from the input not consumed and substract that from
2812          * the raw consumed value, this is not a cheap operation
2813          */
2814         if (in->end - in->cur > 0) {
2815             unsigned char convbuf[32000];
2816             const unsigned char *cur = (const unsigned char *)in->cur;
2817             int toconv = in->end - in->cur, written = 32000;
2818
2819             int ret;
2820
2821             do {
2822                 toconv = in->end - cur;
2823                 written = 32000;
2824                 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2825                                         cur, &toconv);
2826                 if (ret < 0) {
2827                     if (written > 0)
2828                         ret = -2;
2829                     else
2830                         return(-1);
2831                 }
2832                 unused += written;
2833                 cur += toconv;
2834             } while (ret == -2);
2835         }
2836         if (in->buf->rawconsumed < unused)
2837             return(-1);
2838         return(in->buf->rawconsumed - unused);
2839     }
2840     return(in->consumed + (in->cur - in->base));
2841 }
2842
2843 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2844 #ifdef LIBXML_ISO8859X_ENABLED
2845
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  on entry the capacity of @out, on return the number of
 *           octets written to @out
 * @in:  a pointer to an array of UTF-8 chars, or NULL for the
 *       initialization call
 * @inlen:  on entry the length of @in, on return the number of octets
 *          consumed (always a whole number of UTF-8 sequences)
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Each UTF-8 sequence produces exactly one output
 * octet. Conversion stops early, without reporting an error, as soon as
 * @out is full; the caller can tell from @inlen how much was consumed.
 *
 * Layout of @xlattable as read here:
 *   [0..31]   block selector for 2-byte sequences, indexed by lead & 0x1F
 *   [32..47]  block selector for 3-byte sequences, indexed by lead & 0x0F
 *   [48..]    64-byte blocks indexed by (continuation byte & 0x3F); a
 *             selector s designates the block starting at 48 + s * 64
 * A final lookup result of 0 means the character is not in the set.
 *
 * Returns the number of octets written if success (0 for the
 *     initialization call), -2 if the input is malformed or contains a
 *     character that cannot be transcoded, -3 if the input ends in the
 *     middle of a multi-byte sequence, or -1 on invalid arguments.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);

    /*
     * Every iteration emits exactly one octet, so checking the output
     * space once per iteration is enough to never write past @out.
     */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;

        if (d >= 0x80) {
            if (d < 0xC0) {
                /* trailing byte in leading position */
                ret = -2;
                goto done;
            } else if (d < 0xE0) {
                unsigned char c;

                if (!(in < inend)) {
                    /* trailing byte not in input buffer */
                    ret = -3;
                    goto done;
                }
                c = *in++;
                if ((c & 0xC0) != 0x80) {
                    /* not a trailing byte */
                    ret = -2;
                    goto done;
                }
                d = xlattable[48 + (c & 0x3F) + xlattable[d & 0x1F] * 64];
            } else if (d < 0xF0) {
                unsigned char c1;
                unsigned char c2;
                unsigned char sel;

                if (inend - in < 2) {
                    /* trailing bytes not in input buffer */
                    ret = -3;
                    goto done;
                }
                c1 = *in++;
                if ((c1 & 0xC0) != 0x80) {
                    /* not a trailing byte (c1) */
                    ret = -2;
                    goto done;
                }
                c2 = *in++;
                if ((c2 & 0xC0) != 0x80) {
                    /* not a trailing byte (c2) */
                    ret = -2;
                    goto done;
                }
                /* lead byte -> block, first trailer -> block, second -> octet */
                sel = xlattable[32 + (d & 0x0F)];
                sel = xlattable[48 + (c1 & 0x3F) + sel * 64];
                d = xlattable[48 + (c2 & 0x3F) + sel * 64];
            } else {
                /* cannot transcode >= U+010000 */
                ret = -2;
                goto done;
            }
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
        }
        *out++ = d;
        processed = in;
    }
    ret = (int) (out - outstart);

done:
    /* report only fully converted sequences as consumed */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(ret);
}
2964
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  on entry the capacity of @out, on return the number of
 *           octets produced
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  on entry the length of @in, on return the number of octets
 *          consumed
 * @unicodetable: code points for the bytes 0x80..0xFF, indexed by
 *                (byte - 0x80); an entry of 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied unchanged; the others
 * are looked up in @unicodetable and written as 2- or 3-octet UTF-8
 * sequences. Conversion stops early when @out runs out of room.
 *
 * Returns the number of octets written if success, or -1 on invalid
 *     arguments or when an undefined byte is met (in which case @inlen
 *     and @outlen describe what was converted before it).
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;
    int tail;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: insist on more than two free octets so that the widest
     * sequence written below (three octets) always fits. The test is
     * expressed as a difference so that no pointer before the start of
     * @out is ever formed when the buffer is tiny.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* never copy more ASCII than the output can still hold */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* check the bound first: in may already sit at the end of input */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }

    /*
     * The main loop gives up with up to two free octets left; they can
     * still take plain ASCII.
     */
    for (tail = 0; tail < 2; tail++) {
        if (!((in < inend) && (out < outend) && (*in < 0x80)))
            break;
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3030
3031
3032 /************************************************************************
3033  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3034  ************************************************************************/
3035
/*
 * ISO-8859-2, upper half: entry i is the 16-bit Unicode code point
 * for the byte 0x80 + i. This is the layout ISO8859xToUTF8 reads through
 * its @unicodetable argument (unicodetable[*in - 0x80]).
 * NOTE(review): the call site passing this table is not visible here.
 */
3036 static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3037     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3038     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3039     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3040     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3041     0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3042     0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3043     0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3044     0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3045     0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3046     0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3047     0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3048     0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3049     0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3050     0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3051     0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3052     0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3053 };
3054
/*
 * Lookup data for the reverse direction (presumably UTF-8 to ISO-8859-2),
 * 48 + 6 * 64 bytes, in the layout UTF8ToISO8859x reads through its
 * @xlattable argument:
 *   [0..31]   block selector for a 2-byte sequence, indexed by lead & 0x1F
 *   [32..47]  block selector for a 3-byte sequence, indexed by lead & 0x0F
 *   [48..]    64-byte blocks indexed by (continuation byte & 0x3F); a
 *             selector s designates the block starting at 48 + s * 64
 * A final lookup result of 0 is reported there as "not in character set".
 * NOTE(review): the call site passing this table is not visible here.
 */
3055 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3056     "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3057     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3062     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3063     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3064     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3065     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3066     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3067     "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3068     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3069     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3070     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3071     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3072     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3073     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3074     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3075     "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3076     "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3077     "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3078     "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3079     "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3080     "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3081     "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3082     "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3083 };
3084
/*
 * ISO-8859-3, upper half: entry i is the 16-bit Unicode code point
 * for the byte 0x80 + i, in the layout ISO8859xToUTF8 reads through its
 * @unicodetable argument (unicodetable[*in - 0x80]). That function
 * treats a 0x0000 entry as an undefined code point and fails with -1.
 * NOTE(review): the call site passing this table is not visible here.
 */
3085 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3086     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3087     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3088     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3089     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3090     0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3091     0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3092     0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3093     0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3094     0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3095     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3096     0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3097     0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3098     0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3099     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3100     0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3101     0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3102 };
3103
/*
 * Lookup data for the reverse direction (presumably UTF-8 to ISO-8859-3),
 * 48 + 7 * 64 bytes, in the layout UTF8ToISO8859x reads through its
 * @xlattable argument:
 *   [0..31]   block selector for a 2-byte sequence, indexed by lead & 0x1F
 *   [32..47]  block selector for a 3-byte sequence, indexed by lead & 0x0F
 *   [48..]    64-byte blocks indexed by (continuation byte & 0x3F); a
 *             selector s designates the block starting at 48 + s * 64
 * A final lookup result of 0 is reported there as "not in character set".
 * NOTE(review): the call site passing this table is not visible here.
 */
3104 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3105     "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3106     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3107     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3108     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3109     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3112     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3113     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3114     "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3115     "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3116     "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3117     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3118     "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3119     "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3120     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3121     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3122     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3123     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3124     "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3125     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3126     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3127     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3128     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3129     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3130     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3131     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3132     "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3133     "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3134     "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3135     "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3136 };
3137
/*
 * ISO-8859-4, upper half: entry i is the 16-bit Unicode code point
 * for the byte 0x80 + i. This is the layout ISO8859xToUTF8 reads through
 * its @unicodetable argument (unicodetable[*in - 0x80]).
 * NOTE(review): the call site passing this table is not visible here.
 */
3138 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3139     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3140     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3141     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3142     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3143     0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3144     0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3145     0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3146     0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3147     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3148     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3149     0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3150     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3151     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3152     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3153     0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3154     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3155 };
3156
/*
 * Lookup data for the reverse direction (presumably UTF-8 to ISO-8859-4),
 * 48 + 6 * 64 bytes, in the layout UTF8ToISO8859x reads through its
 * @xlattable argument:
 *   [0..31]   block selector for a 2-byte sequence, indexed by lead & 0x1F
 *   [32..47]  block selector for a 3-byte sequence, indexed by lead & 0x0F
 *   [48..]    64-byte blocks indexed by (continuation byte & 0x3F); a
 *             selector s designates the block starting at 48 + s * 64
 * A final lookup result of 0 is reported there as "not in character set".
 * NOTE(review): the call site passing this table is not visible here.
 */
3157 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3158     "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3159     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3160     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3161     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3162     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3166     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3167     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3168     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3169     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3170     "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3171     "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3172     "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3173     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3174     "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3175     "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3176     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3177     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3178     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3179     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3180     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3181     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3182     "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3183     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3184     "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3185 };
3186
/*
 * ISO-8859-5, upper half: entry i is the 16-bit Unicode code point
 * for the byte 0x80 + i. This is the layout ISO8859xToUTF8 reads through
 * its @unicodetable argument (unicodetable[*in - 0x80]).
 * NOTE(review): the call site passing this table is not visible here.
 */
3187 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3188     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3189     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3190     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3191     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3192     0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3193     0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3194     0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3195     0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3196     0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3197     0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3198     0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3199     0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3200     0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3201     0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3202     0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3203     0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3204 };
3205
/*
 * Lookup data for the reverse direction (presumably UTF-8 to ISO-8859-5),
 * 48 + 6 * 64 bytes, in the layout UTF8ToISO8859x reads through its
 * @xlattable argument:
 *   [0..31]   block selector for a 2-byte sequence, indexed by lead & 0x1F
 *   [32..47]  block selector for a 3-byte sequence, indexed by lead & 0x0F
 *   [48..]    64-byte blocks indexed by (continuation byte & 0x3F); a
 *             selector s designates the block starting at 48 + s * 64
 * A final lookup result of 0 is reported there as "not in character set".
 * NOTE(review): the call site passing this table is not visible here.
 */
3206 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3207     "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208     "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3215     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3216     "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3217     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3218     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3219     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3220     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3221     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3222     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3223     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3224     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3226     "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231     "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3234 };
3235
/*
 * ISO-8859-6, upper half: entry i is the 16-bit Unicode code point
 * for the byte 0x80 + i, in the layout ISO8859xToUTF8 reads through its
 * @unicodetable argument (unicodetable[*in - 0x80]). That function
 * treats a 0x0000 entry as an undefined code point and fails with -1.
 * NOTE(review): the call site passing this table is not visible here.
 */
3236 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3237     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3238     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3239     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3240     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3241     0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3242     0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3243     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3244     0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3245     0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3246     0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3247     0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3248     0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3249     0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3250     0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3251     0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3252     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3253 };
3254
/*
 * Lookup data for the reverse direction (presumably UTF-8 to ISO-8859-6),
 * 48 + 5 * 64 bytes, in the layout UTF8ToISO8859x reads through its
 * @xlattable argument:
 *   [0..31]   block selector for a 2-byte sequence, indexed by lead & 0x1F
 *   [32..47]  block selector for a 3-byte sequence, indexed by lead & 0x0F
 *   [48..]    64-byte blocks indexed by (continuation byte & 0x3F); a
 *             selector s designates the block starting at 48 + s * 64
 * A final lookup result of 0 is reported there as "not in character set".
 * NOTE(review): the call site passing this table is not visible here.
 */
3255 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3256     "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3258     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3259     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3264     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3265     "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3266     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3267     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3271     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3272     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3273     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3274     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3275     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3276     "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3277     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3278     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3279 };
3280
/*
 * ISO-8859-7, upper half: entry i is the 16-bit Unicode code point
 * for the byte 0x80 + i, in the layout ISO8859xToUTF8 reads through its
 * @unicodetable argument (unicodetable[*in - 0x80]). That function
 * treats a 0x0000 entry as an undefined code point and fails with -1.
 * NOTE(review): the call site passing this table is not visible here.
 */
3281 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3282     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3283     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3284     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3285     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3286     0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3287     0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3288     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3289     0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3290     0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3291     0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3292     0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3293     0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3294     0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3295     0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3296     0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3297     0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3298 };
3299
/*
 * xmltranscodetable_ISO8859_7:
 *
 * Constant byte table for ISO-8859-7, 48 + 7 * 64 = 496 bytes, written as
 * 31 string-literal rows of 16 bytes each.  The concatenated literal fills
 * the array exactly, so no terminating NUL byte is stored.
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by seven 64-byte blocks used for the UTF-8 to ISO-8859-7
 * direction; the lookup routine (presumably UTF8ToISO8859x) is outside
 * this view -- confirm the layout there.
 */
3300 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3301     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3302     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3303     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3304     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3305     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3309     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3310     "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3311     "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3312     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317     "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3318     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3321     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324     "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3325     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3326     "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3327     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3328     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3329     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332 };
3333
/*
 * xmlunicodetable_ISO8859_8:
 *
 * Constant table of 128 16-bit values for ISO-8859-8, eight per row.
 * Entries 0..31 hold 0x0080..0x009f in order; many later entries are
 * 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i and 0x0000 marks a byte with no mapping; the consumer
 * (presumably ISO8859xToUTF8) is outside this view -- confirm there.
 */
3334 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3335     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3336     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3337     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3338     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3339     0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3340     0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3341     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3342     0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3343     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3344     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3345     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3346     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3347     0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3348     0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3349     0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3350     0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3351 };
3352
/*
 * xmltranscodetable_ISO8859_8:
 *
 * Constant byte table for ISO-8859-8, 48 + 7 * 64 = 496 bytes, written as
 * 31 string-literal rows of 16 bytes each.  The concatenated literal fills
 * the array exactly, so no terminating NUL byte is stored.
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by seven 64-byte blocks used for the UTF-8 to ISO-8859-8
 * direction; the lookup routine (presumably UTF8ToISO8859x) is outside
 * this view -- confirm the layout there.
 */
3353 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3354     "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3355     "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3356     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3357     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3358     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3362     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3363     "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3364     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3365     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370     "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3371     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372     "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3373     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3376     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3377     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3378     "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3379     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3383     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3384     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385 };
3386
/*
 * xmlunicodetable_ISO8859_9:
 *
 * Constant table of 128 16-bit values for ISO-8859-9, eight per row.
 * Entries 0..31 hold 0x0080..0x009f in order; no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; the consumer (presumably ISO8859xToUTF8) is outside
 * this view -- confirm there.
 */
3387 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3388     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3389     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3390     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3391     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3392     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3393     0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3394     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3395     0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3396     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3397     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3398     0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3399     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3400     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3401     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3402     0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3403     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3404 };
3405
/*
 * xmltranscodetable_ISO8859_9:
 *
 * Constant byte table for ISO-8859-9, 48 + 5 * 64 = 368 bytes, written as
 * 23 string-literal rows of 16 bytes each.  The concatenated literal fills
 * the array exactly, so no terminating NUL byte is stored.
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by five 64-byte blocks used for the UTF-8 to ISO-8859-9
 * direction; the lookup routine (presumably UTF8ToISO8859x) is outside
 * this view -- confirm the layout there.
 */
3406 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3407     "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3409     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3415     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3416     "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3417     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3418     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3419     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3420     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3421     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3422     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3424     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425     "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3428     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430 };
3431
/*
 * xmlunicodetable_ISO8859_10:
 *
 * Constant table of 128 16-bit values for ISO-8859-10, eight per row.
 * Entries 0..31 hold 0x0080..0x009f in order; no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; the consumer (presumably ISO8859xToUTF8) is outside
 * this view -- confirm there.
 */
3432 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3433     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3434     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3435     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3436     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3437     0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3438     0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3439     0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3440     0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3441     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3442     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3443     0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3444     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3445     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3446     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3447     0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3448     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3449 };
3450
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Constant byte table for ISO-8859-10, 48 + 7 * 64 = 496 bytes, written
 * as 31 string-literal rows of 16 bytes each.  The concatenated literal
 * fills the array exactly, so no terminating NUL byte is stored.
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by seven 64-byte blocks used for the UTF-8 to ISO-8859-10
 * direction; the lookup routine (presumably UTF8ToISO8859x) is outside
 * this view -- confirm the layout there.
 */
3451 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3452     "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3453     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3455     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3460     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3461     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3462     "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3463     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3464     "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3465     "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3466     "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3467     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3468     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469     "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3470     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3471     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476     "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3480     "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3481     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3482     "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3483 };
3484
/*
 * xmlunicodetable_ISO8859_11:
 *
 * Constant table of 128 16-bit values for ISO-8859-11, eight per row.
 * Entries 0..31 hold 0x0080..0x009f in order; some later entries are
 * 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i and 0x0000 marks a byte with no mapping; the consumer
 * (presumably ISO8859xToUTF8) is outside this view -- confirm there.
 */
3485 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3486     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3487     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3488     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3489     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3490     0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3491     0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3492     0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3493     0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3494     0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3495     0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3496     0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3497     0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3498     0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3499     0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3500     0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3501     0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3502 };
3503
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Constant byte table for ISO-8859-11, 48 + 6 * 64 = 432 bytes, written
 * as 27 string-literal rows of 16 bytes each.  The concatenated literal
 * fills the array exactly, so no terminating NUL byte is stored.
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by six 64-byte blocks used for the UTF-8 to ISO-8859-11
 * direction; the lookup routine (presumably UTF8ToISO8859x) is outside
 * this view -- confirm the layout there.
 */
3504 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3505     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3506     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507     "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3513     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3514     "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3520     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3521     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3522     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3523     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3524     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3525     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3526     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3527     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3528     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3529     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3530     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3532 };
3533
/*
 * xmlunicodetable_ISO8859_13:
 *
 * Constant table of 128 16-bit values for ISO-8859-13, eight per row.
 * Entries 0..31 hold 0x0080..0x009f in order; no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; the consumer (presumably ISO8859xToUTF8) is outside
 * this view -- confirm there.
 */
3534 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3535     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3536     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3537     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3538     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3539     0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3540     0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3541     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3542     0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3543     0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3544     0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3545     0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3546     0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3547     0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3548     0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3549     0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3550     0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3551 };
3552
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Constant byte table for ISO-8859-13, 48 + 7 * 64 = 496 bytes, written
 * as 31 string-literal rows of 16 bytes each.  The concatenated literal
 * fills the array exactly, so no terminating NUL byte is stored.
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by seven 64-byte blocks used for the UTF-8 to ISO-8859-13
 * direction; the lookup routine (presumably UTF8ToISO8859x) is outside
 * this view -- confirm the layout there.
 */
3553 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3554     "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3555     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3556     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3562     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3563     "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3564     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3565     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3571     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573     "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3574     "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3575     "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3576     "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3577     "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3578     "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3579     "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3580     "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3581     "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3582     "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3583     "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3584     "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3585 };
3586
/*
 * xmlunicodetable_ISO8859_14:
 *
 * Constant table of 128 16-bit values for ISO-8859-14, eight per row.
 * Entries 0..31 hold 0x0080..0x009f in order; no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; the consumer (presumably ISO8859xToUTF8) is outside
 * this view -- confirm there.
 */
3587 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3588     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3589     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3590     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3591     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3592     0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3593     0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3594     0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3595     0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3596     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3597     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3598     0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3599     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3600     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3601     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3602     0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3603     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3604 };
3605
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Constant byte table for ISO-8859-14, 48 + 10 * 64 = 688 bytes, written
 * as 43 string-literal rows of 16 bytes each.  The concatenated literal
 * fills the array exactly, so no terminating NUL byte is stored.
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by ten 64-byte blocks used for the UTF-8 to ISO-8859-14
 * direction; the lookup routine (presumably UTF8ToISO8859x) is outside
 * this view -- confirm the layout there.
 */
3606 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3607     "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3608     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3609     "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3615     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3616     "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3617     "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3618     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3622     "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3623     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3624     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3627     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3628     "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630     "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3633     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3634     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3635     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3636     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3637     "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3638     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3639     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3640     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3641     "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3642     "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643     "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3644     "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3645     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3646     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3647     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3648     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3649     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3650 };
3651
/*
 * xmlunicodetable_ISO8859_15:
 *
 * Constant table of 128 16-bit values for ISO-8859-15, eight per row.
 * Entries 0..31 hold 0x0080..0x009f in order; no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; the consumer (presumably ISO8859xToUTF8) is outside
 * this view -- confirm there.
 */
3652 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3653     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3654     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3655     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3656     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3657     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3658     0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3659     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3660     0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3661     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3662     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3663     0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3664     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3665     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3666     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3667     0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3668     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3669 };
3670
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Constant byte table for ISO-8859-15, 48 + 6 * 64 = 432 bytes, written
 * as 27 string-literal rows of 16 bytes each.  The concatenated literal
 * fills the array exactly, so no terminating NUL byte is stored.
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by six 64-byte blocks used for the UTF-8 to ISO-8859-15
 * direction; the lookup routine (presumably UTF8ToISO8859x) is outside
 * this view -- confirm the layout there.
 */
3671 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3672     "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3680     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3681     "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3682     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3683     "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3684     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3685     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3688     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3690     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3692     "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694     "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3695     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3696     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3697     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3698     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3699 };
3700
/*
 * xmlunicodetable_ISO8859_16:
 *
 * Constant table of 128 16-bit values for ISO-8859-16, eight per row.
 * Entries 0..31 hold 0x0080..0x009f in order; no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; the consumer (presumably ISO8859xToUTF8) is outside
 * this view -- confirm there.
 */
3701 static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3702     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3703     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3704     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3705     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3706     0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3707     0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3708     0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3709     0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3710     0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3711     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3712     0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3713     0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3714     0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3715     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3716     0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3717     0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3718 };
3719
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Constant byte table for ISO-8859-16, 48 + 9 * 64 = 624 bytes, written
 * as 39 string-literal rows of 16 bytes each.  The concatenated literal
 * fills the array exactly, so no terminating NUL byte is stored.
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by nine 64-byte blocks used for the UTF-8 to ISO-8859-16
 * direction; the lookup routine (presumably UTF8ToISO8859x) is outside
 * this view -- confirm the layout there.
 */
3720 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3721     "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3722     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3724     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3726     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3727     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3728     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3729     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3730     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3731     "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3732     "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3733     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3734     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3735     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736     "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3737     "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3738     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739     "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3740     "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3741     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3743     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3746     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3747     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3748     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3749     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3750     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3751     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3752     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3753     "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3754     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3755     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3756     "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3757     "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3758     "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3759     "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3760 };
3761
3762
3763 /*
3764  * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3765  */
3766
3767 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3768     const unsigned char* in, int *inlen) {
3769     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3770 }
3771 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3772     const unsigned char* in, int *inlen) {
3773     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3774 }
3775
3776 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3777     const unsigned char* in, int *inlen) {
3778     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3779 }
3780 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3781     const unsigned char* in, int *inlen) {
3782     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3783 }
3784
3785 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3786     const unsigned char* in, int *inlen) {
3787     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3788 }
3789 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3790     const unsigned char* in, int *inlen) {
3791     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3792 }
3793
3794 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3795     const unsigned char* in, int *inlen) {
3796     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3797 }
3798 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3799     const unsigned char* in, int *inlen) {
3800     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3801 }
3802
3803 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3804     const unsigned char* in, int *inlen) {
3805     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3806 }
3807 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3808     const unsigned char* in, int *inlen) {
3809     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3810 }
3811
3812 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3813     const unsigned char* in, int *inlen) {
3814     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3815 }
3816 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3817     const unsigned char* in, int *inlen) {
3818     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3819 }
3820
3821 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3822     const unsigned char* in, int *inlen) {
3823     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3824 }
3825 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3826     const unsigned char* in, int *inlen) {
3827     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3828 }
3829
3830 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3831     const unsigned char* in, int *inlen) {
3832     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3833 }
3834 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3835     const unsigned char* in, int *inlen) {
3836     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3837 }
3838
3839 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3840     const unsigned char* in, int *inlen) {
3841     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3842 }
3843 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3844     const unsigned char* in, int *inlen) {
3845     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3846 }
3847
3848 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3849     const unsigned char* in, int *inlen) {
3850     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3851 }
3852 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3853     const unsigned char* in, int *inlen) {
3854     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3855 }
3856
3857 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3858     const unsigned char* in, int *inlen) {
3859     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3860 }
3861 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3862     const unsigned char* in, int *inlen) {
3863     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3864 }
3865
3866 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3867     const unsigned char* in, int *inlen) {
3868     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3869 }
3870 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3871     const unsigned char* in, int *inlen) {
3872     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3873 }
3874
3875 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3876     const unsigned char* in, int *inlen) {
3877     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3878 }
3879 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3880     const unsigned char* in, int *inlen) {
3881     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3882 }
3883
3884 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3885     const unsigned char* in, int *inlen) {
3886     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3887 }
3888 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3889     const unsigned char* in, int *inlen) {
3890     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3891 }
3892
3893 static void
3894 xmlRegisterCharEncodingHandlersISO8859x (void) {
3895     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3896     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3897     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3898     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3899     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3900     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3901     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3902     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3903     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3904     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3905     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3906     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3907     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3908     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3909 }
3910
3911 #endif
3912 #endif
3913
3914 #define bottom_encoding
3915 #include "elfgcchack.h"