sdk/lib/3rdparty/libxml2/encoding.c

   1 /*
   2  * encoding.c : implements the encoding conversion functions needed for XML
   3  *
   4  * Related specs:
   5  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
   6  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
   7  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
   8  * [ISO-8859-1]   ISO Latin-1 characters codes.
   9  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
  10  *                Worldwide Character Encoding -- Version 1.0", Addison-
  11  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
  12  *                described in Unicode Technical Report #4.
  13  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
  14  *                Information Interchange, ANSI X3.4-1986.
  15  *
  16  * See Copyright for the status of this software.
  17  *
  18  * daniel@veillard.com
  19  *
  20  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
  21  */
  22
  23 #define IN_LIBXML
  24 #include "libxml.h"
  25
  26 #include <string.h>
  27 #include <limits.h>
  28
  29 #ifdef HAVE_CTYPE_H
  30 #include <ctype.h>
  31 #endif
  32 #ifdef HAVE_STDLIB_H
  33 #include <stdlib.h>
  34 #endif
  35 #ifdef LIBXML_ICONV_ENABLED
  36 #ifdef HAVE_ERRNO_H
  37 #include <errno.h>
  38 #endif
  39 #endif
  40 #include <libxml/encoding.h>
  41 #include <libxml/xmlmemory.h>
  42 #ifdef LIBXML_HTML_ENABLED
  43 #include <libxml/HTMLparser.h>
  44 #endif
  45 #include <libxml/globals.h>
  46 #include <libxml/xmlerror.h>
  47
  48 #include "buf.h"
  49 #include "enc.h"
  50
/*
 * NOTE(review): presumably the handlers for UTF-16LE and UTF-16BE; both
 * start out as NULL and nothing in this part of the file assigns them.
 */
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;

/* One alias record: a pair of constant strings, @name and @alias. */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;   /* NOTE(review): presumably the real encoding name - confirm */
    const char *alias;  /* NOTE(review): presumably the alternate name for @name - confirm */
};

/*
 * Alias table state, empty at start-up.
 * NOTE(review): Nb and Max look like the used and allocated entry counts
 * of xmlCharEncodingAliases - confirm against the alias handling code.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;

#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#else
#ifdef LIBXML_ISO8859X_ENABLED
/* Forward declaration, only compiled when neither iconv nor ICU is enabled. */
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#endif

/* Byte-order flag; a non-zero value means little-endian. Initialised to 1. */
static int xmlLittleEndian = 1;
  76
/**
 * xmlEncodingErrMemory:
 * @extra:  extra information, handed unchanged to __xmlSimpleError()
 *
 * Handle an out of memory condition by reporting XML_ERR_NO_MEMORY
 * in the XML_FROM_I18N error domain.
 */
static void
xmlEncodingErrMemory(const char *extra)
{
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
}
  88
/**
 * xmlEncodingErr:
 * @error:  the error number
 * @msg:  the error message, used as the format string
 * @val:  a string value, passed to __xmlRaiseError() both as a string
 *        argument and as the argument consumed by @msg
 *
 * Raise an encoding error with level XML_ERR_FATAL in the
 * XML_FROM_I18N error domain.
 */
static void LIBXML_ATTR_FORMAT(2,0)
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
{
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
                    XML_FROM_I18N, error, XML_ERR_FATAL,
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
}
 103
 104 #ifdef LIBXML_ICU_ENABLED
 105 static uconv_t*
 106 openIcuConverter(const char* name, int toUnicode)
 107 {
 108   UErrorCode status = U_ZERO_ERROR;
 109   uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
 110   if (conv == NULL)
 111     return NULL;
 112
 113   conv->pivot_source = conv->pivot_buf;
 114   conv->pivot_target = conv->pivot_buf;
 115
 116   conv->uconv = ucnv_open(name, &status);
 117   if (U_FAILURE(status))
 118     goto error;
 119
 120   status = U_ZERO_ERROR;
 121   if (toUnicode) {
 122     ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
 123                         NULL, NULL, NULL, &status);
 124   }
 125   else {
 126     ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
 127                         NULL, NULL, NULL, &status);
 128   }
 129   if (U_FAILURE(status))
 130     goto error;
 131
 132   status = U_ZERO_ERROR;
 133   conv->utf8 = ucnv_open("UTF-8", &status);
 134   if (U_SUCCESS(status))
 135     return conv;
 136
 137 error:
 138   if (conv->uconv)
 139     ucnv_close(conv->uconv);
 140   xmlFree(conv);
 141   return NULL;
 142 }
 143
 144 static void
 145 closeIcuConverter(uconv_t *conv)
 146 {
 147   if (conv != NULL) {
 148     ucnv_close(conv->uconv);
 149     ucnv_close(conv->utf8);
 150     xmlFree(conv);
 151   }
 152 }
 153 #endif /* LIBXML_ICU_ENABLED */
 154
 155 /************************************************************************
 156  *                                                                      *
 157  *              Conversions To/From UTF8 encoding                       *
 158  *                                                                      *
 159  ************************************************************************/
 160
 161 /**
 162  * asciiToUTF8:
 163  * @out:  a pointer to an array of bytes to store the result
 164  * @outlen:  the length of @out
 165  * @in:  a pointer to an array of ASCII chars
 166  * @inlen:  the length of @in
 167  *
 168  * Take a block of ASCII chars in and try to convert it to an UTF-8
 169  * block of chars out.
 170  * Returns 0 if success, or -1 otherwise
 171  * The value of @inlen after return is the number of octets consumed
 172  *     if the return value is positive, else unpredictable.
 173  * The value of @outlen after return is the number of octets consumed.
 174  */
 175 static int
 176 asciiToUTF8(unsigned char* out, int *outlen,
 177               const unsigned char* in, int *inlen) {
 178     unsigned char* outstart = out;
 179     const unsigned char* base = in;
 180     const unsigned char* processed = in;
 181     unsigned char* outend = out + *outlen;
 182     const unsigned char* inend;
 183     unsigned int c;
 184
 185     inend = in + (*inlen);
 186     while ((in < inend) && (out - outstart + 5 < *outlen)) {
 187         c= *in++;
 188
 189         if (out >= outend)
 190             break;
 191         if (c < 0x80) {
 192             *out++ = c;
 193         } else {
 194             *outlen = out - outstart;
 195             *inlen = processed - base;
 196             return(-1);
 197         }
 198
 199         processed = (const unsigned char*) in;
 200     }
 201     *outlen = out - outstart;
 202     *inlen = processed - base;
 203     return(*outlen);
 204 }
 205
 206 #ifdef LIBXML_OUTPUT_ENABLED
 207 /**
 208  * UTF8Toascii:
 209  * @out:  a pointer to an array of bytes to store the result
 210  * @outlen:  the length of @out
 211  * @in:  a pointer to an array of UTF-8 chars
 212  * @inlen:  the length of @in
 213  *
 214  * Take a block of UTF-8 chars in and try to convert it to an ASCII
 215  * block of chars out.
 216  *
 217  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
 218  * The value of @inlen after return is the number of octets consumed
 219  *     if the return value is positive, else unpredictable.
 220  * The value of @outlen after return is the number of octets consumed.
 221  */
 222 static int
 223 UTF8Toascii(unsigned char* out, int *outlen,
 224               const unsigned char* in, int *inlen) {
 225     const unsigned char* processed = in;
 226     const unsigned char* outend;
 227     const unsigned char* outstart = out;
 228     const unsigned char* instart = in;
 229     const unsigned char* inend;
 230     unsigned int c, d;
 231     int trailing;
 232
 233     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 234     if (in == NULL) {
 235         /*
 236          * initialization nothing to do
 237          */
 238         *outlen = 0;
 239         *inlen = 0;
 240         return(0);
 241     }
 242     inend = in + (*inlen);
 243     outend = out + (*outlen);
 244     while (in < inend) {
 245         d = *in++;
 246         if      (d < 0x80)  { c= d; trailing= 0; }
 247         else if (d < 0xC0) {
 248             /* trailing byte in leading position */
 249             *outlen = out - outstart;
 250             *inlen = processed - instart;
 251             return(-2);
 252         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 253         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 254         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 255         else {
 256             /* no chance for this in Ascii */
 257             *outlen = out - outstart;
 258             *inlen = processed - instart;
 259             return(-2);
 260         }
 261
 262         if (inend - in < trailing) {
 263             break;
 264         }
 265
 266         for ( ; trailing; trailing--) {
 267             if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
 268                 break;
 269             c <<= 6;
 270             c |= d & 0x3F;
 271         }
 272
 273         /* assertion: c is a single UTF-4 value */
 274         if (c < 0x80) {
 275             if (out >= outend)
 276                 break;
 277             *out++ = c;
 278         } else {
 279             /* no chance for this in Ascii */
 280             *outlen = out - outstart;
 281             *inlen = processed - instart;
 282             return(-2);
 283         }
 284         processed = in;
 285     }
 286     *outlen = out - outstart;
 287     *inlen = processed - instart;
 288     return(*outlen);
 289 }
 290 #endif /* LIBXML_OUTPUT_ENABLED */
 291
 292 /**
 293  * isolat1ToUTF8:
 294  * @out:  a pointer to an array of bytes to store the result
 295  * @outlen:  the length of @out
 296  * @in:  a pointer to an array of ISO Latin 1 chars
 297  * @inlen:  the length of @in
 298  *
 299  * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 300  * block of chars out.
 301  * Returns the number of bytes written if success, or -1 otherwise
 302  * The value of @inlen after return is the number of octets consumed
 303  *     if the return value is positive, else unpredictable.
 304  * The value of @outlen after return is the number of octets consumed.
 305  */
 306 int
 307 isolat1ToUTF8(unsigned char* out, int *outlen,
 308               const unsigned char* in, int *inlen) {
 309     unsigned char* outstart = out;
 310     const unsigned char* base = in;
 311     unsigned char* outend;
 312     const unsigned char* inend;
 313     const unsigned char* instop;
 314
 315     if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
 316         return(-1);
 317
 318     outend = out + *outlen;
 319     inend = in + (*inlen);
 320     instop = inend;
 321
 322     while ((in < inend) && (out < outend - 1)) {
 323         if (*in >= 0x80) {
 324             *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
 325             *out++ = ((*in) & 0x3F) | 0x80;
 326             ++in;
 327         }
 328         if ((instop - in) > (outend - out)) instop = in + (outend - out);
 329         while ((in < instop) && (*in < 0x80)) {
 330             *out++ = *in++;
 331         }
 332     }
 333     if ((in < inend) && (out < outend) && (*in < 0x80)) {
 334         *out++ = *in++;
 335     }
 336     *outlen = out - outstart;
 337     *inlen = in - base;
 338     return(*outlen);
 339 }
 340
 341 /**
 342  * UTF8ToUTF8:
 343  * @out:  a pointer to an array of bytes to store the result
 344  * @outlen:  the length of @out
 345  * @inb:  a pointer to an array of UTF-8 chars
 346  * @inlenb:  the length of @in in UTF-8 chars
 347  *
 348  * No op copy operation for UTF8 handling.
 349  *
 350  * Returns the number of bytes written, or -1 if lack of space.
 351  *     The value of *inlen after return is the number of octets consumed
 352  *     if the return value is positive, else unpredictable.
 353  */
 354 static int
 355 UTF8ToUTF8(unsigned char* out, int *outlen,
 356            const unsigned char* inb, int *inlenb)
 357 {
 358     int len;
 359
 360     if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
 361         return(-1);
 362     if (inb == NULL) {
 363         /* inb == NULL means output is initialized. */
 364         *outlen = 0;
 365         *inlenb = 0;
 366         return(0);
 367     }
 368     if (*outlen > *inlenb) {
 369         len = *inlenb;
 370     } else {
 371         len = *outlen;
 372     }
 373     if (len < 0)
 374         return(-1);
 375
 376     memcpy(out, inb, len);
 377
 378     *outlen = len;
 379     *inlenb = len;
 380     return(*outlen);
 381 }
 382
 383
 384 #ifdef LIBXML_OUTPUT_ENABLED
 385 /**
 386  * UTF8Toisolat1:
 387  * @out:  a pointer to an array of bytes to store the result
 388  * @outlen:  the length of @out
 389  * @in:  a pointer to an array of UTF-8 chars
 390  * @inlen:  the length of @in
 391  *
 392  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 393  * block of chars out.
 394  *
 395  * Returns the number of bytes written if success, -2 if the transcoding fails,
 396            or -1 otherwise
 397  * The value of @inlen after return is the number of octets consumed
 398  *     if the return value is positive, else unpredictable.
 399  * The value of @outlen after return is the number of octets consumed.
 400  */
 401 int
 402 UTF8Toisolat1(unsigned char* out, int *outlen,
 403               const unsigned char* in, int *inlen) {
 404     const unsigned char* processed = in;
 405     const unsigned char* outend;
 406     const unsigned char* outstart = out;
 407     const unsigned char* instart = in;
 408     const unsigned char* inend;
 409     unsigned int c, d;
 410     int trailing;
 411
 412     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 413     if (in == NULL) {
 414         /*
 415          * initialization nothing to do
 416          */
 417         *outlen = 0;
 418         *inlen = 0;
 419         return(0);
 420     }
 421     inend = in + (*inlen);
 422     outend = out + (*outlen);
 423     while (in < inend) {
 424         d = *in++;
 425         if      (d < 0x80)  { c= d; trailing= 0; }
 426         else if (d < 0xC0) {
 427             /* trailing byte in leading position */
 428             *outlen = out - outstart;
 429             *inlen = processed - instart;
 430             return(-2);
 431         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 432         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 433         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 434         else {
 435             /* no chance for this in IsoLat1 */
 436             *outlen = out - outstart;
 437             *inlen = processed - instart;
 438             return(-2);
 439         }
 440
 441         if (inend - in < trailing) {
 442             break;
 443         }
 444
 445         for ( ; trailing; trailing--) {
 446             if (in >= inend)
 447                 break;
 448             if (((d= *in++) & 0xC0) != 0x80) {
 449                 *outlen = out - outstart;
 450                 *inlen = processed - instart;
 451                 return(-2);
 452             }
 453             c <<= 6;
 454             c |= d & 0x3F;
 455         }
 456
 457         /* assertion: c is a single UTF-4 value */
 458         if (c <= 0xFF) {
 459             if (out >= outend)
 460                 break;
 461             *out++ = c;
 462         } else {
 463             /* no chance for this in IsoLat1 */
 464             *outlen = out - outstart;
 465             *inlen = processed - instart;
 466             return(-2);
 467         }
 468         processed = in;
 469     }
 470     *outlen = out - outstart;
 471     *inlen = processed - instart;
 472     return(*outlen);
 473 }
 474 #endif /* LIBXML_OUTPUT_ENABLED */
 475
 476 /**
 477  * UTF16LEToUTF8:
 478  * @out:  a pointer to an array of bytes to store the result
 479  * @outlen:  the length of @out
 480  * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
 481  * @inlenb:  the length of @in in UTF-16LE chars
 482  *
 483  * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
 484  * block of chars out. This function assumes the endian property
 485  * is the same between the native type of this machine and the
 486  * inputed one.
 487  *
 488  * Returns the number of bytes written, or -1 if lack of space, or -2
 489  *     if the transcoding fails (if *in is not a valid utf16 string)
 490  *     The value of *inlen after return is the number of octets consumed
 491  *     if the return value is positive, else unpredictable.
 492  */
 493 static int
 494 UTF16LEToUTF8(unsigned char* out, int *outlen,
 495             const unsigned char* inb, int *inlenb)
 496 {
 497     unsigned char* outstart = out;
 498     const unsigned char* processed = inb;
 499     unsigned char* outend = out + *outlen;
 500     unsigned short* in = (unsigned short*) inb;
 501     unsigned short* inend;
 502     unsigned int c, d, inlen;
 503     unsigned char *tmp;
 504     int bits;
 505
 506     if ((*inlenb % 2) == 1)
 507         (*inlenb)--;
 508     inlen = *inlenb / 2;
 509     inend = in + inlen;
 510     while ((in < inend) && (out - outstart + 5 < *outlen)) {
 511         if (xmlLittleEndian) {
 512             c= *in++;
 513         } else {
 514             tmp = (unsigned char *) in;
 515             c = *tmp++;
 516             c = c | (((unsigned int)*tmp) << 8);
 517             in++;
 518         }
 519         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
 520             if (in >= inend) {           /* (in > inend) shouldn't happens */
 521                 break;
 522             }
 523             if (xmlLittleEndian) {
 524                 d = *in++;
 525             } else {
 526                 tmp = (unsigned char *) in;
 527                 d = *tmp++;
 528                 d = d | (((unsigned int)*tmp) << 8);
 529                 in++;
 530             }
 531             if ((d & 0xFC00) == 0xDC00) {
 532                 c &= 0x03FF;
 533                 c <<= 10;
 534                 c |= d & 0x03FF;
 535                 c += 0x10000;
 536             }
 537             else {
 538                 *outlen = out - outstart;
 539                 *inlenb = processed - inb;
 540                 return(-2);
 541             }
 542         }
 543
 544         /* assertion: c is a single UTF-4 value */
 545         if (out >= outend)
 546             break;
 547         if      (c <    0x80) {  *out++=  c;                bits= -6; }
 548         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
 549         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
 550         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
 551
 552         for ( ; bits >= 0; bits-= 6) {
 553             if (out >= outend)
 554                 break;
 555             *out++= ((c >> bits) & 0x3F) | 0x80;
 556         }
 557         processed = (const unsigned char*) in;
 558     }
 559     *outlen = out - outstart;
 560     *inlenb = processed - inb;
 561     return(*outlen);
 562 }
 563
 564 #ifdef LIBXML_OUTPUT_ENABLED
 565 /**
 566  * UTF8ToUTF16LE:
 567  * @outb:  a pointer to an array of bytes to store the result
 568  * @outlen:  the length of @outb
 569  * @in:  a pointer to an array of UTF-8 chars
 570  * @inlen:  the length of @in
 571  *
 572  * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 573  * block of chars out.
 574  *
 575  * Returns the number of bytes written, or -1 if lack of space, or -2
 576  *     if the transcoding failed.
 577  */
 578 static int
 579 UTF8ToUTF16LE(unsigned char* outb, int *outlen,
 580             const unsigned char* in, int *inlen)
 581 {
 582     unsigned short* out = (unsigned short*) outb;
 583     const unsigned char* processed = in;
 584     const unsigned char *const instart = in;
 585     unsigned short* outstart= out;
 586     unsigned short* outend;
 587     const unsigned char* inend;
 588     unsigned int c, d;
 589     int trailing;
 590     unsigned char *tmp;
 591     unsigned short tmp1, tmp2;
 592
 593     /* UTF16LE encoding has no BOM */
 594     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 595     if (in == NULL) {
 596         *outlen = 0;
 597         *inlen = 0;
 598         return(0);
 599     }
 600     inend= in + *inlen;
 601     outend = out + (*outlen / 2);
 602     while (in < inend) {
 603       d= *in++;
 604       if      (d < 0x80)  { c= d; trailing= 0; }
 605       else if (d < 0xC0) {
 606           /* trailing byte in leading position */
 607           *outlen = (out - outstart) * 2;
 608           *inlen = processed - instart;
 609           return(-2);
 610       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 611       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 612       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 613       else {
 614         /* no chance for this in UTF-16 */
 615         *outlen = (out - outstart) * 2;
 616         *inlen = processed - instart;
 617         return(-2);
 618       }
 619
 620       if (inend - in < trailing) {
 621           break;
 622       }
 623
 624       for ( ; trailing; trailing--) {
 625           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
 626               break;
 627           c <<= 6;
 628           c |= d & 0x3F;
 629       }
 630
 631       /* assertion: c is a single UTF-4 value */
 632         if (c < 0x10000) {
 633             if (out >= outend)
 634                 break;
 635             if (xmlLittleEndian) {
 636                 *out++ = c;
 637             } else {
 638                 tmp = (unsigned char *) out;
 639                 *tmp = c ;
 640                 *(tmp + 1) = c >> 8 ;
 641                 out++;
 642             }
 643         }
 644         else if (c < 0x110000) {
 645             if (out+1 >= outend)
 646                 break;
 647             c -= 0x10000;
 648             if (xmlLittleEndian) {
 649                 *out++ = 0xD800 | (c >> 10);
 650                 *out++ = 0xDC00 | (c & 0x03FF);
 651             } else {
 652                 tmp1 = 0xD800 | (c >> 10);
 653                 tmp = (unsigned char *) out;
 654                 *tmp = (unsigned char) tmp1;
 655                 *(tmp + 1) = tmp1 >> 8;
 656                 out++;
 657
 658                 tmp2 = 0xDC00 | (c & 0x03FF);
 659                 tmp = (unsigned char *) out;
 660                 *tmp  = (unsigned char) tmp2;
 661                 *(tmp + 1) = tmp2 >> 8;
 662                 out++;
 663             }
 664         }
 665         else
 666             break;
 667         processed = in;
 668     }
 669     *outlen = (out - outstart) * 2;
 670     *inlen = processed - instart;
 671     return(*outlen);
 672 }
 673
 674 /**
 675  * UTF8ToUTF16:
 676  * @outb:  a pointer to an array of bytes to store the result
 677  * @outlen:  the length of @outb
 678  * @in:  a pointer to an array of UTF-8 chars
 679  * @inlen:  the length of @in
 680  *
 681  * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 682  * block of chars out.
 683  *
 684  * Returns the number of bytes written, or -1 if lack of space, or -2
 685  *     if the transcoding failed.
 686  */
 687 static int
 688 UTF8ToUTF16(unsigned char* outb, int *outlen,
 689             const unsigned char* in, int *inlen)
 690 {
 691     if (in == NULL) {
 692         /*
 693          * initialization, add the Byte Order Mark for UTF-16LE
 694          */
 695         if (*outlen >= 2) {
 696             outb[0] = 0xFF;
 697             outb[1] = 0xFE;
 698             *outlen = 2;
 699             *inlen = 0;
 700 #ifdef DEBUG_ENCODING
 701             xmlGenericError(xmlGenericErrorContext,
 702                     "Added FFFE Byte Order Mark\n");
 703 #endif
 704             return(2);
 705         }
 706         *outlen = 0;
 707         *inlen = 0;
 708         return(0);
 709     }
 710     return (UTF8ToUTF16LE(outb, outlen, in, inlen));
 711 }
 712 #endif /* LIBXML_OUTPUT_ENABLED */
 713
 714 /**
 715  * UTF16BEToUTF8:
 716  * @out:  a pointer to an array of bytes to store the result
 717  * @outlen:  the length of @out
 718  * @inb:  a pointer to an array of UTF-16 passed as a byte array
 719  * @inlenb:  the length of @in in UTF-16 chars
 720  *
 721  * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
 722  * block of chars out. This function assumes the endian property
 723  * is the same between the native type of this machine and the
 724  * inputed one.
 725  *
 726  * Returns the number of bytes written, or -1 if lack of space, or -2
 727  *     if the transcoding fails (if *in is not a valid utf16 string)
 728  * The value of *inlen after return is the number of octets consumed
 729  *     if the return value is positive, else unpredictable.
 730  */
 731 static int
 732 UTF16BEToUTF8(unsigned char* out, int *outlen,
 733             const unsigned char* inb, int *inlenb)
 734 {
 735     unsigned char* outstart = out;
 736     const unsigned char* processed = inb;
 737     unsigned char* outend = out + *outlen;
 738     unsigned short* in = (unsigned short*) inb;
 739     unsigned short* inend;
 740     unsigned int c, d, inlen;
 741     unsigned char *tmp;
 742     int bits;
 743
 744     if ((*inlenb % 2) == 1)
 745         (*inlenb)--;
 746     inlen = *inlenb / 2;
 747     inend= in + inlen;
 748     while (in < inend) {
 749         if (xmlLittleEndian) {
 750             tmp = (unsigned char *) in;
 751             c = *tmp++;
 752             c = c << 8;
 753             c = c | (unsigned int) *tmp;
 754             in++;
 755         } else {
 756             c= *in++;
 757         }
 758         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
 759             if (in >= inend) {           /* (in > inend) shouldn't happens */
 760                 *outlen = out - outstart;
 761                 *inlenb = processed - inb;
 762                 return(-2);
 763             }
 764             if (xmlLittleEndian) {
 765                 tmp = (unsigned char *) in;
 766                 d = *tmp++;
 767                 d = d << 8;
 768                 d = d | (unsigned int) *tmp;
 769                 in++;
 770             } else {
 771                 d= *in++;
 772             }
 773             if ((d & 0xFC00) == 0xDC00) {
 774                 c &= 0x03FF;
 775                 c <<= 10;
 776                 c |= d & 0x03FF;
 777                 c += 0x10000;
 778             }
 779             else {
 780                 *outlen = out - outstart;
 781                 *inlenb = processed - inb;
 782                 return(-2);
 783             }
 784         }
 785
 786         /* assertion: c is a single UTF-4 value */
 787         if (out >= outend)
 788             break;
 789         if      (c <    0x80) {  *out++=  c;                bits= -6; }
 790         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
 791         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
 792         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
 793
 794         for ( ; bits >= 0; bits-= 6) {
 795             if (out >= outend)
 796                 break;
 797             *out++= ((c >> bits) & 0x3F) | 0x80;
 798         }
 799         processed = (const unsigned char*) in;
 800     }
 801     *outlen = out - outstart;
 802     *inlenb = processed - inb;
 803     return(*outlen);
 804 }
 805
 806 #ifdef LIBXML_OUTPUT_ENABLED
 807 /**
 808  * UTF8ToUTF16BE:
 809  * @outb:  a pointer to an array of bytes to store the result
 810  * @outlen:  the length of @outb
 811  * @in:  a pointer to an array of UTF-8 chars
 812  * @inlen:  the length of @in
 813  *
 814  * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 815  * block of chars out.
 816  *
 817  * Returns the number of byte written, or -1 by lack of space, or -2
 818  *     if the transcoding failed.
 819  */
 820 static int
 821 UTF8ToUTF16BE(unsigned char* outb, int *outlen,
 822             const unsigned char* in, int *inlen)
 823 {
 824     unsigned short* out = (unsigned short*) outb;
 825     const unsigned char* processed = in;
 826     const unsigned char *const instart = in;
 827     unsigned short* outstart= out;
 828     unsigned short* outend;
 829     const unsigned char* inend;
 830     unsigned int c, d;
 831     int trailing;
 832     unsigned char *tmp;
 833     unsigned short tmp1, tmp2;
 834
 835     /* UTF-16BE has no BOM */
 836     if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 837     if (in == NULL) {
 838         *outlen = 0;
 839         *inlen = 0;
 840         return(0);
 841     }
 842     inend= in + *inlen;
 843     outend = out + (*outlen / 2);
 844     while (in < inend) {
 845       d= *in++;
 846       if      (d < 0x80)  { c= d; trailing= 0; }
 847       else if (d < 0xC0)  {
 848           /* trailing byte in leading position */
 849           *outlen = out - outstart;
 850           *inlen = processed - instart;
 851           return(-2);
 852       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 853       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 854       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 855       else {
 856           /* no chance for this in UTF-16 */
 857           *outlen = out - outstart;
 858           *inlen = processed - instart;
 859           return(-2);
 860       }
 861
 862       if (inend - in < trailing) {
 863           break;
 864       }
 865
 866       for ( ; trailing; trailing--) {
 867           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
 868           c <<= 6;
 869           c |= d & 0x3F;
 870       }
 871
 872       /* assertion: c is a single UTF-4 value */
 873         if (c < 0x10000) {
 874             if (out >= outend)  break;
 875             if (xmlLittleEndian) {
 876                 tmp = (unsigned char *) out;
 877                 *tmp = c >> 8;
 878                 *(tmp + 1) = c;
 879                 out++;
 880             } else {
 881                 *out++ = c;
 882             }
 883         }
 884         else if (c < 0x110000) {
 885             if (out+1 >= outend)  break;
 886             c -= 0x10000;
 887             if (xmlLittleEndian) {
 888                 tmp1 = 0xD800 | (c >> 10);
 889                 tmp = (unsigned char *) out;
 890                 *tmp = tmp1 >> 8;
 891                 *(tmp + 1) = (unsigned char) tmp1;
 892                 out++;
 893
 894                 tmp2 = 0xDC00 | (c & 0x03FF);
 895                 tmp = (unsigned char *) out;
 896                 *tmp = tmp2 >> 8;
 897                 *(tmp + 1) = (unsigned char) tmp2;
 898                 out++;
 899             } else {
 900                 *out++ = 0xD800 | (c >> 10);
 901                 *out++ = 0xDC00 | (c & 0x03FF);
 902             }
 903         }
 904         else
 905             break;
 906         processed = in;
 907     }
 908     *outlen = (out - outstart) * 2;
 909     *inlen = processed - instart;
 910     return(*outlen);
 911 }
 912 #endif /* LIBXML_OUTPUT_ENABLED */
 913
 914 /************************************************************************
 915  *                                                                      *
 916  *              Generic encoding handling routines                      *
 917  *                                                                      *
 918  ************************************************************************/
 919
 920 /**
 921  * xmlDetectCharEncoding:
 * @in:  a pointer to the first bytes of the XML entity, must be at least
 *       2 bytes long (at least 4 if encoding is a UCS-4 variant).
 * @len:  the length of the buffer, in bytes
 925  *
 926  * Guess the encoding of the entity using the first bytes of the entity content
 927  * according to the non-normative appendix F of the XML-1.0 recommendation.
 928  *
 929  * Returns one of the XML_CHAR_ENCODING_... values.
 930  */
 931 xmlCharEncoding
 932 xmlDetectCharEncoding(const unsigned char* in, int len)
 933 {
 934     if (in == NULL)
 935         return(XML_CHAR_ENCODING_NONE);
 936     if (len >= 4) {
 937         if ((in[0] == 0x00) && (in[1] == 0x00) &&
 938             (in[2] == 0x00) && (in[3] == 0x3C))
 939             return(XML_CHAR_ENCODING_UCS4BE);
 940         if ((in[0] == 0x3C) && (in[1] == 0x00) &&
 941             (in[2] == 0x00) && (in[3] == 0x00))
 942             return(XML_CHAR_ENCODING_UCS4LE);
 943         if ((in[0] == 0x00) && (in[1] == 0x00) &&
 944             (in[2] == 0x3C) && (in[3] == 0x00))
 945             return(XML_CHAR_ENCODING_UCS4_2143);
 946         if ((in[0] == 0x00) && (in[1] == 0x3C) &&
 947             (in[2] == 0x00) && (in[3] == 0x00))
 948             return(XML_CHAR_ENCODING_UCS4_3412);
 949         if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
 950             (in[2] == 0xA7) && (in[3] == 0x94))
 951             return(XML_CHAR_ENCODING_EBCDIC);
 952         if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
 953             (in[2] == 0x78) && (in[3] == 0x6D))
 954             return(XML_CHAR_ENCODING_UTF8);
 955         /*
 956          * Although not part of the recommendation, we also
 957          * attempt an "auto-recognition" of UTF-16LE and
 958          * UTF-16BE encodings.
 959          */
 960         if ((in[0] == 0x3C) && (in[1] == 0x00) &&
 961             (in[2] == 0x3F) && (in[3] == 0x00))
 962             return(XML_CHAR_ENCODING_UTF16LE);
 963         if ((in[0] == 0x00) && (in[1] == 0x3C) &&
 964             (in[2] == 0x00) && (in[3] == 0x3F))
 965             return(XML_CHAR_ENCODING_UTF16BE);
 966     }
 967     if (len >= 3) {
 968         /*
 969          * Errata on XML-1.0 June 20 2001
 970          * We now allow an UTF8 encoded BOM
 971          */
 972         if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
 973             (in[2] == 0xBF))
 974             return(XML_CHAR_ENCODING_UTF8);
 975     }
 976     /* For UTF-16 we can recognize by the BOM */
 977     if (len >= 2) {
 978         if ((in[0] == 0xFE) && (in[1] == 0xFF))
 979             return(XML_CHAR_ENCODING_UTF16BE);
 980         if ((in[0] == 0xFF) && (in[1] == 0xFE))
 981             return(XML_CHAR_ENCODING_UTF16LE);
 982     }
 983     return(XML_CHAR_ENCODING_NONE);
 984 }
 985
 986 /**
 987  * xmlCleanupEncodingAliases:
 988  *
 989  * Unregisters all aliases
 990  */
 991 void
 992 xmlCleanupEncodingAliases(void) {
 993     int i;
 994
 995     if (xmlCharEncodingAliases == NULL)
 996         return;
 997
 998     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
 999         if (xmlCharEncodingAliases[i].name != NULL)
1000             xmlFree((char *) xmlCharEncodingAliases[i].name);
1001         if (xmlCharEncodingAliases[i].alias != NULL)
1002             xmlFree((char *) xmlCharEncodingAliases[i].alias);
1003     }
1004     xmlCharEncodingAliasesNb = 0;
1005     xmlCharEncodingAliasesMax = 0;
1006     xmlFree(xmlCharEncodingAliases);
1007     xmlCharEncodingAliases = NULL;
1008 }
1009
1010 /**
1011  * xmlGetEncodingAlias:
1012  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1013  *
1014  * Lookup an encoding name for the given alias.
1015  *
1016  * Returns NULL if not found, otherwise the original name
1017  */
1018 const char *
1019 xmlGetEncodingAlias(const char *alias) {
1020     int i;
1021     char upper[100];
1022
1023     if (alias == NULL)
1024         return(NULL);
1025
1026     if (xmlCharEncodingAliases == NULL)
1027         return(NULL);
1028
1029     for (i = 0;i < 99;i++) {
1030         upper[i] = toupper(alias[i]);
1031         if (upper[i] == 0) break;
1032     }
1033     upper[i] = 0;
1034
1035     /*
1036      * Walk down the list looking for a definition of the alias
1037      */
1038     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1039         if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1040             return(xmlCharEncodingAliases[i].name);
1041         }
1042     }
1043     return(NULL);
1044 }
1045
1046 /**
1047  * xmlAddEncodingAlias:
1048  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1049  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1050  *
1051  * Registers an alias @alias for an encoding named @name. Existing alias
1052  * will be overwritten.
1053  *
1054  * Returns 0 in case of success, -1 in case of error
1055  */
1056 int
1057 xmlAddEncodingAlias(const char *name, const char *alias) {
1058     int i;
1059     char upper[100];
1060
1061     if ((name == NULL) || (alias == NULL))
1062         return(-1);
1063
1064     for (i = 0;i < 99;i++) {
1065         upper[i] = toupper(alias[i]);
1066         if (upper[i] == 0) break;
1067     }
1068     upper[i] = 0;
1069
1070     if (xmlCharEncodingAliases == NULL) {
1071         xmlCharEncodingAliasesNb = 0;
1072         xmlCharEncodingAliasesMax = 20;
1073         xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1074               xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1075         if (xmlCharEncodingAliases == NULL)
1076             return(-1);
1077     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1078         xmlCharEncodingAliasesMax *= 2;
1079         xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1080               xmlRealloc(xmlCharEncodingAliases,
1081                          xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1082     }
1083     /*
1084      * Walk down the list looking for a definition of the alias
1085      */
1086     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1087         if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1088             /*
1089              * Replace the definition.
1090              */
1091             xmlFree((char *) xmlCharEncodingAliases[i].name);
1092             xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1093             return(0);
1094         }
1095     }
1096     /*
1097      * Add the definition
1098      */
1099     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1100     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1101     xmlCharEncodingAliasesNb++;
1102     return(0);
1103 }
1104
1105 /**
1106  * xmlDelEncodingAlias:
1107  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1108  *
1109  * Unregisters an encoding alias @alias
1110  *
1111  * Returns 0 in case of success, -1 in case of error
1112  */
1113 int
1114 xmlDelEncodingAlias(const char *alias) {
1115     int i;
1116
1117     if (alias == NULL)
1118         return(-1);
1119
1120     if (xmlCharEncodingAliases == NULL)
1121         return(-1);
1122     /*
1123      * Walk down the list looking for a definition of the alias
1124      */
1125     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1126         if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1127             xmlFree((char *) xmlCharEncodingAliases[i].name);
1128             xmlFree((char *) xmlCharEncodingAliases[i].alias);
1129             xmlCharEncodingAliasesNb--;
1130             memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1131                     sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1132             return(0);
1133         }
1134     }
1135     return(-1);
1136 }
1137
1138 /**
1139  * xmlParseCharEncoding:
1140  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1141  *
1142  * Compare the string to the encoding schemes already known. Note
1143  * that the comparison is case insensitive accordingly to the section
1144  * [XML] 4.3.3 Character Encoding in Entities.
1145  *
1146  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1147  * if not recognized.
1148  */
1149 xmlCharEncoding
1150 xmlParseCharEncoding(const char* name)
1151 {
1152     const char *alias;
1153     char upper[500];
1154     int i;
1155
1156     if (name == NULL)
1157         return(XML_CHAR_ENCODING_NONE);
1158
1159     /*
1160      * Do the alias resolution
1161      */
1162     alias = xmlGetEncodingAlias(name);
1163     if (alias != NULL)
1164         name = alias;
1165
1166     for (i = 0;i < 499;i++) {
1167         upper[i] = toupper(name[i]);
1168         if (upper[i] == 0) break;
1169     }
1170     upper[i] = 0;
1171
1172     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1173     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1174     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1175
1176     /*
1177      * NOTE: if we were able to parse this, the endianness of UTF16 is
1178      *       already found and in use
1179      */
1180     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1181     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1182
1183     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1184     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1185     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1186
1187     /*
1188      * NOTE: if we were able to parse this, the endianness of UCS4 is
1189      *       already found and in use
1190      */
1191     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1192     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1193     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1194
1195
1196     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1197     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1198     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1199
1200     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1201     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1202     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1203
1204     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1205     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1206     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1207     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1208     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1209     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1210     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1211
1212     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1213     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1214     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1215
1216 #ifdef DEBUG_ENCODING
1217     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1218 #endif
1219     return(XML_CHAR_ENCODING_ERROR);
1220 }
1221
1222 /**
1223  * xmlGetCharEncodingName:
1224  * @enc:  the encoding
1225  *
1226  * The "canonical" name for XML encoding.
1227  * C.f. http://www.w3.org/TR/REC-xml#charencoding
1228  * Section 4.3.3  Character Encoding in Entities
1229  *
1230  * Returns the canonical name for the given encoding
1231  */
1232
1233 const char*
1234 xmlGetCharEncodingName(xmlCharEncoding enc) {
1235     switch (enc) {
1236         case XML_CHAR_ENCODING_ERROR:
1237             return(NULL);
1238         case XML_CHAR_ENCODING_NONE:
1239             return(NULL);
1240         case XML_CHAR_ENCODING_UTF8:
1241             return("UTF-8");
1242         case XML_CHAR_ENCODING_UTF16LE:
1243             return("UTF-16");
1244         case XML_CHAR_ENCODING_UTF16BE:
1245             return("UTF-16");
1246         case XML_CHAR_ENCODING_EBCDIC:
1247             return("EBCDIC");
1248         case XML_CHAR_ENCODING_UCS4LE:
1249             return("ISO-10646-UCS-4");
1250         case XML_CHAR_ENCODING_UCS4BE:
1251             return("ISO-10646-UCS-4");
1252         case XML_CHAR_ENCODING_UCS4_2143:
1253             return("ISO-10646-UCS-4");
1254         case XML_CHAR_ENCODING_UCS4_3412:
1255             return("ISO-10646-UCS-4");
1256         case XML_CHAR_ENCODING_UCS2:
1257             return("ISO-10646-UCS-2");
1258         case XML_CHAR_ENCODING_8859_1:
1259             return("ISO-8859-1");
1260         case XML_CHAR_ENCODING_8859_2:
1261             return("ISO-8859-2");
1262         case XML_CHAR_ENCODING_8859_3:
1263             return("ISO-8859-3");
1264         case XML_CHAR_ENCODING_8859_4:
1265             return("ISO-8859-4");
1266         case XML_CHAR_ENCODING_8859_5:
1267             return("ISO-8859-5");
1268         case XML_CHAR_ENCODING_8859_6:
1269             return("ISO-8859-6");
1270         case XML_CHAR_ENCODING_8859_7:
1271             return("ISO-8859-7");
1272         case XML_CHAR_ENCODING_8859_8:
1273             return("ISO-8859-8");
1274         case XML_CHAR_ENCODING_8859_9:
1275             return("ISO-8859-9");
1276         case XML_CHAR_ENCODING_2022_JP:
1277             return("ISO-2022-JP");
1278         case XML_CHAR_ENCODING_SHIFT_JIS:
1279             return("Shift-JIS");
1280         case XML_CHAR_ENCODING_EUC_JP:
1281             return("EUC-JP");
1282         case XML_CHAR_ENCODING_ASCII:
1283             return(NULL);
1284     }
1285     return(NULL);
1286 }
1287
1288 /************************************************************************
1289  *                                                                      *
1290  *                      Char encoding handlers                          *
1291  *                                                                      *
1292  ************************************************************************/
1293
1294
/*
 * Registry of the char encoding handlers.
 *
 * The table is allocated by xmlInitCharEncodingHandlers() with room for
 * MAX_ENCODING_HANDLERS entries; the size should be growable, but it's
 * not a big deal ...
 */
#define MAX_ENCODING_HANDLERS 50
/* The handler table itself, NULL until the first initialization. */
static xmlCharEncodingHandlerPtr *handlers = NULL;
/* Number of entries of the table currently in use. */
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL.
 * xmlFindCharEncodingHandler() returns this value when asked for a NULL
 * or empty encoding name.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1306
1307 /**
1308  * xmlNewCharEncodingHandler:
1309  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1310  * @input:  the xmlCharEncodingInputFunc to read that encoding
1311  * @output:  the xmlCharEncodingOutputFunc to write that encoding
1312  *
1313  * Create and registers an xmlCharEncodingHandler.
1314  *
1315  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1316  */
1317 xmlCharEncodingHandlerPtr
1318 xmlNewCharEncodingHandler(const char *name,
1319                           xmlCharEncodingInputFunc input,
1320                           xmlCharEncodingOutputFunc output) {
1321     xmlCharEncodingHandlerPtr handler;
1322     const char *alias;
1323     char upper[500];
1324     int i;
1325     char *up = NULL;
1326
1327     /*
1328      * Do the alias resolution
1329      */
1330     alias = xmlGetEncodingAlias(name);
1331     if (alias != NULL)
1332         name = alias;
1333
1334     /*
1335      * Keep only the uppercase version of the encoding.
1336      */
1337     if (name == NULL) {
1338         xmlEncodingErr(XML_I18N_NO_NAME,
1339                        "xmlNewCharEncodingHandler : no name !\n", NULL);
1340         return(NULL);
1341     }
1342     for (i = 0;i < 499;i++) {
1343         upper[i] = toupper(name[i]);
1344         if (upper[i] == 0) break;
1345     }
1346     upper[i] = 0;
1347     up = xmlMemStrdup(upper);
1348     if (up == NULL) {
1349         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1350         return(NULL);
1351     }
1352
1353     /*
1354      * allocate and fill-up an handler block.
1355      */
1356     handler = (xmlCharEncodingHandlerPtr)
1357               xmlMalloc(sizeof(xmlCharEncodingHandler));
1358     if (handler == NULL) {
1359         xmlFree(up);
1360         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1361         return(NULL);
1362     }
1363     memset(handler, 0, sizeof(xmlCharEncodingHandler));
1364     handler->input = input;
1365     handler->output = output;
1366     handler->name = up;
1367
1368 #ifdef LIBXML_ICONV_ENABLED
1369     handler->iconv_in = NULL;
1370     handler->iconv_out = NULL;
1371 #endif
1372 #ifdef LIBXML_ICU_ENABLED
1373     handler->uconv_in = NULL;
1374     handler->uconv_out = NULL;
1375 #endif
1376
1377     /*
1378      * registers and returns the handler.
1379      */
1380     xmlRegisterCharEncodingHandler(handler);
1381 #ifdef DEBUG_ENCODING
1382     xmlGenericError(xmlGenericErrorContext,
1383             "Registered encoding handler for %s\n", name);
1384 #endif
1385     return(handler);
1386 }
1387
1388 /**
1389  * xmlInitCharEncodingHandlers:
1390  *
1391  * Initialize the char encoding support, it registers the default
1392  * encoding supported.
1393  * NOTE: while public, this function usually doesn't need to be called
1394  *       in normal processing.
1395  */
1396 void
1397 xmlInitCharEncodingHandlers(void) {
1398     unsigned short int tst = 0x1234;
1399     unsigned char *ptr = (unsigned char *) &tst;
1400
1401     if (handlers != NULL) return;
1402
1403     handlers = (xmlCharEncodingHandlerPtr *)
1404         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1405
1406     if (*ptr == 0x12) xmlLittleEndian = 0;
1407     else if (*ptr == 0x34) xmlLittleEndian = 1;
1408     else {
1409         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1410                        "Odd problem at endianness detection\n", NULL);
1411     }
1412
1413     if (handlers == NULL) {
1414         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1415         return;
1416     }
1417     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1418 #ifdef LIBXML_OUTPUT_ENABLED
1419     xmlUTF16LEHandler =
1420           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1421     xmlUTF16BEHandler =
1422           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1423     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1424     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1425     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1426     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1427 #ifdef LIBXML_HTML_ENABLED
1428     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1429 #endif
1430 #else
1431     xmlUTF16LEHandler =
1432           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1433     xmlUTF16BEHandler =
1434           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1435     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1436     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1437     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1438     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1439 #endif /* LIBXML_OUTPUT_ENABLED */
1440 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1441 #ifdef LIBXML_ISO8859X_ENABLED
1442     xmlRegisterCharEncodingHandlersISO8859x ();
1443 #endif
1444 #endif
1445
1446 }
1447
1448 /**
1449  * xmlCleanupCharEncodingHandlers:
1450  *
1451  * Cleanup the memory allocated for the char encoding support, it
1452  * unregisters all the encoding handlers and the aliases.
1453  */
1454 void
1455 xmlCleanupCharEncodingHandlers(void) {
1456     xmlCleanupEncodingAliases();
1457
1458     if (handlers == NULL) return;
1459
1460     for (;nbCharEncodingHandler > 0;) {
1461         nbCharEncodingHandler--;
1462         if (handlers[nbCharEncodingHandler] != NULL) {
1463             if (handlers[nbCharEncodingHandler]->name != NULL)
1464                 xmlFree(handlers[nbCharEncodingHandler]->name);
1465             xmlFree(handlers[nbCharEncodingHandler]);
1466         }
1467     }
1468     xmlFree(handlers);
1469     handlers = NULL;
1470     nbCharEncodingHandler = 0;
1471     xmlDefaultCharEncodingHandler = NULL;
1472 }
1473
1474 /**
1475  * xmlRegisterCharEncodingHandler:
1476  * @handler:  the xmlCharEncodingHandlerPtr handler block
1477  *
1478  * Register the char encoding handler, surprising, isn't it ?
1479  */
1480 void
1481 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1482     if (handlers == NULL) xmlInitCharEncodingHandlers();
1483     if ((handler == NULL) || (handlers == NULL)) {
1484         xmlEncodingErr(XML_I18N_NO_HANDLER,
1485                 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1486         return;
1487     }
1488
1489     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1490         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1491         "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1492                        "MAX_ENCODING_HANDLERS");
1493         return;
1494     }
1495     handlers[nbCharEncodingHandler++] = handler;
1496 }
1497
1498 /**
1499  * xmlGetCharEncodingHandler:
1500  * @enc:  an xmlCharEncoding value.
1501  *
1502  * Search in the registered set the handler able to read/write that encoding.
1503  *
1504  * Returns the handler or NULL if not found
1505  */
1506 xmlCharEncodingHandlerPtr
1507 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1508     xmlCharEncodingHandlerPtr handler;
1509
1510     if (handlers == NULL) xmlInitCharEncodingHandlers();
1511     switch (enc) {
1512         case XML_CHAR_ENCODING_ERROR:
1513             return(NULL);
1514         case XML_CHAR_ENCODING_NONE:
1515             return(NULL);
1516         case XML_CHAR_ENCODING_UTF8:
1517             return(NULL);
1518         case XML_CHAR_ENCODING_UTF16LE:
1519             return(xmlUTF16LEHandler);
1520         case XML_CHAR_ENCODING_UTF16BE:
1521             return(xmlUTF16BEHandler);
1522         case XML_CHAR_ENCODING_EBCDIC:
1523             handler = xmlFindCharEncodingHandler("EBCDIC");
1524             if (handler != NULL) return(handler);
1525             handler = xmlFindCharEncodingHandler("ebcdic");
1526             if (handler != NULL) return(handler);
1527             handler = xmlFindCharEncodingHandler("EBCDIC-US");
1528             if (handler != NULL) return(handler);
1529             handler = xmlFindCharEncodingHandler("IBM-037");
1530             if (handler != NULL) return(handler);
1531             break;
1532         case XML_CHAR_ENCODING_UCS4BE:
1533             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1534             if (handler != NULL) return(handler);
1535             handler = xmlFindCharEncodingHandler("UCS-4");
1536             if (handler != NULL) return(handler);
1537             handler = xmlFindCharEncodingHandler("UCS4");
1538             if (handler != NULL) return(handler);
1539             break;
1540         case XML_CHAR_ENCODING_UCS4LE:
1541             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1542             if (handler != NULL) return(handler);
1543             handler = xmlFindCharEncodingHandler("UCS-4");
1544             if (handler != NULL) return(handler);
1545             handler = xmlFindCharEncodingHandler("UCS4");
1546             if (handler != NULL) return(handler);
1547             break;
1548         case XML_CHAR_ENCODING_UCS4_2143:
1549             break;
1550         case XML_CHAR_ENCODING_UCS4_3412:
1551             break;
1552         case XML_CHAR_ENCODING_UCS2:
1553             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1554             if (handler != NULL) return(handler);
1555             handler = xmlFindCharEncodingHandler("UCS-2");
1556             if (handler != NULL) return(handler);
1557             handler = xmlFindCharEncodingHandler("UCS2");
1558             if (handler != NULL) return(handler);
1559             break;
1560
1561             /*
1562              * We used to keep ISO Latin encodings native in the
1563              * generated data. This led to so many problems that
1564              * this has been removed. One can still change this
1565              * back by registering no-ops encoders for those
1566              */
1567         case XML_CHAR_ENCODING_8859_1:
1568             handler = xmlFindCharEncodingHandler("ISO-8859-1");
1569             if (handler != NULL) return(handler);
1570             break;
1571         case XML_CHAR_ENCODING_8859_2:
1572             handler = xmlFindCharEncodingHandler("ISO-8859-2");
1573             if (handler != NULL) return(handler);
1574             break;
1575         case XML_CHAR_ENCODING_8859_3:
1576             handler = xmlFindCharEncodingHandler("ISO-8859-3");
1577             if (handler != NULL) return(handler);
1578             break;
1579         case XML_CHAR_ENCODING_8859_4:
1580             handler = xmlFindCharEncodingHandler("ISO-8859-4");
1581             if (handler != NULL) return(handler);
1582             break;
1583         case XML_CHAR_ENCODING_8859_5:
1584             handler = xmlFindCharEncodingHandler("ISO-8859-5");
1585             if (handler != NULL) return(handler);
1586             break;
1587         case XML_CHAR_ENCODING_8859_6:
1588             handler = xmlFindCharEncodingHandler("ISO-8859-6");
1589             if (handler != NULL) return(handler);
1590             break;
1591         case XML_CHAR_ENCODING_8859_7:
1592             handler = xmlFindCharEncodingHandler("ISO-8859-7");
1593             if (handler != NULL) return(handler);
1594             break;
1595         case XML_CHAR_ENCODING_8859_8:
1596             handler = xmlFindCharEncodingHandler("ISO-8859-8");
1597             if (handler != NULL) return(handler);
1598             break;
1599         case XML_CHAR_ENCODING_8859_9:
1600             handler = xmlFindCharEncodingHandler("ISO-8859-9");
1601             if (handler != NULL) return(handler);
1602             break;
1603
1604
1605         case XML_CHAR_ENCODING_2022_JP:
1606             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1607             if (handler != NULL) return(handler);
1608             break;
1609         case XML_CHAR_ENCODING_SHIFT_JIS:
1610             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1611             if (handler != NULL) return(handler);
1612             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1613             if (handler != NULL) return(handler);
1614             handler = xmlFindCharEncodingHandler("Shift_JIS");
1615             if (handler != NULL) return(handler);
1616             break;
1617         case XML_CHAR_ENCODING_EUC_JP:
1618             handler = xmlFindCharEncodingHandler("EUC-JP");
1619             if (handler != NULL) return(handler);
1620             break;
1621         default:
1622             break;
1623     }
1624
1625 #ifdef DEBUG_ENCODING
1626     xmlGenericError(xmlGenericErrorContext,
1627             "No handler found for encoding %d\n", enc);
1628 #endif
1629     return(NULL);
1630 }
1631
1632 /**
1633  * xmlFindCharEncodingHandler:
1634  * @name:  a string describing the char encoding.
1635  *
1636  * Search in the registered set the handler able to read/write that encoding.
1637  *
1638  * Returns the handler or NULL if not found
1639  */
1640 xmlCharEncodingHandlerPtr
1641 xmlFindCharEncodingHandler(const char *name) {
1642     const char *nalias;
1643     const char *norig;
1644     xmlCharEncoding alias;
1645 #ifdef LIBXML_ICONV_ENABLED
1646     xmlCharEncodingHandlerPtr enc;
1647     iconv_t icv_in, icv_out;
1648 #endif /* LIBXML_ICONV_ENABLED */
1649 #ifdef LIBXML_ICU_ENABLED
1650     xmlCharEncodingHandlerPtr encu;
1651     uconv_t *ucv_in, *ucv_out;
1652 #endif /* LIBXML_ICU_ENABLED */
1653     char upper[100];
1654     int i;
1655
1656     if (handlers == NULL) xmlInitCharEncodingHandlers();
1657     if (name == NULL) return(xmlDefaultCharEncodingHandler);
1658     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1659
1660     /*
1661      * Do the alias resolution
1662      */
1663     norig = name;
1664     nalias = xmlGetEncodingAlias(name);
1665     if (nalias != NULL)
1666         name = nalias;
1667
1668     /*
1669      * Check first for directly registered encoding names
1670      */
1671     for (i = 0;i < 99;i++) {
1672         upper[i] = toupper(name[i]);
1673         if (upper[i] == 0) break;
1674     }
1675     upper[i] = 0;
1676
1677     if (handlers != NULL) {
1678         for (i = 0;i < nbCharEncodingHandler; i++) {
1679             if (!strcmp(upper, handlers[i]->name)) {
1680 #ifdef DEBUG_ENCODING
1681                 xmlGenericError(xmlGenericErrorContext,
1682                         "Found registered handler for encoding %s\n", name);
1683 #endif
1684                 return(handlers[i]);
1685             }
1686         }
1687     }
1688
1689 #ifdef LIBXML_ICONV_ENABLED
1690     /* check whether iconv can handle this */
1691     icv_in = iconv_open("UTF-8", name);
1692     icv_out = iconv_open(name, "UTF-8");
1693     if (icv_in == (iconv_t) -1) {
1694         icv_in = iconv_open("UTF-8", upper);
1695     }
1696     if (icv_out == (iconv_t) -1) {
1697         icv_out = iconv_open(upper, "UTF-8");
1698     }
1699     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1700             enc = (xmlCharEncodingHandlerPtr)
1701                   xmlMalloc(sizeof(xmlCharEncodingHandler));
1702             if (enc == NULL) {
1703                 iconv_close(icv_in);
1704                 iconv_close(icv_out);
1705                 return(NULL);
1706             }
1707             memset(enc, 0, sizeof(xmlCharEncodingHandler));
1708             enc->name = xmlMemStrdup(name);
1709             enc->input = NULL;
1710             enc->output = NULL;
1711             enc->iconv_in = icv_in;
1712             enc->iconv_out = icv_out;
1713 #ifdef DEBUG_ENCODING
1714             xmlGenericError(xmlGenericErrorContext,
1715                     "Found iconv handler for encoding %s\n", name);
1716 #endif
1717             return enc;
1718     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1719             xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1720                     "iconv : problems with filters for '%s'\n", name);
1721     }
1722 #endif /* LIBXML_ICONV_ENABLED */
1723 #ifdef LIBXML_ICU_ENABLED
1724     /* check whether icu can handle this */
1725     ucv_in = openIcuConverter(name, 1);
1726     ucv_out = openIcuConverter(name, 0);
1727     if (ucv_in != NULL && ucv_out != NULL) {
1728             encu = (xmlCharEncodingHandlerPtr)
1729                    xmlMalloc(sizeof(xmlCharEncodingHandler));
1730             if (encu == NULL) {
1731                 closeIcuConverter(ucv_in);
1732                 closeIcuConverter(ucv_out);
1733                 return(NULL);
1734             }
1735             memset(encu, 0, sizeof(xmlCharEncodingHandler));
1736             encu->name = xmlMemStrdup(name);
1737             encu->input = NULL;
1738             encu->output = NULL;
1739             encu->uconv_in = ucv_in;
1740             encu->uconv_out = ucv_out;
1741 #ifdef DEBUG_ENCODING
1742             xmlGenericError(xmlGenericErrorContext,
1743                     "Found ICU converter handler for encoding %s\n", name);
1744 #endif
1745             return encu;
1746     } else if (ucv_in != NULL || ucv_out != NULL) {
1747             closeIcuConverter(ucv_in);
1748             closeIcuConverter(ucv_out);
1749             xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1750                     "ICU converter : problems with filters for '%s'\n", name);
1751     }
1752 #endif /* LIBXML_ICU_ENABLED */
1753
1754 #ifdef DEBUG_ENCODING
1755     xmlGenericError(xmlGenericErrorContext,
1756             "No handler found for encoding %s\n", name);
1757 #endif
1758
1759     /*
1760      * Fallback using the canonical names
1761      */
1762     alias = xmlParseCharEncoding(norig);
1763     if (alias != XML_CHAR_ENCODING_ERROR) {
1764         const char* canon;
1765         canon = xmlGetCharEncodingName(alias);
1766         if ((canon != NULL) && (strcmp(name, canon))) {
1767             return(xmlFindCharEncodingHandler(canon));
1768         }
1769     }
1770
1771     /* If "none of the above", give up */
1772     return(NULL);
1773 }
1774
1775 /************************************************************************
1776  *                                                                      *
1777  *              ICONV based generic conversion functions                *
1778  *                                                                      *
1779  ************************************************************************/
1780
1781 #ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:         iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes in the source encoding of @cd
 * @inlen:  the length of @in
 *
 * Runs a single iconv() call converting @in into @out and maps the
 * outcome onto the error codes used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (also returned when a pointer argument is NULL,
 *        in which case @outlen, if not NULL, is set to 0), or
 *     -2 if the transcoding fails (the input is not valid in the source
 *        encoding or the result can't fit into the encoding we want), or
 *     -3 if the input ends with an incomplete sequence, or for any
 *        other iconv failure.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }
    icv_inlen = *inlen;
    icv_outlen = *outlen;
    /*
     * iconv() is declared with either "char **" or "const char **" for
     * its input pointer depending on the platform; going through void *
     * satisfies both prototypes.
     */
    ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    *inlen -= icv_inlen;
    *outlen -= icv_outlen;

    /* iconv() returns a size_t: failure is (size_t) -1, not a negative int */
    if (ret == (size_t) -1) {
#ifdef EILSEQ
        if (errno == EILSEQ)
            return(-2);
#endif
#ifdef E2BIG
        if (errno == E2BIG)
            return(-1);
#endif
        /* EINVAL (truncated input sequence) and any other failure */
        return(-3);
    }

    /*
     * iconv() reported success: errno is meaningless here. Leftover
     * input is still reported as a partial conversion.
     */
    if (icv_inlen != 0)
        return(-3);
    return(0);
}
1839 #endif /* LIBXML_ICONV_ENABLED */
1840
1841 /************************************************************************
1842  *                                                                      *
1843  *              ICU based generic conversion functions          *
1844  *                                                                      *
1845  ************************************************************************/
1846
1847 #ifdef LIBXML_ICU_ENABLED
1848 /**
1849  * xmlUconvWrapper:
1850  * @cd: ICU uconverter data structure
1851  * @toUnicode : non-zero if toUnicode. 0 otherwise.
1852  * @out:  a pointer to an array of bytes to store the result
1853  * @outlen:  the length of @out
1854  * @in:  a pointer to an array of ISO Latin 1 chars
1855  * @inlen:  the length of @in
1856  * @flush: if true, indicates end of input
1857  *
1858  * Returns 0 if success, or
1859  *     -1 by lack of space, or
1860  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1861  *        the result of transformation can't fit into the encoding we want), or
1862  *     -3 if there the last byte can't form a single output char.
1863  *
1864  * The value of @inlen after return is the number of octets consumed
1865  *     as the return value is positive, else unpredictable.
1866  * The value of @outlen after return is the number of ocetes consumed.
1867  */
1868 static int
1869 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1870                 const unsigned char *in, int *inlen, int flush) {
1871     const char *ucv_in = (const char *) in;
1872     char *ucv_out = (char *) out;
1873     UErrorCode err = U_ZERO_ERROR;
1874
1875     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1876         if (outlen != NULL) *outlen = 0;
1877         return(-1);
1878     }
1879
1880     if (toUnicode) {
1881         /* encoding => UTF-16 => UTF-8 */
1882         ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1883                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1884                        &cd->pivot_source, &cd->pivot_target,
1885                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1886     } else {
1887         /* UTF-8 => UTF-16 => encoding */
1888         ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1889                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1890                        &cd->pivot_source, &cd->pivot_target,
1891                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1892     }
1893     *inlen = ucv_in - (const char*) in;
1894     *outlen = ucv_out - (char *) out;
1895     if (U_SUCCESS(err)) {
1896         /* reset pivot buf if this is the last call for input (flush==TRUE) */
1897         if (flush)
1898             cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1899         return 0;
1900     }
1901     if (err == U_BUFFER_OVERFLOW_ERROR)
1902         return -1;
1903     if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1904         return -2;
1905     return -3;
1906 }
1907 #endif /* LIBXML_ICU_ENABLED */
1908
1909 /************************************************************************
1910  *                                                                      *
1911  *              The real API used by libxml for on-the-fly conversion   *
1912  *                                                                      *
1913  ************************************************************************/
1914
1915 static int
1916 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1917                  int *outlen, const unsigned char *in, int *inlen, int flush) {
1918     int ret;
1919     (void)flush;
1920
1921     if (handler->input != NULL) {
1922         ret = handler->input(out, outlen, in, inlen);
1923     }
1924 #ifdef LIBXML_ICONV_ENABLED
1925     else if (handler->iconv_in != NULL) {
1926         ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1927     }
1928 #endif /* LIBXML_ICONV_ENABLED */
1929 #ifdef LIBXML_ICU_ENABLED
1930     else if (handler->uconv_in != NULL) {
1931         ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1932                               flush);
1933     }
1934 #endif /* LIBXML_ICU_ENABLED */
1935     else {
1936         *outlen = 0;
1937         *inlen = 0;
1938         ret = -2;
1939     }
1940
1941     return(ret);
1942 }
1943
1944 /* Returns -4 if no output function was found. */
1945 static int
1946 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1947                   int *outlen, const unsigned char *in, int *inlen) {
1948     int ret;
1949
1950     if (handler->output != NULL) {
1951         ret = handler->output(out, outlen, in, inlen);
1952     }
1953 #ifdef LIBXML_ICONV_ENABLED
1954     else if (handler->iconv_out != NULL) {
1955         ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
1956     }
1957 #endif /* LIBXML_ICONV_ENABLED */
1958 #ifdef LIBXML_ICU_ENABLED
1959     else if (handler->uconv_out != NULL) {
1960         ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
1961                               TRUE);
1962     }
1963 #endif /* LIBXML_ICU_ENABLED */
1964     else {
1965         *outlen = 0;
1966         *inlen = 0;
1967         ret = -4;
1968     }
1969
1970     return(ret);
1971 }
1972
1973 /**
1974  * xmlCharEncFirstLineInt:
1975  * @handler:    char enconding transformation data structure
1976  * @out:  an xmlBuffer for the output.
1977  * @in:  an xmlBuffer for the input
1978  * @len:  number of bytes to convert for the first line, or -1
1979  *
1980  * Front-end for the encoding handler input function, but handle only
1981  * the very first line, i.e. limit itself to 45 chars.
1982  *
1983  * Returns the number of byte written if success, or
1984  *     -1 general error
1985  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1986  *        the result of transformation can't fit into the encoding we want), or
1987  */
1988 int
1989 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1990                        xmlBufferPtr in, int len) {
1991     int ret;
1992     int written;
1993     int toconv;
1994
1995     if (handler == NULL) return(-1);
1996     if (out == NULL) return(-1);
1997     if (in == NULL) return(-1);
1998
1999     /* calculate space available */
2000     written = out->size - out->use - 1; /* count '\0' */
2001     toconv = in->use;
2002     /*
2003      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2004      * 45 chars should be sufficient to reach the end of the encoding
2005      * declaration without going too far inside the document content.
2006      * on UTF-16 this means 90bytes, on UCS4 this means 180
2007      * The actual value depending on guessed encoding is passed as @len
2008      * if provided
2009      */
2010     if (len >= 0) {
2011         if (toconv > len)
2012             toconv = len;
2013     } else {
2014         if (toconv > 180)
2015             toconv = 180;
2016     }
2017     if (toconv * 2 >= written) {
2018         xmlBufferGrow(out, toconv * 2);
2019         written = out->size - out->use - 1;
2020     }
2021
2022     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2023                            in->content, &toconv, 0);
2024     xmlBufferShrink(in, toconv);
2025     out->use += written;
2026     out->content[out->use] = 0;
2027     if (ret == -1) ret = -3;
2028
2029 #ifdef DEBUG_ENCODING
2030     switch (ret) {
2031         case 0:
2032             xmlGenericError(xmlGenericErrorContext,
2033                     "converted %d bytes to %d bytes of input\n",
2034                     toconv, written);
2035             break;
2036         case -1:
2037             xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2038                     toconv, written, in->use);
2039             break;
2040         case -2:
2041             xmlGenericError(xmlGenericErrorContext,
2042                     "input conversion failed due to input error\n");
2043             break;
2044         case -3:
2045             xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2046                     toconv, written, in->use);
2047             break;
2048         default:
2049             xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2050     }
2051 #endif /* DEBUG_ENCODING */
2052     /*
2053      * Ignore when input buffer is not on a boundary
2054      */
2055     if (ret == -3) ret = 0;
2056     if (ret == -1) ret = 0;
2057     return(ret);
2058 }
2059
2060 /**
2061  * xmlCharEncFirstLine:
2062  * @handler:    char enconding transformation data structure
2063  * @out:  an xmlBuffer for the output.
2064  * @in:  an xmlBuffer for the input
2065  *
2066  * Front-end for the encoding handler input function, but handle only
2067  * the very first line, i.e. limit itself to 45 chars.
2068  *
2069  * Returns the number of byte written if success, or
2070  *     -1 general error
2071  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2072  *        the result of transformation can't fit into the encoding we want), or
2073  */
2074 int
2075 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2076                  xmlBufferPtr in) {
2077     return(xmlCharEncFirstLineInt(handler, out, in, -1));
2078 }
2079
2080 /**
2081  * xmlCharEncFirstLineInput:
2082  * @input: a parser input buffer
2083  * @len:  number of bytes to convert for the first line, or -1
2084  *
2085  * Front-end for the encoding handler input function, but handle only
2086  * the very first line. Point is that this is based on autodetection
2087  * of the encoding and once that first line is converted we may find
2088  * out that a different decoder is needed to process the input.
2089  *
2090  * Returns the number of byte written if success, or
2091  *     -1 general error
2092  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2093  *        the result of transformation can't fit into the encoding we want), or
2094  */
2095 int
2096 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2097 {
2098     int ret;
2099     size_t written;
2100     size_t toconv;
2101     int c_in;
2102     int c_out;
2103     xmlBufPtr in;
2104     xmlBufPtr out;
2105
2106     if ((input == NULL) || (input->encoder == NULL) ||
2107         (input->buffer == NULL) || (input->raw == NULL))
2108         return (-1);
2109     out = input->buffer;
2110     in = input->raw;
2111
2112     toconv = xmlBufUse(in);
2113     if (toconv == 0)
2114         return (0);
2115     written = xmlBufAvail(out) - 1; /* count '\0' */
2116     /*
2117      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2118      * 45 chars should be sufficient to reach the end of the encoding
2119      * declaration without going too far inside the document content.
2120      * on UTF-16 this means 90bytes, on UCS4 this means 180
2121      * The actual value depending on guessed encoding is passed as @len
2122      * if provided
2123      */
2124     if (len >= 0) {
2125         if (toconv > (unsigned int) len)
2126             toconv = len;
2127     } else {
2128         if (toconv > 180)
2129             toconv = 180;
2130     }
2131     if (toconv * 2 >= written) {
2132         xmlBufGrow(out, toconv * 2);
2133         written = xmlBufAvail(out) - 1;
2134     }
2135     if (written > 360)
2136         written = 360;
2137
2138     c_in = toconv;
2139     c_out = written;
2140     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2141                            xmlBufContent(in), &c_in, 0);
2142     xmlBufShrink(in, c_in);
2143     xmlBufAddLen(out, c_out);
2144     if (ret == -1)
2145         ret = -3;
2146
2147     switch (ret) {
2148         case 0:
2149 #ifdef DEBUG_ENCODING
2150             xmlGenericError(xmlGenericErrorContext,
2151                             "converted %d bytes to %d bytes of input\n",
2152                             c_in, c_out);
2153 #endif
2154             break;
2155         case -1:
2156 #ifdef DEBUG_ENCODING
2157             xmlGenericError(xmlGenericErrorContext,
2158                          "converted %d bytes to %d bytes of input, %d left\n",
2159                             c_in, c_out, (int)xmlBufUse(in));
2160 #endif
2161             break;
2162         case -3:
2163 #ifdef DEBUG_ENCODING
2164             xmlGenericError(xmlGenericErrorContext,
2165                         "converted %d bytes to %d bytes of input, %d left\n",
2166                             c_in, c_out, (int)xmlBufUse(in));
2167 #endif
2168             break;
2169         case -2: {
2170             char buf[50];
2171             const xmlChar *content = xmlBufContent(in);
2172
2173             snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2174                      content[0], content[1],
2175                      content[2], content[3]);
2176             buf[49] = 0;
2177             xmlEncodingErr(XML_I18N_CONV_FAILED,
2178                     "input conversion failed due to input error, bytes %s\n",
2179                            buf);
2180         }
2181     }
2182     /*
2183      * Ignore when input buffer is not on a boundary
2184      */
2185     if (ret == -3) ret = 0;
2186     if (ret == -1) ret = 0;
2187     return(ret);
2188 }
2189
2190 /**
2191  * xmlCharEncInput:
2192  * @input: a parser input buffer
2193  * @flush: try to flush all the raw buffer
2194  *
2195  * Generic front-end for the encoding handler on parser input
2196  *
2197  * Returns the number of byte written if success, or
2198  *     -1 general error
2199  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2200  *        the result of transformation can't fit into the encoding we want), or
2201  */
2202 int
2203 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2204 {
2205     int ret;
2206     size_t written;
2207     size_t toconv;
2208     int c_in;
2209     int c_out;
2210     xmlBufPtr in;
2211     xmlBufPtr out;
2212
2213     if ((input == NULL) || (input->encoder == NULL) ||
2214         (input->buffer == NULL) || (input->raw == NULL))
2215         return (-1);
2216     out = input->buffer;
2217     in = input->raw;
2218
2219     toconv = xmlBufUse(in);
2220     if (toconv == 0)
2221         return (0);
2222     if ((toconv > 64 * 1024) && (flush == 0))
2223         toconv = 64 * 1024;
2224     written = xmlBufAvail(out);
2225     if (written > 0)
2226         written--; /* count '\0' */
2227     if (toconv * 2 >= written) {
2228         xmlBufGrow(out, toconv * 2);
2229         written = xmlBufAvail(out);
2230         if (written > 0)
2231             written--; /* count '\0' */
2232     }
2233     if ((written > 128 * 1024) && (flush == 0))
2234         written = 128 * 1024;
2235
2236     c_in = toconv;
2237     c_out = written;
2238     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2239                            xmlBufContent(in), &c_in, flush);
2240     xmlBufShrink(in, c_in);
2241     xmlBufAddLen(out, c_out);
2242     if (ret == -1)
2243         ret = -3;
2244
2245     switch (ret) {
2246         case 0:
2247 #ifdef DEBUG_ENCODING
2248             xmlGenericError(xmlGenericErrorContext,
2249                             "converted %d bytes to %d bytes of input\n",
2250                             c_in, c_out);
2251 #endif
2252             break;
2253         case -1:
2254 #ifdef DEBUG_ENCODING
2255             xmlGenericError(xmlGenericErrorContext,
2256                          "converted %d bytes to %d bytes of input, %d left\n",
2257                             c_in, c_out, (int)xmlBufUse(in));
2258 #endif
2259             break;
2260         case -3:
2261 #ifdef DEBUG_ENCODING
2262             xmlGenericError(xmlGenericErrorContext,
2263                         "converted %d bytes to %d bytes of input, %d left\n",
2264                             c_in, c_out, (int)xmlBufUse(in));
2265 #endif
2266             break;
2267         case -2: {
2268             char buf[50];
2269             const xmlChar *content = xmlBufContent(in);
2270
2271             snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2272                      content[0], content[1],
2273                      content[2], content[3]);
2274             buf[49] = 0;
2275             xmlEncodingErr(XML_I18N_CONV_FAILED,
2276                     "input conversion failed due to input error, bytes %s\n",
2277                            buf);
2278         }
2279     }
2280     /*
2281      * Ignore when input buffer is not on a boundary
2282      */
2283     if (ret == -3)
2284         ret = 0;
2285     return (c_out? c_out : ret);
2286 }
2287
2288 /**
2289  * xmlCharEncInFunc:
2290  * @handler:    char encoding transformation data structure
2291  * @out:  an xmlBuffer for the output.
2292  * @in:  an xmlBuffer for the input
2293  *
2294  * Generic front-end for the encoding handler input function
2295  *
2296  * Returns the number of byte written if success, or
2297  *     -1 general error
2298  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2299  *        the result of transformation can't fit into the encoding we want), or
2300  */
2301 int
2302 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2303                  xmlBufferPtr in)
2304 {
2305     int ret;
2306     int written;
2307     int toconv;
2308
2309     if (handler == NULL)
2310         return (-1);
2311     if (out == NULL)
2312         return (-1);
2313     if (in == NULL)
2314         return (-1);
2315
2316     toconv = in->use;
2317     if (toconv == 0)
2318         return (0);
2319     written = out->size - out->use -1; /* count '\0' */
2320     if (toconv * 2 >= written) {
2321         xmlBufferGrow(out, out->size + toconv * 2);
2322         written = out->size - out->use - 1;
2323     }
2324     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2325                            in->content, &toconv, 1);
2326     xmlBufferShrink(in, toconv);
2327     out->use += written;
2328     out->content[out->use] = 0;
2329     if (ret == -1)
2330         ret = -3;
2331
2332     switch (ret) {
2333         case 0:
2334 #ifdef DEBUG_ENCODING
2335             xmlGenericError(xmlGenericErrorContext,
2336                             "converted %d bytes to %d bytes of input\n",
2337                             toconv, written);
2338 #endif
2339             break;
2340         case -1:
2341 #ifdef DEBUG_ENCODING
2342             xmlGenericError(xmlGenericErrorContext,
2343                          "converted %d bytes to %d bytes of input, %d left\n",
2344                             toconv, written, in->use);
2345 #endif
2346             break;
2347         case -3:
2348 #ifdef DEBUG_ENCODING
2349             xmlGenericError(xmlGenericErrorContext,
2350                         "converted %d bytes to %d bytes of input, %d left\n",
2351                             toconv, written, in->use);
2352 #endif
2353             break;
2354         case -2: {
2355             char buf[50];
2356
2357             snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2358                      in->content[0], in->content[1],
2359                      in->content[2], in->content[3]);
2360             buf[49] = 0;
2361             xmlEncodingErr(XML_I18N_CONV_FAILED,
2362                     "input conversion failed due to input error, bytes %s\n",
2363                            buf);
2364         }
2365     }
2366     /*
2367      * Ignore when input buffer is not on a boundary
2368      */
2369     if (ret == -3)
2370         ret = 0;
2371     return (written? written : ret);
2372 }
2373
2374 #ifdef LIBXML_OUTPUT_ENABLED
2375 /**
2376  * xmlCharEncOutput:
2377  * @output: a parser output buffer
2378  * @init: is this an initialization call without data
2379  *
2380  * Generic front-end for the encoding handler on parser output
2381  * a first call with @init == 1 has to be made first to initiate the
2382  * output in case of non-stateless encoding needing to initiate their
2383  * state or the output (like the BOM in UTF16).
2384  * In case of UTF8 sequence conversion errors for the given encoder,
2385  * the content will be automatically remapped to a CharRef sequence.
2386  *
2387  * Returns the number of byte written if success, or
2388  *     -1 general error
2389  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2390  *        the result of transformation can't fit into the encoding we want), or
2391  */
2392 int
2393 xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2394 {
2395     int ret;
2396     size_t written;
2397     size_t writtentot = 0;
2398     size_t toconv;
2399     int c_in;
2400     int c_out;
2401     xmlBufPtr in;
2402     xmlBufPtr out;
2403
2404     if ((output == NULL) || (output->encoder == NULL) ||
2405         (output->buffer == NULL) || (output->conv == NULL))
2406         return (-1);
2407     out = output->conv;
2408     in = output->buffer;
2409
2410 retry:
2411
2412     written = xmlBufAvail(out);
2413     if (written > 0)
2414         written--; /* count '\0' */
2415
2416     /*
2417      * First specific handling of the initialization call
2418      */
2419     if (init) {
2420         c_in = 0;
2421         c_out = written;
2422         /* TODO: Check return value. */
2423         xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2424                           NULL, &c_in);
2425         xmlBufAddLen(out, c_out);
2426 #ifdef DEBUG_ENCODING
2427         xmlGenericError(xmlGenericErrorContext,
2428                 "initialized encoder\n");
2429 #endif
2430         return(0);
2431     }
2432
2433     /*
2434      * Conversion itself.
2435      */
2436     toconv = xmlBufUse(in);
2437     if (toconv == 0)
2438         return (0);
2439     if (toconv > 64 * 1024)
2440         toconv = 64 * 1024;
2441     if (toconv * 4 >= written) {
2442         xmlBufGrow(out, toconv * 4);
2443         written = xmlBufAvail(out) - 1;
2444     }
2445     if (written > 256 * 1024)
2446         written = 256 * 1024;
2447
2448     c_in = toconv;
2449     c_out = written;
2450     ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2451                             xmlBufContent(in), &c_in);
2452     xmlBufShrink(in, c_in);
2453     xmlBufAddLen(out, c_out);
2454     writtentot += c_out;
2455     if (ret == -1) {
2456         if (c_out > 0) {
2457             /* Can be a limitation of iconv or uconv */
2458             goto retry;
2459         }
2460         ret = -3;
2461     }
2462
2463     /*
2464      * Attempt to handle error cases
2465      */
2466     switch (ret) {
2467         case 0:
2468 #ifdef DEBUG_ENCODING
2469             xmlGenericError(xmlGenericErrorContext,
2470                     "converted %d bytes to %d bytes of output\n",
2471                     c_in, c_out);
2472 #endif
2473             break;
2474         case -1:
2475 #ifdef DEBUG_ENCODING
2476             xmlGenericError(xmlGenericErrorContext,
2477                     "output conversion failed by lack of space\n");
2478 #endif
2479             break;
2480         case -3:
2481 #ifdef DEBUG_ENCODING
2482             xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2483                     c_in, c_out, (int) xmlBufUse(in));
2484 #endif
2485             break;
2486         case -4:
2487             xmlEncodingErr(XML_I18N_NO_OUTPUT,
2488                            "xmlCharEncOutFunc: no output function !\n", NULL);
2489             ret = -1;
2490             break;
2491         case -2: {
2492             xmlChar charref[20];
2493             int len = (int) xmlBufUse(in);
2494             xmlChar *content = xmlBufContent(in);
2495             int cur, charrefLen;
2496
2497             cur = xmlGetUTF8Char(content, &len);
2498             if (cur <= 0)
2499                 break;
2500
2501 #ifdef DEBUG_ENCODING
2502             xmlGenericError(xmlGenericErrorContext,
2503                     "handling output conversion error\n");
2504             xmlGenericError(xmlGenericErrorContext,
2505                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2506                     content[0], content[1],
2507                     content[2], content[3]);
2508 #endif
2509             /*
2510              * Removes the UTF8 sequence, and replace it by a charref
2511              * and continue the transcoding phase, hoping the error
2512              * did not mangle the encoder state.
2513              */
2514             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2515                              "&#%d;", cur);
2516             xmlBufShrink(in, len);
2517             xmlBufGrow(out, charrefLen * 4);
2518             c_out = xmlBufAvail(out) - 1;
2519             c_in = charrefLen;
2520             ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2521                                     charref, &c_in);
2522
2523             if ((ret < 0) || (c_in != charrefLen)) {
2524                 char buf[50];
2525
2526                 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2527                          content[0], content[1],
2528                          content[2], content[3]);
2529                 buf[49] = 0;
2530                 xmlEncodingErr(XML_I18N_CONV_FAILED,
2531                     "output conversion failed due to conv error, bytes %s\n",
2532                                buf);
2533                 if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2534                     content[0] = ' ';
2535                 break;
2536             }
2537
2538             xmlBufAddLen(out, c_out);
2539             writtentot += c_out;
2540             goto retry;
2541         }
2542     }
2543     return(ret);
2544 }
2545 #endif
2546
2547 /**
2548  * xmlCharEncOutFunc:
2549  * @handler:    char enconding transformation data structure
2550  * @out:  an xmlBuffer for the output.
2551  * @in:  an xmlBuffer for the input
2552  *
2553  * Generic front-end for the encoding handler output function
2554  * a first call with @in == NULL has to be made firs to initiate the
2555  * output in case of non-stateless encoding needing to initiate their
2556  * state or the output (like the BOM in UTF16).
2557  * In case of UTF8 sequence conversion errors for the given encoder,
2558  * the content will be automatically remapped to a CharRef sequence.
2559  *
2560  * Returns the number of byte written if success, or
2561  *     -1 general error
2562  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2563  *        the result of transformation can't fit into the encoding we want), or
2564  */
2565 int
2566 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2567                   xmlBufferPtr in) {
2568     int ret;
2569     int written;
2570     int writtentot = 0;
2571     int toconv;
2572     int output = 0;
2573
2574     if (handler == NULL) return(-1);
2575     if (out == NULL) return(-1);
2576
2577 retry:
2578
2579     written = out->size - out->use;
2580
2581     if (written > 0)
2582         written--; /* Gennady: count '/0' */
2583
2584     /*
2585      * First specific handling of in = NULL, i.e. the initialization call
2586      */
2587     if (in == NULL) {
2588         toconv = 0;
2589         /* TODO: Check return value. */
2590         xmlEncOutputChunk(handler, &out->content[out->use], &written,
2591                           NULL, &toconv);
2592         out->use += written;
2593         out->content[out->use] = 0;
2594 #ifdef DEBUG_ENCODING
2595         xmlGenericError(xmlGenericErrorContext,
2596                 "initialized encoder\n");
2597 #endif
2598         return(0);
2599     }
2600
2601     /*
2602      * Conversion itself.
2603      */
2604     toconv = in->use;
2605     if (toconv == 0)
2606         return(0);
2607     if (toconv * 4 >= written) {
2608         xmlBufferGrow(out, toconv * 4);
2609         written = out->size - out->use - 1;
2610     }
2611     ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2612                             in->content, &toconv);
2613     xmlBufferShrink(in, toconv);
2614     out->use += written;
2615     writtentot += written;
2616     out->content[out->use] = 0;
2617     if (ret == -1) {
2618         if (written > 0) {
2619             /* Can be a limitation of iconv or uconv */
2620             goto retry;
2621         }
2622         ret = -3;
2623     }
2624
2625     if (ret >= 0) output += ret;
2626
2627     /*
2628      * Attempt to handle error cases
2629      */
2630     switch (ret) {
2631         case 0:
2632 #ifdef DEBUG_ENCODING
2633             xmlGenericError(xmlGenericErrorContext,
2634                     "converted %d bytes to %d bytes of output\n",
2635                     toconv, written);
2636 #endif
2637             break;
2638         case -1:
2639 #ifdef DEBUG_ENCODING
2640             xmlGenericError(xmlGenericErrorContext,
2641                     "output conversion failed by lack of space\n");
2642 #endif
2643             break;
2644         case -3:
2645 #ifdef DEBUG_ENCODING
2646             xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2647                     toconv, written, in->use);
2648 #endif
2649             break;
2650         case -4:
2651             xmlEncodingErr(XML_I18N_NO_OUTPUT,
2652                            "xmlCharEncOutFunc: no output function !\n", NULL);
2653             ret = -1;
2654             break;
2655         case -2: {
2656             xmlChar charref[20];
2657             int len = in->use;
2658             const xmlChar *utf = (const xmlChar *) in->content;
2659             int cur, charrefLen;
2660
2661             cur = xmlGetUTF8Char(utf, &len);
2662             if (cur <= 0)
2663                 break;
2664
2665 #ifdef DEBUG_ENCODING
2666             xmlGenericError(xmlGenericErrorContext,
2667                     "handling output conversion error\n");
2668             xmlGenericError(xmlGenericErrorContext,
2669                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2670                     in->content[0], in->content[1],
2671                     in->content[2], in->content[3]);
2672 #endif
2673             /*
2674              * Removes the UTF8 sequence, and replace it by a charref
2675              * and continue the transcoding phase, hoping the error
2676              * did not mangle the encoder state.
2677              */
2678             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2679                              "&#%d;", cur);
2680             xmlBufferShrink(in, len);
2681             xmlBufferGrow(out, charrefLen * 4);
2682             written = out->size - out->use - 1;
2683             toconv = charrefLen;
2684             ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2685                                     charref, &toconv);
2686
2687             if ((ret < 0) || (toconv != charrefLen)) {
2688                 char buf[50];
2689
2690                 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2691                          in->content[0], in->content[1],
2692                          in->content[2], in->content[3]);
2693                 buf[49] = 0;
2694                 xmlEncodingErr(XML_I18N_CONV_FAILED,
2695                     "output conversion failed due to conv error, bytes %s\n",
2696                                buf);
2697                 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2698                     in->content[0] = ' ';
2699                 break;
2700             }
2701
2702             out->use += written;
2703             writtentot += written;
2704             out->content[out->use] = 0;
2705             goto retry;
2706         }
2707     }
2708     return(ret);
2709 }
2710
2711 /**
2712  * xmlCharEncCloseFunc:
2713  * @handler:    char enconding transformation data structure
2714  *
2715  * Generic front-end for encoding handler close function
2716  *
2717  * Returns 0 if success, or -1 in case of error
2718  */
2719 int
2720 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2721     int ret = 0;
2722     int tofree = 0;
2723     int i, handler_in_list = 0;
2724
2725     if (handler == NULL) return(-1);
2726     if (handler->name == NULL) return(-1);
2727     if (handlers != NULL) {
2728         for (i = 0;i < nbCharEncodingHandler; i++) {
2729             if (handler == handlers[i]) {
2730                 handler_in_list = 1;
2731                 break;
2732             }
2733         }
2734     }
2735 #ifdef LIBXML_ICONV_ENABLED
2736     /*
2737      * Iconv handlers can be used only once, free the whole block.
2738      * and the associated icon resources.
2739      */
2740     if ((handler_in_list == 0) &&
2741         ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2742         tofree = 1;
2743         if (handler->iconv_out != NULL) {
2744             if (iconv_close(handler->iconv_out))
2745                 ret = -1;
2746             handler->iconv_out = NULL;
2747         }
2748         if (handler->iconv_in != NULL) {
2749             if (iconv_close(handler->iconv_in))
2750                 ret = -1;
2751             handler->iconv_in = NULL;
2752         }
2753     }
2754 #endif /* LIBXML_ICONV_ENABLED */
2755 #ifdef LIBXML_ICU_ENABLED
2756     if ((handler_in_list == 0) &&
2757         ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2758         tofree = 1;
2759         if (handler->uconv_out != NULL) {
2760             closeIcuConverter(handler->uconv_out);
2761             handler->uconv_out = NULL;
2762         }
2763         if (handler->uconv_in != NULL) {
2764             closeIcuConverter(handler->uconv_in);
2765             handler->uconv_in = NULL;
2766         }
2767     }
2768 #endif
2769     if (tofree) {
2770         /* free up only dynamic handlers iconv/uconv */
2771         if (handler->name != NULL)
2772             xmlFree(handler->name);
2773         handler->name = NULL;
2774         xmlFree(handler);
2775     }
2776 #ifdef DEBUG_ENCODING
2777     if (ret)
2778         xmlGenericError(xmlGenericErrorContext,
2779                 "failed to close the encoding handler\n");
2780     else
2781         xmlGenericError(xmlGenericErrorContext,
2782                 "closed the encoding handler\n");
2783 #endif
2784
2785     return(ret);
2786 }
2787
2788 /**
2789  * xmlByteConsumed:
2790  * @ctxt: an XML parser context
2791  *
2792  * This function provides the current index of the parser relative
2793  * to the start of the current entity. This function is computed in
2794  * bytes from the beginning starting at zero and finishing at the
2795  * size in byte of the file if parsing a file. The function is
2796  * of constant cost if the input is UTF-8 but can be costly if run
2797  * on non-UTF-8 input.
2798  *
2799  * Returns the index in bytes from the beginning of the entity or -1
2800  *         in case the index could not be computed.
2801  */
2802 long
2803 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2804     xmlParserInputPtr in;
2805
2806     if (ctxt == NULL) return(-1);
2807     in = ctxt->input;
2808     if (in == NULL)  return(-1);
2809     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2810         unsigned int unused = 0;
2811         xmlCharEncodingHandler * handler = in->buf->encoder;
2812         /*
2813          * Encoding conversion, compute the number of unused original
2814          * bytes from the input not consumed and substract that from
2815          * the raw consumed value, this is not a cheap operation
2816          */
2817         if (in->end - in->cur > 0) {
2818             unsigned char convbuf[32000];
2819             const unsigned char *cur = (const unsigned char *)in->cur;
2820             int toconv = in->end - in->cur, written = 32000;
2821
2822             int ret;
2823
2824             do {
2825                 toconv = in->end - cur;
2826                 written = 32000;
2827                 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2828                                         cur, &toconv);
2829                 if (ret < 0) {
2830                     if (written > 0)
2831                         ret = -2;
2832                     else
2833                         return(-1);
2834                 }
2835                 unused += written;
2836                 cur += toconv;
2837             } while (ret == -2);
2838         }
2839         if (in->buf->rawconsumed < unused)
2840             return(-1);
2841         return(in->buf->rawconsumed - unused);
2842     }
2843     return(in->consumed + (in->cur - in->base));
2844 }
2845
2846 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2847 #ifdef LIBXML_ISO8859X_ENABLED
2848
2849 /**
2850  * UTF8ToISO8859x:
2851  * @out:  a pointer to an array of bytes to store the result
2852  * @outlen:  the length of @out
2853  * @in:  a pointer to an array of UTF-8 chars
2854  * @inlen:  the length of @in
2855  * @xlattable: the 2-level transcoding table
2856  *
2857  * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2858  * block of chars out.
2859  *
2860  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2861  * The value of @inlen after return is the number of octets consumed
2862  *     as the return value is positive, else unpredictable.
2863  * The value of @outlen after return is the number of ocetes consumed.
2864  */
2865 static int
2866 UTF8ToISO8859x(unsigned char* out, int *outlen,
2867               const unsigned char* in, int *inlen,
2868               unsigned char const *xlattable) {
2869     const unsigned char* outstart = out;
2870     const unsigned char* inend;
2871     const unsigned char* instart = in;
2872     const unsigned char* processed = in;
2873
2874     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2875         (xlattable == NULL))
2876         return(-1);
2877     if (in == NULL) {
2878         /*
2879         * initialization nothing to do
2880         */
2881         *outlen = 0;
2882         *inlen = 0;
2883         return(0);
2884     }
2885     inend = in + (*inlen);
2886     while (in < inend) {
2887         unsigned char d = *in++;
2888         if  (d < 0x80)  {
2889             *out++ = d;
2890         } else if (d < 0xC0) {
2891             /* trailing byte in leading position */
2892             *outlen = out - outstart;
2893             *inlen = processed - instart;
2894             return(-2);
2895         } else if (d < 0xE0) {
2896             unsigned char c;
2897             if (!(in < inend)) {
2898                 /* trailing byte not in input buffer */
2899                 *outlen = out - outstart;
2900                 *inlen = processed - instart;
2901                 return(-3);
2902             }
2903             c = *in++;
2904             if ((c & 0xC0) != 0x80) {
2905                 /* not a trailing byte */
2906                 *outlen = out - outstart;
2907                 *inlen = processed - instart;
2908                 return(-2);
2909             }
2910             c = c & 0x3F;
2911             d = d & 0x1F;
2912             d = xlattable [48 + c + xlattable [d] * 64];
2913             if (d == 0) {
2914                 /* not in character set */
2915                 *outlen = out - outstart;
2916                 *inlen = processed - instart;
2917                 return(-2);
2918             }
2919             *out++ = d;
2920         } else if (d < 0xF0) {
2921             unsigned char c1;
2922             unsigned char c2;
2923             if (!(in < inend - 1)) {
2924                 /* trailing bytes not in input buffer */
2925                 *outlen = out - outstart;
2926                 *inlen = processed - instart;
2927                 return(-3);
2928             }
2929             c1 = *in++;
2930             if ((c1 & 0xC0) != 0x80) {
2931                 /* not a trailing byte (c1) */
2932                 *outlen = out - outstart;
2933                 *inlen = processed - instart;
2934                 return(-2);
2935             }
2936             c2 = *in++;
2937             if ((c2 & 0xC0) != 0x80) {
2938                 /* not a trailing byte (c2) */
2939                 *outlen = out - outstart;
2940                 *inlen = processed - instart;
2941                 return(-2);
2942             }
2943             c1 = c1 & 0x3F;
2944             c2 = c2 & 0x3F;
2945             d = d & 0x0F;
2946             d = xlattable [48 + c2 + xlattable [48 + c1 +
2947                         xlattable [32 + d] * 64] * 64];
2948             if (d == 0) {
2949                 /* not in character set */
2950                 *outlen = out - outstart;
2951                 *inlen = processed - instart;
2952                 return(-2);
2953             }
2954             *out++ = d;
2955         } else {
2956             /* cannot transcode >= U+010000 */
2957             *outlen = out - outstart;
2958             *inlen = processed - instart;
2959             return(-2);
2960         }
2961         processed = in;
2962     }
2963     *outlen = out - outstart;
2964     *inlen = processed - instart;
2965     return(*outlen);
2966 }
2967
2968 /**
2969  * ISO8859xToUTF8
2970  * @out:  a pointer to an array of bytes to store the result
2971  * @outlen:  the length of @out
2972  * @in:  a pointer to an array of ISO Latin 1 chars
2973  * @inlen:  the length of @in
2974  *
2975  * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
2976  * block of chars out.
2977  * Returns 0 if success, or -1 otherwise
2978  * The value of @inlen after return is the number of octets consumed
2979  * The value of @outlen after return is the number of ocetes produced.
2980  */
2981 static int
2982 ISO8859xToUTF8(unsigned char* out, int *outlen,
2983               const unsigned char* in, int *inlen,
2984               unsigned short const *unicodetable) {
2985     unsigned char* outstart = out;
2986     unsigned char* outend;
2987     const unsigned char* instart = in;
2988     const unsigned char* inend;
2989     const unsigned char* instop;
2990     unsigned int c;
2991
2992     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2993         (in == NULL) || (unicodetable == NULL))
2994         return(-1);
2995     outend = out + *outlen;
2996     inend = in + *inlen;
2997     instop = inend;
2998
2999     while ((in < inend) && (out < outend - 2)) {
3000         if (*in >= 0x80) {
3001             c = unicodetable [*in - 0x80];
3002             if (c == 0) {
3003                 /* undefined code point */
3004                 *outlen = out - outstart;
3005                 *inlen = in - instart;
3006                 return (-1);
3007             }
3008             if (c < 0x800) {
3009                 *out++ = ((c >>  6) & 0x1F) | 0xC0;
3010                 *out++ = (c & 0x3F) | 0x80;
3011             } else {
3012                 *out++ = ((c >>  12) & 0x0F) | 0xE0;
3013                 *out++ = ((c >>  6) & 0x3F) | 0x80;
3014                 *out++ = (c & 0x3F) | 0x80;
3015             }
3016             ++in;
3017         }
3018         if (instop - in > outend - out) instop = in + (outend - out);
3019         while ((*in < 0x80) && (in < instop)) {
3020             *out++ = *in++;
3021         }
3022     }
3023     if ((in < inend) && (out < outend) && (*in < 0x80)) {
3024         *out++ =  *in++;
3025     }
3026     if ((in < inend) && (out < outend) && (*in < 0x80)) {
3027         *out++ =  *in++;
3028     }
3029     *outlen = out - outstart;
3030     *inlen = in - instart;
3031     return (*outlen);
3032 }
3033
3034
3035 /************************************************************************
3036  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3037  ************************************************************************/
3038
/*
 * Code points for the ISO-8859-2 bytes 0x80..0xFF, one entry per byte
 * in increasing byte order (entry 0 is byte 0x80). The 128-entry layout
 * matches the @unicodetable argument of ISO8859xToUTF8.
 * NOTE(review): the code handing this table to the converter is not
 * visible in this part of the file - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
3057
/*
 * UTF-8 to ISO-8859-2 lookup data: 48 block-selector bytes followed by
 * 6 blocks of 64 target bytes, where 0 stands for "no mapping". The
 * layout matches the 2-level @xlattable indexed by UTF8ToISO8859x.
 * NOTE(review): the code handing this table to the converter is not
 * visible in this part of the file - confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
3087
/*
 * Code points for the ISO-8859-3 bytes 0x80..0xFF, one entry per byte
 * in increasing byte order (entry 0 is byte 0x80). The 128-entry layout
 * matches the @unicodetable argument of ISO8859xToUTF8, which treats a
 * 0x0000 entry as an undefined code point.
 * NOTE(review): the code handing this table to the converter is not
 * visible in this part of the file - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
3106
/*
 * UTF-8 to ISO-8859-3 lookup data: 48 block-selector bytes followed by
 * 7 blocks of 64 target bytes, where 0 stands for "no mapping". The
 * layout matches the 2-level @xlattable indexed by UTF8ToISO8859x.
 * NOTE(review): the code handing this table to the converter is not
 * visible in this part of the file - confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
3140
/*
 * Code points for the ISO-8859-4 bytes 0x80..0xFF, one entry per byte
 * in increasing byte order (entry 0 is byte 0x80). The 128-entry layout
 * matches the @unicodetable argument of ISO8859xToUTF8.
 * NOTE(review): the code handing this table to the converter is not
 * visible in this part of the file - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
3159
/*
 * UTF-8 to ISO-8859-4 lookup data: 48 block-selector bytes followed by
 * 6 blocks of 64 target bytes, where 0 stands for "no mapping". The
 * layout matches the 2-level @xlattable indexed by UTF8ToISO8859x.
 * NOTE(review): the code handing this table to the converter is not
 * visible in this part of the file - confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
3189
/*
 * Code points for the ISO-8859-5 bytes 0x80..0xFF, one entry per byte
 * in increasing byte order (entry 0 is byte 0x80). The 128-entry layout
 * matches the @unicodetable argument of ISO8859xToUTF8.
 * NOTE(review): the code handing this table to the converter is not
 * visible in this part of the file - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
3208
/*
 * UTF-8 to ISO-8859-5 lookup data: 48 block-selector bytes followed by
 * 6 blocks of 64 target bytes, where 0 stands for "no mapping". The
 * layout matches the 2-level @xlattable indexed by UTF8ToISO8859x.
 * NOTE(review): the code handing this table to the converter is not
 * visible in this part of the file - confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3238
/*
 * Code points for the ISO-8859-6 bytes 0x80..0xFF, one entry per byte
 * in increasing byte order (entry 0 is byte 0x80). The 128-entry layout
 * matches the @unicodetable argument of ISO8859xToUTF8, which treats a
 * 0x0000 entry as an undefined code point.
 * NOTE(review): the code handing this table to the converter is not
 * visible in this part of the file - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
};
3257
/*
 * UTF-8 to ISO-8859-6 lookup data: 48 block-selector bytes followed by
 * 5 blocks of 64 target bytes, where 0 stands for "no mapping". The
 * layout matches the 2-level @xlattable indexed by UTF8ToISO8859x.
 * NOTE(review): the code handing this table to the converter is not
 * visible in this part of the file - confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3283
/*
 * 128-entry table of 16-bit values for ISO-8859-7.
 * NOTE(review): presumably entry i holds the Unicode code point for input
 * byte 0x80 + i (the first 32 entries are 0x0080..0x009f, which fits that
 * reading) and 0x0000 marks a byte with no mapping -- confirm against
 * ISO8859xToUTF8, which is not visible in this excerpt.
 */
3284 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3285     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3286     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3287     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3288     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3289     0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3290     0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3291     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3292     0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3293     0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3294     0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3295     0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3296     0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3297     0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3298     0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3299     0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3300     0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3301 };
3302
/*
 * Byte table for ISO-8859-7, sized 48 + 7 * 64 = 496 bytes.  The string
 * literal supplies exactly that many bytes (31 rows of 16), so no
 * terminating NUL is stored (valid C; C++ would reject it).
 * NOTE(review): presumably the reverse (UTF-8 -> ISO-8859-7) lookup table,
 * laid out as a 48-byte index header followed by 7 blocks of 64 bytes --
 * confirm against the code that reads it, which is not visible here.
 */
3303 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3304     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3305     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3312     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3313     "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3314     "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3315     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320     "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3321     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3325     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3326     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3327     "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3328     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3329     "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3330     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3331     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3332     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335 };
3336
/*
 * 128-entry table of 16-bit values for ISO-8859-8.
 * NOTE(review): presumably entry i holds the Unicode code point for input
 * byte 0x80 + i (the first 32 entries are 0x0080..0x009f, which fits that
 * reading) and 0x0000 marks a byte with no mapping -- confirm against
 * ISO8859xToUTF8, which is not visible in this excerpt.
 */
3337 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3338     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3339     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3340     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3341     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3342     0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3343     0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3344     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3345     0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3346     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3347     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3348     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3349     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3350     0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3351     0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3352     0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3353     0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3354 };
3355
/*
 * Byte table for ISO-8859-8, sized 48 + 7 * 64 = 496 bytes.  The string
 * literal supplies exactly that many bytes (31 rows of 16), so no
 * terminating NUL is stored (valid C; C++ would reject it).
 * NOTE(review): presumably the reverse (UTF-8 -> ISO-8859-8) lookup table,
 * laid out as a 48-byte index header followed by 7 blocks of 64 bytes --
 * confirm against the code that reads it, which is not visible here.
 */
3356 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3357     "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3358     "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3359     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3365     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3366     "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3367     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3368     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373     "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3374     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375     "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3376     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3377     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3378     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3381     "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3382     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3386     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3387     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388 };
3389
/*
 * 128-entry table of 16-bit values for ISO-8859-9.
 * NOTE(review): presumably entry i holds the Unicode code point for input
 * byte 0x80 + i (the first 32 entries are 0x0080..0x009f, which fits that
 * reading) -- confirm against ISO8859xToUTF8, which is not visible in this
 * excerpt.
 */
3390 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3391     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3392     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3393     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3394     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3395     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3396     0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3397     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3398     0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3399     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3400     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3401     0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3402     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3403     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3404     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3405     0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3406     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3407 };
3408
/*
 * Byte table for ISO-8859-9, sized 48 + 5 * 64 = 368 bytes.  The string
 * literal supplies exactly that many bytes (23 rows of 16), so no
 * terminating NUL is stored (valid C; C++ would reject it).
 * NOTE(review): presumably the reverse (UTF-8 -> ISO-8859-9) lookup table,
 * laid out as a 48-byte index header followed by 5 blocks of 64 bytes --
 * confirm against the code that reads it, which is not visible here.
 */
3409 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3410     "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3418     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3419     "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3420     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3421     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3422     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3423     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3424     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3425     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3427     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3428     "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3431     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433 };
3434
/*
 * 128-entry table of 16-bit values for ISO-8859-10.
 * NOTE(review): presumably entry i holds the Unicode code point for input
 * byte 0x80 + i (the first 32 entries are 0x0080..0x009f, which fits that
 * reading) -- confirm against ISO8859xToUTF8, which is not visible in this
 * excerpt.
 */
3435 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3436     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3437     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3438     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3439     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3440     0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3441     0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3442     0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3443     0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3444     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3445     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3446     0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3447     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3448     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3449     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3450     0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3451     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3452 };
3453
/*
 * Byte table for ISO-8859-10, sized 48 + 7 * 64 = 496 bytes.  The string
 * literal supplies exactly that many bytes (31 rows of 16), so no
 * terminating NUL is stored (valid C; C++ would reject it).
 * NOTE(review): presumably the reverse (UTF-8 -> ISO-8859-10) lookup table,
 * laid out as a 48-byte index header followed by 7 blocks of 64 bytes --
 * confirm against the code that reads it, which is not visible here.
 */
3454 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3455     "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3460     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3461     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3463     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3464     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3465     "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3466     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3467     "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3468     "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3469     "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3470     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3471     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472     "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3473     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3474     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479     "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3480     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3481     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3482     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3483     "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3484     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3485     "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3486 };
3487
/*
 * 128-entry table of 16-bit values for ISO-8859-11.
 * NOTE(review): presumably entry i holds the Unicode code point for input
 * byte 0x80 + i (the first 32 entries are 0x0080..0x009f, which fits that
 * reading) and 0x0000 marks a byte with no mapping -- confirm against
 * ISO8859xToUTF8, which is not visible in this excerpt.
 */
3488 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3489     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3490     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3491     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3492     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3493     0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3494     0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3495     0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3496     0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3497     0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3498     0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3499     0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3500     0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3501     0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3502     0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3503     0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3504     0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3505 };
3506
/*
 * Byte table for ISO-8859-11, sized 48 + 6 * 64 = 432 bytes.  The string
 * literal supplies exactly that many bytes (27 rows of 16), so no
 * terminating NUL is stored (valid C; C++ would reject it).
 * NOTE(review): presumably the reverse (UTF-8 -> ISO-8859-11) lookup table,
 * laid out as a 48-byte index header followed by 6 blocks of 64 bytes --
 * confirm against the code that reads it, which is not visible here.
 */
3507 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3508     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510     "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3513     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3514     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3516     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3517     "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3520     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3521     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3522     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3523     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3524     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3525     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3526     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3527     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3528     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3529     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3530     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3532     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3533     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535 };
3536
/*
 * 128-entry table of 16-bit values for ISO-8859-13.
 * NOTE(review): presumably entry i holds the Unicode code point for input
 * byte 0x80 + i (the first 32 entries are 0x0080..0x009f, which fits that
 * reading) -- confirm against ISO8859xToUTF8, which is not visible in this
 * excerpt.
 */
3537 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3538     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3539     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3540     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3541     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3542     0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3543     0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3544     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3545     0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3546     0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3547     0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3548     0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3549     0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3550     0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3551     0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3552     0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3553     0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3554 };
3555
/*
 * Byte table for ISO-8859-13, sized 48 + 7 * 64 = 496 bytes.  The string
 * literal supplies exactly that many bytes (31 rows of 16), so no
 * terminating NUL is stored (valid C; C++ would reject it).
 * NOTE(review): presumably the reverse (UTF-8 -> ISO-8859-13) lookup table,
 * laid out as a 48-byte index header followed by 7 blocks of 64 bytes --
 * confirm against the code that reads it, which is not visible here.
 */
3556 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3557     "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3562     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3564     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3565     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3566     "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3567     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3568     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3571     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3574     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3575     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576     "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3577     "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3578     "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3579     "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3580     "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3581     "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3582     "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3583     "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3584     "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3585     "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3586     "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3587     "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3588 };
3589
/*
 * 128-entry table of 16-bit values for ISO-8859-14.
 * NOTE(review): presumably entry i holds the Unicode code point for input
 * byte 0x80 + i (the first 32 entries are 0x0080..0x009f, which fits that
 * reading) -- confirm against ISO8859xToUTF8, which is not visible in this
 * excerpt.
 */
3590 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3591     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3592     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3593     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3594     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3595     0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3596     0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3597     0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3598     0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3599     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3600     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3601     0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3602     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3603     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3604     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3605     0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3606     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3607 };
3608
/*
 * Byte table for ISO-8859-14, sized 48 + 10 * 64 = 688 bytes.  The string
 * literal supplies exactly that many bytes (43 rows of 16), so no
 * terminating NUL is stored (valid C; C++ would reject it).
 * NOTE(review): presumably the reverse (UTF-8 -> ISO-8859-14) lookup table,
 * laid out as a 48-byte index header followed by 10 blocks of 64 bytes --
 * confirm against the code that reads it, which is not visible here.
 */
3609 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3610     "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612     "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3617     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3618     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3619     "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3620     "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3624     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3625     "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3626     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3627     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3628     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3630     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631     "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3633     "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3634     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3635     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3636     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3637     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3638     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3639     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3640     "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3641     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3642     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3644     "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3645     "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3646     "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3647     "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3648     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3649     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3650     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3651     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3652     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3653 };
3654
/*
 * 128-entry table of 16-bit values for ISO-8859-15.
 * NOTE(review): presumably entry i holds the Unicode code point for input
 * byte 0x80 + i (the first 32 entries are 0x0080..0x009f, which fits that
 * reading) -- confirm against ISO8859xToUTF8, which is not visible in this
 * excerpt.
 */
3655 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3656     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3657     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3658     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3659     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3660     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3661     0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3662     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3663     0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3664     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3665     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3666     0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3667     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3668     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3669     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3670     0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3671     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3672 };
3673
/*
 * Byte table for ISO-8859-15, sized 48 + 6 * 64 = 432 bytes.  The string
 * literal supplies exactly that many bytes (27 rows of 16), so no
 * terminating NUL is stored (valid C; C++ would reject it).
 * NOTE(review): presumably the reverse (UTF-8 -> ISO-8859-15) lookup table,
 * laid out as a 48-byte index header followed by 6 blocks of 64 bytes --
 * confirm against the code that reads it, which is not visible here.
 */
3674 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3675     "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3680     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3681     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3682     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3683     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3684     "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3685     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3686     "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3688     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3692     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3693     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3695     "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3697     "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3698     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3699     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3700     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3701     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3702 };
3703
/*
 * 128-entry table of 16-bit values for ISO-8859-16.
 * NOTE(review): presumably entry i holds the Unicode code point for input
 * byte 0x80 + i (the first 32 entries are 0x0080..0x009f, which fits that
 * reading) -- confirm against ISO8859xToUTF8, which is not visible in this
 * excerpt.
 */
3704 static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3705     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3706     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3707     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3708     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3709     0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3710     0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3711     0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3712     0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3713     0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3714     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3715     0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3716     0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3717     0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3718     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3719     0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3720     0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3721 };
3722
/*
 * Byte table for ISO-8859-16, sized 48 + 9 * 64 = 624 bytes.  The string
 * literal supplies exactly that many bytes (39 rows of 16), so no
 * terminating NUL is stored (valid C; C++ would reject it).
 * NOTE(review): presumably the reverse (UTF-8 -> ISO-8859-16) lookup table,
 * laid out as a 48-byte index header followed by 9 blocks of 64 bytes --
 * confirm against the code that reads it, which is not visible here.
 */
3723 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3724     "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3725     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3726     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3727     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3728     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3729     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3730     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3731     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3732     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3733     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3734     "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3735     "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3736     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3737     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3738     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739     "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3740     "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3741     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742     "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3743     "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3746     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3747     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3748     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3749     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3750     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3751     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3752     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3753     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3754     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3755     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3756     "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3757     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3758     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3759     "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3760     "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3761     "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3762     "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3763 };
3764
3765
3766 /*
3767  * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3768  */
3769
3770 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3771     const unsigned char* in, int *inlen) {
3772     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3773 }
3774 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3775     const unsigned char* in, int *inlen) {
3776     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3777 }
3778
3779 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3780     const unsigned char* in, int *inlen) {
3781     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3782 }
3783 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3784     const unsigned char* in, int *inlen) {
3785     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3786 }
3787
3788 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3789     const unsigned char* in, int *inlen) {
3790     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3791 }
3792 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3793     const unsigned char* in, int *inlen) {
3794     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3795 }
3796
3797 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3798     const unsigned char* in, int *inlen) {
3799     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3800 }
3801 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3802     const unsigned char* in, int *inlen) {
3803     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3804 }
3805
3806 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3807     const unsigned char* in, int *inlen) {
3808     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3809 }
3810 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3811     const unsigned char* in, int *inlen) {
3812     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3813 }
3814
3815 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3816     const unsigned char* in, int *inlen) {
3817     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3818 }
3819 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3820     const unsigned char* in, int *inlen) {
3821     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3822 }
3823
3824 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3825     const unsigned char* in, int *inlen) {
3826     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3827 }
3828 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3829     const unsigned char* in, int *inlen) {
3830     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3831 }
3832
3833 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3834     const unsigned char* in, int *inlen) {
3835     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3836 }
3837 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3838     const unsigned char* in, int *inlen) {
3839     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3840 }
3841
3842 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3843     const unsigned char* in, int *inlen) {
3844     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3845 }
3846 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3847     const unsigned char* in, int *inlen) {
3848     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3849 }
3850
3851 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3852     const unsigned char* in, int *inlen) {
3853     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3854 }
3855 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3856     const unsigned char* in, int *inlen) {
3857     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3858 }
3859
3860 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3861     const unsigned char* in, int *inlen) {
3862     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3863 }
3864 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3865     const unsigned char* in, int *inlen) {
3866     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3867 }
3868
3869 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3870     const unsigned char* in, int *inlen) {
3871     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3872 }
3873 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3874     const unsigned char* in, int *inlen) {
3875     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3876 }
3877
3878 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3879     const unsigned char* in, int *inlen) {
3880     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3881 }
3882 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3883     const unsigned char* in, int *inlen) {
3884     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3885 }
3886
3887 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3888     const unsigned char* in, int *inlen) {
3889     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3890 }
3891 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3892     const unsigned char* in, int *inlen) {
3893     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3894 }
3895
3896 static void
3897 xmlRegisterCharEncodingHandlersISO8859x (void) {
3898     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3899     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3900     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3901     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3902     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3903     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3904     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3905     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3906     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3907     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3908     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3909     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3910     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3911     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3912 }
3913
3914 #endif
3915 #endif
3916
3917 #define bottom_encoding
3918 #include "elfgcchack.h"