2 * encoding.c : implements the encoding conversion functions needed for XML
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
16 * See Copyright for the status of this software.
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
35 #ifdef LIBXML_ICONV_ENABLED
40 #include <libxml/encoding.h>
41 #include <libxml/xmlmemory.h>
42 #ifdef LIBXML_HTML_ENABLED
43 #include <libxml/HTMLparser.h>
45 #include <libxml/globals.h>
46 #include <libxml/xmlerror.h>
51 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler
= NULL
;
52 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler
= NULL
;
54 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias
;
55 typedef xmlCharEncodingAlias
*xmlCharEncodingAliasPtr
;
56 struct _xmlCharEncodingAlias
{
61 static xmlCharEncodingAliasPtr xmlCharEncodingAliases
= NULL
;
62 static int xmlCharEncodingAliasesNb
= 0;
63 static int xmlCharEncodingAliasesMax
= 0;
65 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
67 #define DEBUG_ENCODING /* Define this to get encoding traces */
70 #ifdef LIBXML_ISO8859X_ENABLED
71 static void xmlRegisterCharEncodingHandlersISO8859x (void);
75 static int xmlLittleEndian
= 1;
78 * xmlEncodingErrMemory:
79 * @extra: extra information
81 * Handle an out of memory condition
84 xmlEncodingErrMemory(const char *extra
)
86 __xmlSimpleError(XML_FROM_I18N
, XML_ERR_NO_MEMORY
, NULL
, NULL
, extra
);
91 * @error: the error number
92 * @msg: the error message
96 static void LIBXML_ATTR_FORMAT(2,0)
97 xmlEncodingErr(xmlParserErrors error
, const char *msg
, const char *val
)
99 __xmlRaiseError(NULL
, NULL
, NULL
, NULL
, NULL
,
100 XML_FROM_I18N
, error
, XML_ERR_FATAL
,
101 NULL
, 0, val
, NULL
, NULL
, 0, 0, msg
, val
);
104 #ifdef LIBXML_ICU_ENABLED
106 openIcuConverter(const char* name
, int toUnicode
)
108 UErrorCode status
= U_ZERO_ERROR
;
109 uconv_t
*conv
= (uconv_t
*) xmlMalloc(sizeof(uconv_t
));
113 conv
->pivot_source
= conv
->pivot_buf
;
114 conv
->pivot_target
= conv
->pivot_buf
;
116 conv
->uconv
= ucnv_open(name
, &status
);
117 if (U_FAILURE(status
))
120 status
= U_ZERO_ERROR
;
122 ucnv_setToUCallBack(conv
->uconv
, UCNV_TO_U_CALLBACK_STOP
,
123 NULL
, NULL
, NULL
, &status
);
126 ucnv_setFromUCallBack(conv
->uconv
, UCNV_FROM_U_CALLBACK_STOP
,
127 NULL
, NULL
, NULL
, &status
);
129 if (U_FAILURE(status
))
132 status
= U_ZERO_ERROR
;
133 conv
->utf8
= ucnv_open("UTF-8", &status
);
134 if (U_SUCCESS(status
))
139 ucnv_close(conv
->uconv
);
145 closeIcuConverter(uconv_t
*conv
)
148 ucnv_close(conv
->uconv
);
149 ucnv_close(conv
->utf8
);
153 #endif /* LIBXML_ICU_ENABLED */
155 /************************************************************************
157 * Conversions To/From UTF8 encoding *
159 ************************************************************************/
163 * @out: a pointer to an array of bytes to store the result
164 * @outlen: the length of @out
165 * @in: a pointer to an array of ASCII chars
166 * @inlen: the length of @in
168 * Take a block of ASCII chars in and try to convert it to an UTF-8
169 * block of chars out.
170 * Returns 0 if success, or -1 otherwise
171 * The value of @inlen after return is the number of octets consumed
172 * if the return value is positive, else unpredictable.
173 * The value of @outlen after return is the number of octets produced.
176 asciiToUTF8(unsigned char* out
, int *outlen
,
177 const unsigned char* in
, int *inlen
) {
178 unsigned char* outstart
= out
;
179 const unsigned char* base
= in
;
180 const unsigned char* processed
= in
;
181 unsigned char* outend
= out
+ *outlen
;
182 const unsigned char* inend
;
185 inend
= in
+ (*inlen
);
186 while ((in
< inend
) && (out
- outstart
+ 5 < *outlen
)) {
194 *outlen
= out
- outstart
;
195 *inlen
= processed
- base
;
199 processed
= (const unsigned char*) in
;
201 *outlen
= out
- outstart
;
202 *inlen
= processed
- base
;
206 #ifdef LIBXML_OUTPUT_ENABLED
209 * @out: a pointer to an array of bytes to store the result
210 * @outlen: the length of @out
211 * @in: a pointer to an array of UTF-8 chars
212 * @inlen: the length of @in
214 * Take a block of UTF-8 chars in and try to convert it to an ASCII
215 * block of chars out.
217 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
218 * The value of @inlen after return is the number of octets consumed
219 * if the return value is positive, else unpredictable.
220 * The value of @outlen after return is the number of octets produced.
223 UTF8Toascii(unsigned char* out
, int *outlen
,
224 const unsigned char* in
, int *inlen
) {
225 const unsigned char* processed
= in
;
226 const unsigned char* outend
;
227 const unsigned char* outstart
= out
;
228 const unsigned char* instart
= in
;
229 const unsigned char* inend
;
233 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
)) return(-1);
236 * initialization nothing to do
242 inend
= in
+ (*inlen
);
243 outend
= out
+ (*outlen
);
246 if (d
< 0x80) { c
= d
; trailing
= 0; }
248 /* trailing byte in leading position */
249 *outlen
= out
- outstart
;
250 *inlen
= processed
- instart
;
252 } else if (d
< 0xE0) { c
= d
& 0x1F; trailing
= 1; }
253 else if (d
< 0xF0) { c
= d
& 0x0F; trailing
= 2; }
254 else if (d
< 0xF8) { c
= d
& 0x07; trailing
= 3; }
256 /* no chance for this in Ascii */
257 *outlen
= out
- outstart
;
258 *inlen
= processed
- instart
;
262 if (inend
- in
< trailing
) {
266 for ( ; trailing
; trailing
--) {
267 if ((in
>= inend
) || (((d
= *in
++) & 0xC0) != 0x80))
273 /* assertion: c is a single UCS-4 value */
279 /* no chance for this in Ascii */
280 *outlen
= out
- outstart
;
281 *inlen
= processed
- instart
;
286 *outlen
= out
- outstart
;
287 *inlen
= processed
- instart
;
290 #endif /* LIBXML_OUTPUT_ENABLED */
294 * @out: a pointer to an array of bytes to store the result
295 * @outlen: the length of @out
296 * @in: a pointer to an array of ISO Latin 1 chars
297 * @inlen: the length of @in
299 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
300 * block of chars out.
301 * Returns the number of bytes written if success, or -1 otherwise
302 * The value of @inlen after return is the number of octets consumed
303 * if the return value is positive, else unpredictable.
304 * The value of @outlen after return is the number of octets produced.
307 isolat1ToUTF8(unsigned char* out
, int *outlen
,
308 const unsigned char* in
, int *inlen
) {
309 unsigned char* outstart
= out
;
310 const unsigned char* base
= in
;
311 unsigned char* outend
;
312 const unsigned char* inend
;
313 const unsigned char* instop
;
315 if ((out
== NULL
) || (in
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
))
318 outend
= out
+ *outlen
;
319 inend
= in
+ (*inlen
);
322 while ((in
< inend
) && (out
< outend
- 1)) {
324 *out
++ = (((*in
) >> 6) & 0x1F) | 0xC0;
325 *out
++ = ((*in
) & 0x3F) | 0x80;
328 if ((instop
- in
) > (outend
- out
)) instop
= in
+ (outend
- out
);
329 while ((in
< instop
) && (*in
< 0x80)) {
333 if ((in
< inend
) && (out
< outend
) && (*in
< 0x80)) {
336 *outlen
= out
- outstart
;
343 * @out: a pointer to an array of bytes to store the result
344 * @outlen: the length of @out
345 * @inb: a pointer to an array of UTF-8 chars
346 * @inlenb: the length of @inb in bytes
348 * No op copy operation for UTF8 handling.
350 * Returns the number of bytes written, or -1 if lack of space.
351 * The value of *inlenb after return is the number of octets consumed
352 * if the return value is positive, else unpredictable.
355 UTF8ToUTF8(unsigned char* out
, int *outlen
,
356 const unsigned char* inb
, int *inlenb
)
360 if ((out
== NULL
) || (outlen
== NULL
) || (inlenb
== NULL
))
363 /* inb == NULL means output is initialized. */
368 if (*outlen
> *inlenb
) {
376 memcpy(out
, inb
, len
);
384 #ifdef LIBXML_OUTPUT_ENABLED
387 * @out: a pointer to an array of bytes to store the result
388 * @outlen: the length of @out
389 * @in: a pointer to an array of UTF-8 chars
390 * @inlen: the length of @in
392 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
393 * block of chars out.
395 * Returns the number of bytes written if success, -2 if the transcoding fails,
397 * The value of @inlen after return is the number of octets consumed
398 * if the return value is positive, else unpredictable.
399 * The value of @outlen after return is the number of octets produced.
402 UTF8Toisolat1(unsigned char* out
, int *outlen
,
403 const unsigned char* in
, int *inlen
) {
404 const unsigned char* processed
= in
;
405 const unsigned char* outend
;
406 const unsigned char* outstart
= out
;
407 const unsigned char* instart
= in
;
408 const unsigned char* inend
;
412 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
)) return(-1);
415 * initialization nothing to do
421 inend
= in
+ (*inlen
);
422 outend
= out
+ (*outlen
);
425 if (d
< 0x80) { c
= d
; trailing
= 0; }
427 /* trailing byte in leading position */
428 *outlen
= out
- outstart
;
429 *inlen
= processed
- instart
;
431 } else if (d
< 0xE0) { c
= d
& 0x1F; trailing
= 1; }
432 else if (d
< 0xF0) { c
= d
& 0x0F; trailing
= 2; }
433 else if (d
< 0xF8) { c
= d
& 0x07; trailing
= 3; }
435 /* no chance for this in IsoLat1 */
436 *outlen
= out
- outstart
;
437 *inlen
= processed
- instart
;
441 if (inend
- in
< trailing
) {
445 for ( ; trailing
; trailing
--) {
448 if (((d
= *in
++) & 0xC0) != 0x80) {
449 *outlen
= out
- outstart
;
450 *inlen
= processed
- instart
;
457 /* assertion: c is a single UCS-4 value */
463 /* no chance for this in IsoLat1 */
464 *outlen
= out
- outstart
;
465 *inlen
= processed
- instart
;
470 *outlen
= out
- outstart
;
471 *inlen
= processed
- instart
;
474 #endif /* LIBXML_OUTPUT_ENABLED */
478 * @out: a pointer to an array of bytes to store the result
479 * @outlen: the length of @out
480 * @inb: a pointer to an array of UTF-16LE passed as a byte array
481 * @inlenb: the length of @inb in bytes
483 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
484 * block of chars out. This function assumes the endian property
485 * is the same between the native type of this machine and the
488 * Returns the number of bytes written, or -1 if lack of space, or -2
489 * if the transcoding fails (if *in is not a valid utf16 string)
490 * The value of *inlenb after return is the number of octets consumed
491 * if the return value is positive, else unpredictable.
494 UTF16LEToUTF8(unsigned char* out
, int *outlen
,
495 const unsigned char* inb
, int *inlenb
)
497 unsigned char* outstart
= out
;
498 const unsigned char* processed
= inb
;
499 unsigned char* outend
= out
+ *outlen
;
500 unsigned short* in
= (unsigned short*) inb
;
501 unsigned short* inend
;
502 unsigned int c
, d
, inlen
;
506 if ((*inlenb
% 2) == 1)
510 while ((in
< inend
) && (out
- outstart
+ 5 < *outlen
)) {
511 if (xmlLittleEndian
) {
514 tmp
= (unsigned char *) in
;
516 c
= c
| (((unsigned int)*tmp
) << 8);
519 if ((c
& 0xFC00) == 0xD800) { /* surrogates */
520 if (in
>= inend
) { /* (in > inend) shouldn't happens */
523 if (xmlLittleEndian
) {
526 tmp
= (unsigned char *) in
;
528 d
= d
| (((unsigned int)*tmp
) << 8);
531 if ((d
& 0xFC00) == 0xDC00) {
538 *outlen
= out
- outstart
;
539 *inlenb
= processed
- inb
;
544 /* assertion: c is a single UCS-4 value */
547 if (c
< 0x80) { *out
++= c
; bits
= -6; }
548 else if (c
< 0x800) { *out
++= ((c
>> 6) & 0x1F) | 0xC0; bits
= 0; }
549 else if (c
< 0x10000) { *out
++= ((c
>> 12) & 0x0F) | 0xE0; bits
= 6; }
550 else { *out
++= ((c
>> 18) & 0x07) | 0xF0; bits
= 12; }
552 for ( ; bits
>= 0; bits
-= 6) {
555 *out
++= ((c
>> bits
) & 0x3F) | 0x80;
557 processed
= (const unsigned char*) in
;
559 *outlen
= out
- outstart
;
560 *inlenb
= processed
- inb
;
564 #ifdef LIBXML_OUTPUT_ENABLED
567 * @outb: a pointer to an array of bytes to store the result
568 * @outlen: the length of @outb
569 * @in: a pointer to an array of UTF-8 chars
570 * @inlen: the length of @in
572 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
573 * block of chars out.
575 * Returns the number of bytes written, or -1 if lack of space, or -2
576 * if the transcoding failed.
579 UTF8ToUTF16LE(unsigned char* outb
, int *outlen
,
580 const unsigned char* in
, int *inlen
)
582 unsigned short* out
= (unsigned short*) outb
;
583 const unsigned char* processed
= in
;
584 const unsigned char *const instart
= in
;
585 unsigned short* outstart
= out
;
586 unsigned short* outend
;
587 const unsigned char* inend
;
591 unsigned short tmp1
, tmp2
;
593 /* UTF16LE encoding has no BOM */
594 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
)) return(-1);
601 outend
= out
+ (*outlen
/ 2);
604 if (d
< 0x80) { c
= d
; trailing
= 0; }
606 /* trailing byte in leading position */
607 *outlen
= (out
- outstart
) * 2;
608 *inlen
= processed
- instart
;
610 } else if (d
< 0xE0) { c
= d
& 0x1F; trailing
= 1; }
611 else if (d
< 0xF0) { c
= d
& 0x0F; trailing
= 2; }
612 else if (d
< 0xF8) { c
= d
& 0x07; trailing
= 3; }
614 /* no chance for this in UTF-16 */
615 *outlen
= (out
- outstart
) * 2;
616 *inlen
= processed
- instart
;
620 if (inend
- in
< trailing
) {
624 for ( ; trailing
; trailing
--) {
625 if ((in
>= inend
) || (((d
= *in
++) & 0xC0) != 0x80))
631 /* assertion: c is a single UCS-4 value */
635 if (xmlLittleEndian
) {
638 tmp
= (unsigned char *) out
;
640 *(tmp
+ 1) = c
>> 8 ;
644 else if (c
< 0x110000) {
648 if (xmlLittleEndian
) {
649 *out
++ = 0xD800 | (c
>> 10);
650 *out
++ = 0xDC00 | (c
& 0x03FF);
652 tmp1
= 0xD800 | (c
>> 10);
653 tmp
= (unsigned char *) out
;
654 *tmp
= (unsigned char) tmp1
;
655 *(tmp
+ 1) = tmp1
>> 8;
658 tmp2
= 0xDC00 | (c
& 0x03FF);
659 tmp
= (unsigned char *) out
;
660 *tmp
= (unsigned char) tmp2
;
661 *(tmp
+ 1) = tmp2
>> 8;
669 *outlen
= (out
- outstart
) * 2;
670 *inlen
= processed
- instart
;
676 * @outb: a pointer to an array of bytes to store the result
677 * @outlen: the length of @outb
678 * @in: a pointer to an array of UTF-8 chars
679 * @inlen: the length of @in
681 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
682 * block of chars out.
684 * Returns the number of bytes written, or -1 if lack of space, or -2
685 * if the transcoding failed.
688 UTF8ToUTF16(unsigned char* outb
, int *outlen
,
689 const unsigned char* in
, int *inlen
)
693 * initialization, add the Byte Order Mark for UTF-16LE
700 #ifdef DEBUG_ENCODING
701 xmlGenericError(xmlGenericErrorContext
,
702 "Added FFFE Byte Order Mark\n");
710 return (UTF8ToUTF16LE(outb
, outlen
, in
, inlen
));
712 #endif /* LIBXML_OUTPUT_ENABLED */
716 * @out: a pointer to an array of bytes to store the result
717 * @outlen: the length of @out
718 * @inb: a pointer to an array of UTF-16 passed as a byte array
719 * @inlenb: the length of @inb in bytes
721 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
722 * block of chars out. This function assumes the endian property
723 * is the same between the native type of this machine and the
726 * Returns the number of bytes written, or -1 if lack of space, or -2
727 * if the transcoding fails (if *in is not a valid utf16 string)
728 * The value of *inlenb after return is the number of octets consumed
729 * if the return value is positive, else unpredictable.
732 UTF16BEToUTF8(unsigned char* out
, int *outlen
,
733 const unsigned char* inb
, int *inlenb
)
735 unsigned char* outstart
= out
;
736 const unsigned char* processed
= inb
;
737 unsigned char* outend
= out
+ *outlen
;
738 unsigned short* in
= (unsigned short*) inb
;
739 unsigned short* inend
;
740 unsigned int c
, d
, inlen
;
744 if ((*inlenb
% 2) == 1)
749 if (xmlLittleEndian
) {
750 tmp
= (unsigned char *) in
;
753 c
= c
| (unsigned int) *tmp
;
758 if ((c
& 0xFC00) == 0xD800) { /* surrogates */
759 if (in
>= inend
) { /* (in > inend) shouldn't happens */
760 *outlen
= out
- outstart
;
761 *inlenb
= processed
- inb
;
764 if (xmlLittleEndian
) {
765 tmp
= (unsigned char *) in
;
768 d
= d
| (unsigned int) *tmp
;
773 if ((d
& 0xFC00) == 0xDC00) {
780 *outlen
= out
- outstart
;
781 *inlenb
= processed
- inb
;
786 /* assertion: c is a single UCS-4 value */
789 if (c
< 0x80) { *out
++= c
; bits
= -6; }
790 else if (c
< 0x800) { *out
++= ((c
>> 6) & 0x1F) | 0xC0; bits
= 0; }
791 else if (c
< 0x10000) { *out
++= ((c
>> 12) & 0x0F) | 0xE0; bits
= 6; }
792 else { *out
++= ((c
>> 18) & 0x07) | 0xF0; bits
= 12; }
794 for ( ; bits
>= 0; bits
-= 6) {
797 *out
++= ((c
>> bits
) & 0x3F) | 0x80;
799 processed
= (const unsigned char*) in
;
801 *outlen
= out
- outstart
;
802 *inlenb
= processed
- inb
;
806 #ifdef LIBXML_OUTPUT_ENABLED
809 * @outb: a pointer to an array of bytes to store the result
810 * @outlen: the length of @outb
811 * @in: a pointer to an array of UTF-8 chars
812 * @inlen: the length of @in
814 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
815 * block of chars out.
817 * Returns the number of bytes written, or -1 if lack of space, or -2
818 * if the transcoding failed.
821 UTF8ToUTF16BE(unsigned char* outb
, int *outlen
,
822 const unsigned char* in
, int *inlen
)
824 unsigned short* out
= (unsigned short*) outb
;
825 const unsigned char* processed
= in
;
826 const unsigned char *const instart
= in
;
827 unsigned short* outstart
= out
;
828 unsigned short* outend
;
829 const unsigned char* inend
;
833 unsigned short tmp1
, tmp2
;
835 /* UTF-16BE has no BOM */
836 if ((outb
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
)) return(-1);
843 outend
= out
+ (*outlen
/ 2);
846 if (d
< 0x80) { c
= d
; trailing
= 0; }
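848 /* trailing byte in leading position. NOTE(review): *outlen is set in 16-bit units on this error path, but in bytes ((out - outstart) * 2) at the end of this function and in UTF8ToUTF16LE -- confirm and align */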
849 *outlen
= out
- outstart
;
850 *inlen
= processed
- instart
;
852 } else if (d
< 0xE0) { c
= d
& 0x1F; trailing
= 1; }
853 else if (d
< 0xF0) { c
= d
& 0x0F; trailing
= 2; }
854 else if (d
< 0xF8) { c
= d
& 0x07; trailing
= 3; }
856 /* no chance for this in UTF-16 */
857 *outlen
= out
- outstart
;
858 *inlen
= processed
- instart
;
862 if (inend
- in
< trailing
) {
866 for ( ; trailing
; trailing
--) {
867 if ((in
>= inend
) || (((d
= *in
++) & 0xC0) != 0x80)) break;
872 /* assertion: c is a single UCS-4 value */
874 if (out
>= outend
) break;
875 if (xmlLittleEndian
) {
876 tmp
= (unsigned char *) out
;
884 else if (c
< 0x110000) {
885 if (out
+1 >= outend
) break;
887 if (xmlLittleEndian
) {
888 tmp1
= 0xD800 | (c
>> 10);
889 tmp
= (unsigned char *) out
;
891 *(tmp
+ 1) = (unsigned char) tmp1
;
894 tmp2
= 0xDC00 | (c
& 0x03FF);
895 tmp
= (unsigned char *) out
;
897 *(tmp
+ 1) = (unsigned char) tmp2
;
900 *out
++ = 0xD800 | (c
>> 10);
901 *out
++ = 0xDC00 | (c
& 0x03FF);
908 *outlen
= (out
- outstart
) * 2;
909 *inlen
= processed
- instart
;
912 #endif /* LIBXML_OUTPUT_ENABLED */
914 /************************************************************************
916 * Generic encoding handling routines *
918 ************************************************************************/
921 * xmlDetectCharEncoding:
922 * @in: a pointer to the first bytes of the XML entity, must be at least
923 * 2 bytes long (at least 4 if encoding is UCS4 variant).
924 * @len: the length of the buffer
926 * Guess the encoding of the entity using the first bytes of the entity content
927 * according to the non-normative appendix F of the XML-1.0 recommendation.
929 * Returns one of the XML_CHAR_ENCODING_... values.
932 xmlDetectCharEncoding(const unsigned char* in
, int len
)
935 return(XML_CHAR_ENCODING_NONE
);
937 if ((in
[0] == 0x00) && (in
[1] == 0x00) &&
938 (in
[2] == 0x00) && (in
[3] == 0x3C))
939 return(XML_CHAR_ENCODING_UCS4BE
);
940 if ((in
[0] == 0x3C) && (in
[1] == 0x00) &&
941 (in
[2] == 0x00) && (in
[3] == 0x00))
942 return(XML_CHAR_ENCODING_UCS4LE
);
943 if ((in
[0] == 0x00) && (in
[1] == 0x00) &&
944 (in
[2] == 0x3C) && (in
[3] == 0x00))
945 return(XML_CHAR_ENCODING_UCS4_2143
);
946 if ((in
[0] == 0x00) && (in
[1] == 0x3C) &&
947 (in
[2] == 0x00) && (in
[3] == 0x00))
948 return(XML_CHAR_ENCODING_UCS4_3412
);
949 if ((in
[0] == 0x4C) && (in
[1] == 0x6F) &&
950 (in
[2] == 0xA7) && (in
[3] == 0x94))
951 return(XML_CHAR_ENCODING_EBCDIC
);
952 if ((in
[0] == 0x3C) && (in
[1] == 0x3F) &&
953 (in
[2] == 0x78) && (in
[3] == 0x6D))
954 return(XML_CHAR_ENCODING_UTF8
);
956 * Although not part of the recommendation, we also
957 * attempt an "auto-recognition" of UTF-16LE and
958 * UTF-16BE encodings.
960 if ((in
[0] == 0x3C) && (in
[1] == 0x00) &&
961 (in
[2] == 0x3F) && (in
[3] == 0x00))
962 return(XML_CHAR_ENCODING_UTF16LE
);
963 if ((in
[0] == 0x00) && (in
[1] == 0x3C) &&
964 (in
[2] == 0x00) && (in
[3] == 0x3F))
965 return(XML_CHAR_ENCODING_UTF16BE
);
969 * Errata on XML-1.0 June 20 2001
970 * We now allow an UTF8 encoded BOM
972 if ((in
[0] == 0xEF) && (in
[1] == 0xBB) &&
974 return(XML_CHAR_ENCODING_UTF8
);
976 /* For UTF-16 we can recognize by the BOM */
978 if ((in
[0] == 0xFE) && (in
[1] == 0xFF))
979 return(XML_CHAR_ENCODING_UTF16BE
);
980 if ((in
[0] == 0xFF) && (in
[1] == 0xFE))
981 return(XML_CHAR_ENCODING_UTF16LE
);
983 return(XML_CHAR_ENCODING_NONE
);
987 * xmlCleanupEncodingAliases:
989 * Unregisters all aliases
992 xmlCleanupEncodingAliases(void) {
995 if (xmlCharEncodingAliases
== NULL
)
998 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
999 if (xmlCharEncodingAliases
[i
].name
!= NULL
)
1000 xmlFree((char *) xmlCharEncodingAliases
[i
].name
);
1001 if (xmlCharEncodingAliases
[i
].alias
!= NULL
)
1002 xmlFree((char *) xmlCharEncodingAliases
[i
].alias
);
1004 xmlCharEncodingAliasesNb
= 0;
1005 xmlCharEncodingAliasesMax
= 0;
1006 xmlFree(xmlCharEncodingAliases
);
1007 xmlCharEncodingAliases
= NULL
;
1011 * xmlGetEncodingAlias:
1012 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1014 * Lookup an encoding name for the given alias.
1016 * Returns NULL if not found, otherwise the original name
1019 xmlGetEncodingAlias(const char *alias
) {
1026 if (xmlCharEncodingAliases
== NULL
)
1029 for (i
= 0;i
< 99;i
++) {
1030 upper
[i
] = toupper(alias
[i
]);
1031 if (upper
[i
] == 0) break;
1036 * Walk down the list looking for a definition of the alias
1038 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
1039 if (!strcmp(xmlCharEncodingAliases
[i
].alias
, upper
)) {
1040 return(xmlCharEncodingAliases
[i
].name
);
1047 * xmlAddEncodingAlias:
1048 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1049 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1051 * Registers an alias @alias for an encoding named @name. Existing alias
1052 * will be overwritten.
1054 * Returns 0 in case of success, -1 in case of error
1057 xmlAddEncodingAlias(const char *name
, const char *alias
) {
1061 if ((name
== NULL
) || (alias
== NULL
))
1064 for (i
= 0;i
< 99;i
++) {
1065 upper
[i
] = toupper(alias
[i
]);
1066 if (upper
[i
] == 0) break;
1070 if (xmlCharEncodingAliases
== NULL
) {
1071 xmlCharEncodingAliasesNb
= 0;
1072 xmlCharEncodingAliasesMax
= 20;
1073 xmlCharEncodingAliases
= (xmlCharEncodingAliasPtr
)
1074 xmlMalloc(xmlCharEncodingAliasesMax
* sizeof(xmlCharEncodingAlias
));
1075 if (xmlCharEncodingAliases
== NULL
)
1077 } else if (xmlCharEncodingAliasesNb
>= xmlCharEncodingAliasesMax
) {
1078 xmlCharEncodingAliasesMax
*= 2;
1079 xmlCharEncodingAliases
= (xmlCharEncodingAliasPtr
)
1080 xmlRealloc(xmlCharEncodingAliases
,
1081 xmlCharEncodingAliasesMax
* sizeof(xmlCharEncodingAlias
));
1084 * Walk down the list looking for a definition of the alias
1086 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
1087 if (!strcmp(xmlCharEncodingAliases
[i
].alias
, upper
)) {
1089 * Replace the definition.
1091 xmlFree((char *) xmlCharEncodingAliases
[i
].name
);
1092 xmlCharEncodingAliases
[i
].name
= xmlMemStrdup(name
);
1097 * Add the definition
1099 xmlCharEncodingAliases
[xmlCharEncodingAliasesNb
].name
= xmlMemStrdup(name
);
1100 xmlCharEncodingAliases
[xmlCharEncodingAliasesNb
].alias
= xmlMemStrdup(upper
);
1101 xmlCharEncodingAliasesNb
++;
1106 * xmlDelEncodingAlias:
1107 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1109 * Unregisters an encoding alias @alias
1111 * Returns 0 in case of success, -1 in case of error
1114 xmlDelEncodingAlias(const char *alias
) {
1120 if (xmlCharEncodingAliases
== NULL
)
1123 * Walk down the list looking for a definition of the alias
1125 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
1126 if (!strcmp(xmlCharEncodingAliases
[i
].alias
, alias
)) {
1127 xmlFree((char *) xmlCharEncodingAliases
[i
].name
);
1128 xmlFree((char *) xmlCharEncodingAliases
[i
].alias
);
1129 xmlCharEncodingAliasesNb
--;
1130 memmove(&xmlCharEncodingAliases
[i
], &xmlCharEncodingAliases
[i
+ 1],
1131 sizeof(xmlCharEncodingAlias
) * (xmlCharEncodingAliasesNb
- i
));
1139 * xmlParseCharEncoding:
1140 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1142 * Compare the string to the encoding schemes already known. Note
1143 * that the comparison is case insensitive according to section
1144 * [XML] 4.3.3 Character Encoding in Entities.
1146 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1147 * if not recognized.
1150 xmlParseCharEncoding(const char* name
)
1157 return(XML_CHAR_ENCODING_NONE
);
1160 * Do the alias resolution
1162 alias
= xmlGetEncodingAlias(name
);
1166 for (i
= 0;i
< 499;i
++) {
1167 upper
[i
] = toupper(name
[i
]);
1168 if (upper
[i
] == 0) break;
1172 if (!strcmp(upper
, "")) return(XML_CHAR_ENCODING_NONE
);
1173 if (!strcmp(upper
, "UTF-8")) return(XML_CHAR_ENCODING_UTF8
);
1174 if (!strcmp(upper
, "UTF8")) return(XML_CHAR_ENCODING_UTF8
);
1177 * NOTE: if we were able to parse this, the endianness of UTF16 is
1178 * already found and in use
1180 if (!strcmp(upper
, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE
);
1181 if (!strcmp(upper
, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE
);
1183 if (!strcmp(upper
, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2
);
1184 if (!strcmp(upper
, "UCS-2")) return(XML_CHAR_ENCODING_UCS2
);
1185 if (!strcmp(upper
, "UCS2")) return(XML_CHAR_ENCODING_UCS2
);
1188 * NOTE: if we were able to parse this, the endianness of UCS4 is
1189 * already found and in use
1191 if (!strcmp(upper
, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE
);
1192 if (!strcmp(upper
, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE
);
1193 if (!strcmp(upper
, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE
);
1196 if (!strcmp(upper
, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1
);
1197 if (!strcmp(upper
, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1
);
1198 if (!strcmp(upper
, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1
);
1200 if (!strcmp(upper
, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2
);
1201 if (!strcmp(upper
, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2
);
1202 if (!strcmp(upper
, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2
);
1204 if (!strcmp(upper
, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3
);
1205 if (!strcmp(upper
, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4
);
1206 if (!strcmp(upper
, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5
);
1207 if (!strcmp(upper
, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6
);
1208 if (!strcmp(upper
, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7
);
1209 if (!strcmp(upper
, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8
);
1210 if (!strcmp(upper
, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9
);
1212 if (!strcmp(upper
, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP
);
1213 if (!strcmp(upper
, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS
);
1214 if (!strcmp(upper
, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP
);
1216 #ifdef DEBUG_ENCODING
1217 xmlGenericError(xmlGenericErrorContext
, "Unknown encoding %s\n", name
);
1219 return(XML_CHAR_ENCODING_ERROR
);
1223 * xmlGetCharEncodingName:
1224 * @enc: the encoding
1226 * The "canonical" name for XML encoding.
1227 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1228 * Section 4.3.3 Character Encoding in Entities
1230 * Returns the canonical name for the given encoding
1234 xmlGetCharEncodingName(xmlCharEncoding enc
) {
1236 case XML_CHAR_ENCODING_ERROR
:
1238 case XML_CHAR_ENCODING_NONE
:
1240 case XML_CHAR_ENCODING_UTF8
:
1242 case XML_CHAR_ENCODING_UTF16LE
:
1244 case XML_CHAR_ENCODING_UTF16BE
:
1246 case XML_CHAR_ENCODING_EBCDIC
:
1248 case XML_CHAR_ENCODING_UCS4LE
:
1249 return("ISO-10646-UCS-4");
1250 case XML_CHAR_ENCODING_UCS4BE
:
1251 return("ISO-10646-UCS-4");
1252 case XML_CHAR_ENCODING_UCS4_2143
:
1253 return("ISO-10646-UCS-4");
1254 case XML_CHAR_ENCODING_UCS4_3412
:
1255 return("ISO-10646-UCS-4");
1256 case XML_CHAR_ENCODING_UCS2
:
1257 return("ISO-10646-UCS-2");
1258 case XML_CHAR_ENCODING_8859_1
:
1259 return("ISO-8859-1");
1260 case XML_CHAR_ENCODING_8859_2
:
1261 return("ISO-8859-2");
1262 case XML_CHAR_ENCODING_8859_3
:
1263 return("ISO-8859-3");
1264 case XML_CHAR_ENCODING_8859_4
:
1265 return("ISO-8859-4");
1266 case XML_CHAR_ENCODING_8859_5
:
1267 return("ISO-8859-5");
1268 case XML_CHAR_ENCODING_8859_6
:
1269 return("ISO-8859-6");
1270 case XML_CHAR_ENCODING_8859_7
:
1271 return("ISO-8859-7");
1272 case XML_CHAR_ENCODING_8859_8
:
1273 return("ISO-8859-8");
1274 case XML_CHAR_ENCODING_8859_9
:
1275 return("ISO-8859-9");
1276 case XML_CHAR_ENCODING_2022_JP
:
1277 return("ISO-2022-JP");
1278 case XML_CHAR_ENCODING_SHIFT_JIS
:
1279 return("Shift-JIS");
1280 case XML_CHAR_ENCODING_EUC_JP
:
1282 case XML_CHAR_ENCODING_ASCII
:
1288 /************************************************************************
1290 * Char encoding handlers *
1292 ************************************************************************/
1295 /* the size should be growable, but it's not a big deal ... */
1296 #define MAX_ENCODING_HANDLERS 50
1297 static xmlCharEncodingHandlerPtr
*handlers
= NULL
;
1298 static int nbCharEncodingHandler
= 0;
1301 * The default is UTF-8 for XML, that's also the default used for the
1302 * parser internals, so the default encoding handler is NULL
1305 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler
= NULL
;
1308 * xmlNewCharEncodingHandler:
1309 * @name: the encoding name, in UTF-8 format (ASCII actually)
1310 * @input: the xmlCharEncodingInputFunc to read that encoding
1311 * @output: the xmlCharEncodingOutputFunc to write that encoding
1313 * Creates and registers an xmlCharEncodingHandler.
1315 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1317 xmlCharEncodingHandlerPtr
1318 xmlNewCharEncodingHandler(const char *name
,
1319 xmlCharEncodingInputFunc input
,
1320 xmlCharEncodingOutputFunc output
) {
1321 xmlCharEncodingHandlerPtr handler
;
1328 * Do the alias resolution
1330 alias
= xmlGetEncodingAlias(name
);
1335 * Keep only the uppercase version of the encoding.
1338 xmlEncodingErr(XML_I18N_NO_NAME
,
1339 "xmlNewCharEncodingHandler : no name !\n", NULL
);
1342 for (i
= 0;i
< 499;i
++) {
1343 upper
[i
] = toupper(name
[i
]);
1344 if (upper
[i
] == 0) break;
1347 up
= xmlMemStrdup(upper
);
1349 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1354 * allocate and fill-up a handler block.
1356 handler
= (xmlCharEncodingHandlerPtr
)
1357 xmlMalloc(sizeof(xmlCharEncodingHandler
));
1358 if (handler
== NULL
) {
1360 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1363 memset(handler
, 0, sizeof(xmlCharEncodingHandler
));
1364 handler
->input
= input
;
1365 handler
->output
= output
;
1368 #ifdef LIBXML_ICONV_ENABLED
1369 handler
->iconv_in
= NULL
;
1370 handler
->iconv_out
= NULL
;
1372 #ifdef LIBXML_ICU_ENABLED
1373 handler
->uconv_in
= NULL
;
1374 handler
->uconv_out
= NULL
;
1378 * registers and returns the handler.
1380 xmlRegisterCharEncodingHandler(handler
);
1381 #ifdef DEBUG_ENCODING
1382 xmlGenericError(xmlGenericErrorContext
,
1383 "Registered encoding handler for %s\n", name
);
1389 * xmlInitCharEncodingHandlers:
1391 * Initialize the char encoding support; it registers the default
1392 * supported encodings.
1393 * NOTE: while public, this function usually doesn't need to be called
1394 * in normal processing.
1397 xmlInitCharEncodingHandlers(void) {
1398 unsigned short int tst
= 0x1234;
1399 unsigned char *ptr
= (unsigned char *) &tst
;
1401 if (handlers
!= NULL
) return;
1403 handlers
= (xmlCharEncodingHandlerPtr
*)
1404 xmlMalloc(MAX_ENCODING_HANDLERS
* sizeof(xmlCharEncodingHandlerPtr
));
1406 if (*ptr
== 0x12) xmlLittleEndian
= 0;
1407 else if (*ptr
== 0x34) xmlLittleEndian
= 1;
1409 xmlEncodingErr(XML_ERR_INTERNAL_ERROR
,
1410 "Odd problem at endianness detection\n", NULL
);
1413 if (handlers
== NULL
) {
1414 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1417 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8
, UTF8ToUTF8
);
1418 #ifdef LIBXML_OUTPUT_ENABLED
1420 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8
, UTF8ToUTF16LE
);
1422 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8
, UTF8ToUTF16BE
);
1423 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8
, UTF8ToUTF16
);
1424 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8
, UTF8Toisolat1
);
1425 xmlNewCharEncodingHandler("ASCII", asciiToUTF8
, UTF8Toascii
);
1426 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8
, UTF8Toascii
);
1427 #ifdef LIBXML_HTML_ENABLED
1428 xmlNewCharEncodingHandler("HTML", NULL
, UTF8ToHtml
);
1432 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8
, NULL
);
1434 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8
, NULL
);
1435 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8
, NULL
);
1436 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8
, NULL
);
1437 xmlNewCharEncodingHandler("ASCII", asciiToUTF8
, NULL
);
1438 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8
, NULL
);
1439 #endif /* LIBXML_OUTPUT_ENABLED */
1440 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1441 #ifdef LIBXML_ISO8859X_ENABLED
1442 xmlRegisterCharEncodingHandlersISO8859x ();
1449 * xmlCleanupCharEncodingHandlers:
1451 * Clean up the memory allocated for the char encoding support; it
1452 * unregisters all the encoding handlers and the aliases.
1455 xmlCleanupCharEncodingHandlers(void) {
1456 xmlCleanupEncodingAliases();
1458 if (handlers
== NULL
) return;
1460 for (;nbCharEncodingHandler
> 0;) {
1461 nbCharEncodingHandler
--;
1462 if (handlers
[nbCharEncodingHandler
] != NULL
) {
1463 if (handlers
[nbCharEncodingHandler
]->name
!= NULL
)
1464 xmlFree(handlers
[nbCharEncodingHandler
]->name
);
1465 xmlFree(handlers
[nbCharEncodingHandler
]);
1470 nbCharEncodingHandler
= 0;
1471 xmlDefaultCharEncodingHandler
= NULL
;
1475 * xmlRegisterCharEncodingHandler:
1476 * @handler: the xmlCharEncodingHandlerPtr handler block
1478 * Register the char encoding handler, surprising, isn't it ?
1481 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler
) {
1482 if (handlers
== NULL
) xmlInitCharEncodingHandlers();
1483 if ((handler
== NULL
) || (handlers
== NULL
)) {
1484 xmlEncodingErr(XML_I18N_NO_HANDLER
,
1485 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL
);
1489 if (nbCharEncodingHandler
>= MAX_ENCODING_HANDLERS
) {
1490 xmlEncodingErr(XML_I18N_EXCESS_HANDLER
,
1491 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1492 "MAX_ENCODING_HANDLERS");
1495 handlers
[nbCharEncodingHandler
++] = handler
;
1499 * xmlGetCharEncodingHandler:
1500 * @enc: an xmlCharEncoding value.
1502 * Search in the registered set the handler able to read/write that encoding.
1504 * Returns the handler or NULL if not found
1506 xmlCharEncodingHandlerPtr
1507 xmlGetCharEncodingHandler(xmlCharEncoding enc
) {
1508 xmlCharEncodingHandlerPtr handler
;
1510 if (handlers
== NULL
) xmlInitCharEncodingHandlers();
1512 case XML_CHAR_ENCODING_ERROR
:
1514 case XML_CHAR_ENCODING_NONE
:
1516 case XML_CHAR_ENCODING_UTF8
:
1518 case XML_CHAR_ENCODING_UTF16LE
:
1519 return(xmlUTF16LEHandler
);
1520 case XML_CHAR_ENCODING_UTF16BE
:
1521 return(xmlUTF16BEHandler
);
1522 case XML_CHAR_ENCODING_EBCDIC
:
1523 handler
= xmlFindCharEncodingHandler("EBCDIC");
1524 if (handler
!= NULL
) return(handler
);
1525 handler
= xmlFindCharEncodingHandler("ebcdic");
1526 if (handler
!= NULL
) return(handler
);
1527 handler
= xmlFindCharEncodingHandler("EBCDIC-US");
1528 if (handler
!= NULL
) return(handler
);
1529 handler
= xmlFindCharEncodingHandler("IBM-037");
1530 if (handler
!= NULL
) return(handler
);
1532 case XML_CHAR_ENCODING_UCS4BE
:
1533 handler
= xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1534 if (handler
!= NULL
) return(handler
);
1535 handler
= xmlFindCharEncodingHandler("UCS-4");
1536 if (handler
!= NULL
) return(handler
);
1537 handler
= xmlFindCharEncodingHandler("UCS4");
1538 if (handler
!= NULL
) return(handler
);
1540 case XML_CHAR_ENCODING_UCS4LE
:
1541 handler
= xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1542 if (handler
!= NULL
) return(handler
);
1543 handler
= xmlFindCharEncodingHandler("UCS-4");
1544 if (handler
!= NULL
) return(handler
);
1545 handler
= xmlFindCharEncodingHandler("UCS4");
1546 if (handler
!= NULL
) return(handler
);
1548 case XML_CHAR_ENCODING_UCS4_2143
:
1550 case XML_CHAR_ENCODING_UCS4_3412
:
1552 case XML_CHAR_ENCODING_UCS2
:
1553 handler
= xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1554 if (handler
!= NULL
) return(handler
);
1555 handler
= xmlFindCharEncodingHandler("UCS-2");
1556 if (handler
!= NULL
) return(handler
);
1557 handler
= xmlFindCharEncodingHandler("UCS2");
1558 if (handler
!= NULL
) return(handler
);
1562 * We used to keep ISO Latin encodings native in the
1563 * generated data. This led to so many problems that
1564 * this has been removed. One can still change this
1565 * back by registering no-op encoders for those
1567 case XML_CHAR_ENCODING_8859_1
:
1568 handler
= xmlFindCharEncodingHandler("ISO-8859-1");
1569 if (handler
!= NULL
) return(handler
);
1571 case XML_CHAR_ENCODING_8859_2
:
1572 handler
= xmlFindCharEncodingHandler("ISO-8859-2");
1573 if (handler
!= NULL
) return(handler
);
1575 case XML_CHAR_ENCODING_8859_3
:
1576 handler
= xmlFindCharEncodingHandler("ISO-8859-3");
1577 if (handler
!= NULL
) return(handler
);
1579 case XML_CHAR_ENCODING_8859_4
:
1580 handler
= xmlFindCharEncodingHandler("ISO-8859-4");
1581 if (handler
!= NULL
) return(handler
);
1583 case XML_CHAR_ENCODING_8859_5
:
1584 handler
= xmlFindCharEncodingHandler("ISO-8859-5");
1585 if (handler
!= NULL
) return(handler
);
1587 case XML_CHAR_ENCODING_8859_6
:
1588 handler
= xmlFindCharEncodingHandler("ISO-8859-6");
1589 if (handler
!= NULL
) return(handler
);
1591 case XML_CHAR_ENCODING_8859_7
:
1592 handler
= xmlFindCharEncodingHandler("ISO-8859-7");
1593 if (handler
!= NULL
) return(handler
);
1595 case XML_CHAR_ENCODING_8859_8
:
1596 handler
= xmlFindCharEncodingHandler("ISO-8859-8");
1597 if (handler
!= NULL
) return(handler
);
1599 case XML_CHAR_ENCODING_8859_9
:
1600 handler
= xmlFindCharEncodingHandler("ISO-8859-9");
1601 if (handler
!= NULL
) return(handler
);
1605 case XML_CHAR_ENCODING_2022_JP
:
1606 handler
= xmlFindCharEncodingHandler("ISO-2022-JP");
1607 if (handler
!= NULL
) return(handler
);
1609 case XML_CHAR_ENCODING_SHIFT_JIS
:
1610 handler
= xmlFindCharEncodingHandler("SHIFT-JIS");
1611 if (handler
!= NULL
) return(handler
);
1612 handler
= xmlFindCharEncodingHandler("SHIFT_JIS");
1613 if (handler
!= NULL
) return(handler
);
1614 handler
= xmlFindCharEncodingHandler("Shift_JIS");
1615 if (handler
!= NULL
) return(handler
);
1617 case XML_CHAR_ENCODING_EUC_JP
:
1618 handler
= xmlFindCharEncodingHandler("EUC-JP");
1619 if (handler
!= NULL
) return(handler
);
1625 #ifdef DEBUG_ENCODING
1626 xmlGenericError(xmlGenericErrorContext
,
1627 "No handler found for encoding %d\n", enc
);
1633 * xmlFindCharEncodingHandler:
1634 * @name: a string describing the char encoding.
1636 * Search in the registered set the handler able to read/write that encoding.
1638 * Returns the handler or NULL if not found
1640 xmlCharEncodingHandlerPtr
1641 xmlFindCharEncodingHandler(const char *name
) {
1644 xmlCharEncoding alias
;
1645 #ifdef LIBXML_ICONV_ENABLED
1646 xmlCharEncodingHandlerPtr enc
;
1647 iconv_t icv_in
, icv_out
;
1648 #endif /* LIBXML_ICONV_ENABLED */
1649 #ifdef LIBXML_ICU_ENABLED
1650 xmlCharEncodingHandlerPtr encu
;
1651 uconv_t
*ucv_in
, *ucv_out
;
1652 #endif /* LIBXML_ICU_ENABLED */
1656 if (handlers
== NULL
) xmlInitCharEncodingHandlers();
1657 if (name
== NULL
) return(xmlDefaultCharEncodingHandler
);
1658 if (name
[0] == 0) return(xmlDefaultCharEncodingHandler
);
1661 * Do the alias resolution
1664 nalias
= xmlGetEncodingAlias(name
);
1669 * Check first for directly registered encoding names
1671 for (i
= 0;i
< 99;i
++) {
1672 upper
[i
] = toupper(name
[i
]);
1673 if (upper
[i
] == 0) break;
1677 if (handlers
!= NULL
) {
1678 for (i
= 0;i
< nbCharEncodingHandler
; i
++) {
1679 if (!strcmp(upper
, handlers
[i
]->name
)) {
1680 #ifdef DEBUG_ENCODING
1681 xmlGenericError(xmlGenericErrorContext
,
1682 "Found registered handler for encoding %s\n", name
);
1684 return(handlers
[i
]);
1689 #ifdef LIBXML_ICONV_ENABLED
1690 /* check whether iconv can handle this */
1691 icv_in
= iconv_open("UTF-8", name
);
1692 icv_out
= iconv_open(name
, "UTF-8");
1693 if (icv_in
== (iconv_t
) -1) {
1694 icv_in
= iconv_open("UTF-8", upper
);
1696 if (icv_out
== (iconv_t
) -1) {
1697 icv_out
= iconv_open(upper
, "UTF-8");
1699 if ((icv_in
!= (iconv_t
) -1) && (icv_out
!= (iconv_t
) -1)) {
1700 enc
= (xmlCharEncodingHandlerPtr
)
1701 xmlMalloc(sizeof(xmlCharEncodingHandler
));
1703 iconv_close(icv_in
);
1704 iconv_close(icv_out
);
1707 memset(enc
, 0, sizeof(xmlCharEncodingHandler
));
1708 enc
->name
= xmlMemStrdup(name
);
1711 enc
->iconv_in
= icv_in
;
1712 enc
->iconv_out
= icv_out
;
1713 #ifdef DEBUG_ENCODING
1714 xmlGenericError(xmlGenericErrorContext
,
1715 "Found iconv handler for encoding %s\n", name
);
1718 } else if ((icv_in
!= (iconv_t
) -1) || icv_out
!= (iconv_t
) -1) {
1719 xmlEncodingErr(XML_ERR_INTERNAL_ERROR
,
1720 "iconv : problems with filters for '%s'\n", name
);
1722 #endif /* LIBXML_ICONV_ENABLED */
1723 #ifdef LIBXML_ICU_ENABLED
1724 /* check whether icu can handle this */
1725 ucv_in
= openIcuConverter(name
, 1);
1726 ucv_out
= openIcuConverter(name
, 0);
1727 if (ucv_in
!= NULL
&& ucv_out
!= NULL
) {
1728 encu
= (xmlCharEncodingHandlerPtr
)
1729 xmlMalloc(sizeof(xmlCharEncodingHandler
));
1731 closeIcuConverter(ucv_in
);
1732 closeIcuConverter(ucv_out
);
1735 memset(encu
, 0, sizeof(xmlCharEncodingHandler
));
1736 encu
->name
= xmlMemStrdup(name
);
1738 encu
->output
= NULL
;
1739 encu
->uconv_in
= ucv_in
;
1740 encu
->uconv_out
= ucv_out
;
1741 #ifdef DEBUG_ENCODING
1742 xmlGenericError(xmlGenericErrorContext
,
1743 "Found ICU converter handler for encoding %s\n", name
);
1746 } else if (ucv_in
!= NULL
|| ucv_out
!= NULL
) {
1747 closeIcuConverter(ucv_in
);
1748 closeIcuConverter(ucv_out
);
1749 xmlEncodingErr(XML_ERR_INTERNAL_ERROR
,
1750 "ICU converter : problems with filters for '%s'\n", name
);
1752 #endif /* LIBXML_ICU_ENABLED */
1754 #ifdef DEBUG_ENCODING
1755 xmlGenericError(xmlGenericErrorContext
,
1756 "No handler found for encoding %s\n", name
);
1760 * Fallback using the canonical names
1762 alias
= xmlParseCharEncoding(norig
);
1763 if (alias
!= XML_CHAR_ENCODING_ERROR
) {
1765 canon
= xmlGetCharEncodingName(alias
);
1766 if ((canon
!= NULL
) && (strcmp(name
, canon
))) {
1767 return(xmlFindCharEncodingHandler(canon
));
1771 /* If "none of the above", give up */
1775 /************************************************************************
1777 * ICONV based generic conversion functions *
1779 ************************************************************************/
1781 #ifdef LIBXML_ICONV_ENABLED
1784 * @cd: iconv converter data structure
1785 * @out: a pointer to an array of bytes to store the result
1786 * @outlen: the length of @out
1787 * @in: a pointer to an array of input bytes
1788 * @inlen: the length of @in
1790 * Returns 0 if success, or
1791 * -1 by lack of space, or
1792 * -2 if the transcoding fails (for *in is not valid utf8 string or
1793 * the result of transformation can't fit into the encoding we want), or
1794 * -3 if the last byte can't form a single output char.
1796 * The value of @inlen after return is the number of octets consumed
1797 * as the return value is positive, else unpredictable.
1798 * The value of @outlen after return is the number of octets produced.
1801 xmlIconvWrapper(iconv_t cd
, unsigned char *out
, int *outlen
,
1802 const unsigned char *in
, int *inlen
) {
1803 size_t icv_inlen
, icv_outlen
;
1804 const char *icv_in
= (const char *) in
;
1805 char *icv_out
= (char *) out
;
1808 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
) || (in
== NULL
)) {
1809 if (outlen
!= NULL
) *outlen
= 0;
1813 icv_outlen
= *outlen
;
1814 ret
= iconv(cd
, (ICONV_CONST
char **) &icv_in
, &icv_inlen
, &icv_out
, &icv_outlen
);
1815 *inlen
-= icv_inlen
;
1816 *outlen
-= icv_outlen
;
1817 if ((icv_inlen
!= 0) || (ret
== -1)) {
1819 if (errno
== EILSEQ
) {
1824 if (errno
== E2BIG
) {
1829 if (errno
== EINVAL
) {
1839 #endif /* LIBXML_ICONV_ENABLED */
1841 /************************************************************************
1843 * ICU based generic conversion functions *
1845 ************************************************************************/
1847 #ifdef LIBXML_ICU_ENABLED
1850 * @cd: ICU uconverter data structure
1851 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1852 * @out: a pointer to an array of bytes to store the result
1853 * @outlen: the length of @out
1854 * @in: a pointer to an array of input bytes
1855 * @inlen: the length of @in
1856 * @flush: if true, indicates end of input
1858 * Returns 0 if success, or
1859 * -1 by lack of space, or
1860 * -2 if the transcoding fails (for *in is not valid utf8 string or
1861 * the result of transformation can't fit into the encoding we want), or
1862 * -3 if the last byte can't form a single output char.
1864 * The value of @inlen after return is the number of octets consumed
1865 * as the return value is positive, else unpredictable.
1866 * The value of @outlen after return is the number of octets produced.
1869 xmlUconvWrapper(uconv_t
*cd
, int toUnicode
, unsigned char *out
, int *outlen
,
1870 const unsigned char *in
, int *inlen
, int flush
) {
1871 const char *ucv_in
= (const char *) in
;
1872 char *ucv_out
= (char *) out
;
1873 UErrorCode err
= U_ZERO_ERROR
;
1875 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
) || (in
== NULL
)) {
1876 if (outlen
!= NULL
) *outlen
= 0;
1881 /* encoding => UTF-16 => UTF-8 */
1882 ucnv_convertEx(cd
->utf8
, cd
->uconv
, &ucv_out
, ucv_out
+ *outlen
,
1883 &ucv_in
, ucv_in
+ *inlen
, cd
->pivot_buf
,
1884 &cd
->pivot_source
, &cd
->pivot_target
,
1885 cd
->pivot_buf
+ ICU_PIVOT_BUF_SIZE
, 0, flush
, &err
);
1887 /* UTF-8 => UTF-16 => encoding */
1888 ucnv_convertEx(cd
->uconv
, cd
->utf8
, &ucv_out
, ucv_out
+ *outlen
,
1889 &ucv_in
, ucv_in
+ *inlen
, cd
->pivot_buf
,
1890 &cd
->pivot_source
, &cd
->pivot_target
,
1891 cd
->pivot_buf
+ ICU_PIVOT_BUF_SIZE
, 0, flush
, &err
);
1893 *inlen
= ucv_in
- (const char*) in
;
1894 *outlen
= ucv_out
- (char *) out
;
1895 if (U_SUCCESS(err
)) {
1896 /* reset pivot buf if this is the last call for input (flush==TRUE) */
1898 cd
->pivot_source
= cd
->pivot_target
= cd
->pivot_buf
;
1901 if (err
== U_BUFFER_OVERFLOW_ERROR
)
1903 if (err
== U_INVALID_CHAR_FOUND
|| err
== U_ILLEGAL_CHAR_FOUND
)
1907 #endif /* LIBXML_ICU_ENABLED */
1909 /************************************************************************
1911 * The real API used by libxml for on-the-fly conversion *
1913 ************************************************************************/
1916 xmlEncInputChunk(xmlCharEncodingHandler
*handler
, unsigned char *out
,
1917 int *outlen
, const unsigned char *in
, int *inlen
, int flush
) {
1921 if (handler
->input
!= NULL
) {
1922 ret
= handler
->input(out
, outlen
, in
, inlen
);
1924 #ifdef LIBXML_ICONV_ENABLED
1925 else if (handler
->iconv_in
!= NULL
) {
1926 ret
= xmlIconvWrapper(handler
->iconv_in
, out
, outlen
, in
, inlen
);
1928 #endif /* LIBXML_ICONV_ENABLED */
1929 #ifdef LIBXML_ICU_ENABLED
1930 else if (handler
->uconv_in
!= NULL
) {
1931 ret
= xmlUconvWrapper(handler
->uconv_in
, 1, out
, outlen
, in
, inlen
,
1934 #endif /* LIBXML_ICU_ENABLED */
1944 /* Returns -4 if no output function was found. */
1946 xmlEncOutputChunk(xmlCharEncodingHandler
*handler
, unsigned char *out
,
1947 int *outlen
, const unsigned char *in
, int *inlen
) {
1950 if (handler
->output
!= NULL
) {
1951 ret
= handler
->output(out
, outlen
, in
, inlen
);
1953 #ifdef LIBXML_ICONV_ENABLED
1954 else if (handler
->iconv_out
!= NULL
) {
1955 ret
= xmlIconvWrapper(handler
->iconv_out
, out
, outlen
, in
, inlen
);
1957 #endif /* LIBXML_ICONV_ENABLED */
1958 #ifdef LIBXML_ICU_ENABLED
1959 else if (handler
->uconv_out
!= NULL
) {
1960 ret
= xmlUconvWrapper(handler
->uconv_out
, 0, out
, outlen
, in
, inlen
,
1963 #endif /* LIBXML_ICU_ENABLED */
1974 * xmlCharEncFirstLineInt:
1975 * @handler: char encoding transformation data structure
1976 * @out: an xmlBuffer for the output.
1977 * @in: an xmlBuffer for the input
1978 * @len: number of bytes to convert for the first line, or -1
1980 * Front-end for the encoding handler input function, but handle only
1981 * the very first line, i.e. limit itself to 45 chars.
1983 * Returns the number of bytes written on success, or
1985 * -2 if the transcoding fails (for *in is not valid utf8 string or
1986 * the result of transformation can't fit into the encoding we want), or
1989 xmlCharEncFirstLineInt(xmlCharEncodingHandler
*handler
, xmlBufferPtr out
,
1990 xmlBufferPtr in
, int len
) {
1995 if (handler
== NULL
) return(-1);
1996 if (out
== NULL
) return(-1);
1997 if (in
== NULL
) return(-1);
1999 /* calculate space available */
2000 written
= out
->size
- out
->use
- 1; /* count '\0' */
2003 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2004 * 45 chars should be sufficient to reach the end of the encoding
2005 * declaration without going too far inside the document content.
2006 * on UTF-16 this means 90 bytes, on UCS4 this means 180
2007 * The actual value depending on guessed encoding is passed as @len
2017 if (toconv
* 2 >= written
) {
2018 xmlBufferGrow(out
, toconv
* 2);
2019 written
= out
->size
- out
->use
- 1;
2022 ret
= xmlEncInputChunk(handler
, &out
->content
[out
->use
], &written
,
2023 in
->content
, &toconv
, 0);
2024 xmlBufferShrink(in
, toconv
);
2025 out
->use
+= written
;
2026 out
->content
[out
->use
] = 0;
2027 if (ret
== -1) ret
= -3;
2029 #ifdef DEBUG_ENCODING
2032 xmlGenericError(xmlGenericErrorContext
,
2033 "converted %d bytes to %d bytes of input\n",
2037 xmlGenericError(xmlGenericErrorContext
,"converted %d bytes to %d bytes of input, %d left\n",
2038 toconv
, written
, in
->use
);
2041 xmlGenericError(xmlGenericErrorContext
,
2042 "input conversion failed due to input error\n");
2045 xmlGenericError(xmlGenericErrorContext
,"converted %d bytes to %d bytes of input, %d left\n",
2046 toconv
, written
, in
->use
);
2049 xmlGenericError(xmlGenericErrorContext
,"Unknown input conversion failed %d\n", ret
);
2051 #endif /* DEBUG_ENCODING */
2053 * Ignore when input buffer is not on a boundary
2055 if (ret
== -3) ret
= 0;
2056 if (ret
== -1) ret
= 0;
2061 * xmlCharEncFirstLine:
2062 * @handler: char encoding transformation data structure
2063 * @out: an xmlBuffer for the output.
2064 * @in: an xmlBuffer for the input
2066 * Front-end for the encoding handler input function, but handle only
2067 * the very first line, i.e. limit itself to 45 chars.
2069 * Returns the number of bytes written on success, or
2071 * -2 if the transcoding fails (for *in is not valid utf8 string or
2072 * the result of transformation can't fit into the encoding we want), or
2075 xmlCharEncFirstLine(xmlCharEncodingHandler
*handler
, xmlBufferPtr out
,
2077 return(xmlCharEncFirstLineInt(handler
, out
, in
, -1));
2081 * xmlCharEncFirstLineInput:
2082 * @input: a parser input buffer
2083 * @len: number of bytes to convert for the first line, or -1
2085 * Front-end for the encoding handler input function, but handle only
2086 * the very first line. Point is that this is based on autodetection
2087 * of the encoding and once that first line is converted we may find
2088 * out that a different decoder is needed to process the input.
2090 * Returns the number of bytes written on success, or
2092 * -2 if the transcoding fails (for *in is not valid utf8 string or
2093 * the result of transformation can't fit into the encoding we want), or
2096 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input
, int len
)
2106 if ((input
== NULL
) || (input
->encoder
== NULL
) ||
2107 (input
->buffer
== NULL
) || (input
->raw
== NULL
))
2109 out
= input
->buffer
;
2112 toconv
= xmlBufUse(in
);
2115 written
= xmlBufAvail(out
) - 1; /* count '\0' */
2117 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2118 * 45 chars should be sufficient to reach the end of the encoding
2119 * declaration without going too far inside the document content.
2120 * on UTF-16 this means 90 bytes, on UCS4 this means 180
2121 * The actual value depending on guessed encoding is passed as @len
2125 if (toconv
> (unsigned int) len
)
2131 if (toconv
* 2 >= written
) {
2132 xmlBufGrow(out
, toconv
* 2);
2133 written
= xmlBufAvail(out
) - 1;
2140 ret
= xmlEncInputChunk(input
->encoder
, xmlBufEnd(out
), &c_out
,
2141 xmlBufContent(in
), &c_in
, 0);
2142 xmlBufShrink(in
, c_in
);
2143 xmlBufAddLen(out
, c_out
);
2149 #ifdef DEBUG_ENCODING
2150 xmlGenericError(xmlGenericErrorContext
,
2151 "converted %d bytes to %d bytes of input\n",
2156 #ifdef DEBUG_ENCODING
2157 xmlGenericError(xmlGenericErrorContext
,
2158 "converted %d bytes to %d bytes of input, %d left\n",
2159 c_in
, c_out
, (int)xmlBufUse(in
));
2163 #ifdef DEBUG_ENCODING
2164 xmlGenericError(xmlGenericErrorContext
,
2165 "converted %d bytes to %d bytes of input, %d left\n",
2166 c_in
, c_out
, (int)xmlBufUse(in
));
2171 const xmlChar
*content
= xmlBufContent(in
);
2173 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2174 content
[0], content
[1],
2175 content
[2], content
[3]);
2177 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2178 "input conversion failed due to input error, bytes %s\n",
2183 * Ignore when input buffer is not on a boundary
2185 if (ret
== -3) ret
= 0;
2186 if (ret
== -1) ret
= 0;
2192 * @input: a parser input buffer
2193 * @flush: try to flush all the raw buffer
2195 * Generic front-end for the encoding handler on parser input
2197 * Returns the number of bytes written on success, or
2199 * -2 if the transcoding fails (for *in is not valid utf8 string or
2200 * the result of transformation can't fit into the encoding we want), or
2203 xmlCharEncInput(xmlParserInputBufferPtr input
, int flush
)
2213 if ((input
== NULL
) || (input
->encoder
== NULL
) ||
2214 (input
->buffer
== NULL
) || (input
->raw
== NULL
))
2216 out
= input
->buffer
;
2219 toconv
= xmlBufUse(in
);
2222 if ((toconv
> 64 * 1024) && (flush
== 0))
2224 written
= xmlBufAvail(out
);
2226 written
--; /* count '\0' */
2227 if (toconv
* 2 >= written
) {
2228 xmlBufGrow(out
, toconv
* 2);
2229 written
= xmlBufAvail(out
);
2231 written
--; /* count '\0' */
2233 if ((written
> 128 * 1024) && (flush
== 0))
2234 written
= 128 * 1024;
2238 ret
= xmlEncInputChunk(input
->encoder
, xmlBufEnd(out
), &c_out
,
2239 xmlBufContent(in
), &c_in
, flush
);
2240 xmlBufShrink(in
, c_in
);
2241 xmlBufAddLen(out
, c_out
);
2247 #ifdef DEBUG_ENCODING
2248 xmlGenericError(xmlGenericErrorContext
,
2249 "converted %d bytes to %d bytes of input\n",
2254 #ifdef DEBUG_ENCODING
2255 xmlGenericError(xmlGenericErrorContext
,
2256 "converted %d bytes to %d bytes of input, %d left\n",
2257 c_in
, c_out
, (int)xmlBufUse(in
));
2261 #ifdef DEBUG_ENCODING
2262 xmlGenericError(xmlGenericErrorContext
,
2263 "converted %d bytes to %d bytes of input, %d left\n",
2264 c_in
, c_out
, (int)xmlBufUse(in
));
2269 const xmlChar
*content
= xmlBufContent(in
);
2271 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2272 content
[0], content
[1],
2273 content
[2], content
[3]);
2275 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2276 "input conversion failed due to input error, bytes %s\n",
2281 * Ignore when input buffer is not on a boundary
2285 return (c_out
? c_out
: ret
);
2290 * @handler: char encoding transformation data structure
2291 * @out: an xmlBuffer for the output.
2292 * @in: an xmlBuffer for the input
2294 * Generic front-end for the encoding handler input function
2296 * Returns the number of bytes written on success, or
2298 * -2 if the transcoding fails (for *in is not valid utf8 string or
2299 * the result of transformation can't fit into the encoding we want), or
2302 xmlCharEncInFunc(xmlCharEncodingHandler
* handler
, xmlBufferPtr out
,
2309 if (handler
== NULL
)
2319 written
= out
->size
- out
->use
-1; /* count '\0' */
2320 if (toconv
* 2 >= written
) {
2321 xmlBufferGrow(out
, out
->size
+ toconv
* 2);
2322 written
= out
->size
- out
->use
- 1;
2324 ret
= xmlEncInputChunk(handler
, &out
->content
[out
->use
], &written
,
2325 in
->content
, &toconv
, 1);
2326 xmlBufferShrink(in
, toconv
);
2327 out
->use
+= written
;
2328 out
->content
[out
->use
] = 0;
2334 #ifdef DEBUG_ENCODING
2335 xmlGenericError(xmlGenericErrorContext
,
2336 "converted %d bytes to %d bytes of input\n",
2341 #ifdef DEBUG_ENCODING
2342 xmlGenericError(xmlGenericErrorContext
,
2343 "converted %d bytes to %d bytes of input, %d left\n",
2344 toconv
, written
, in
->use
);
2348 #ifdef DEBUG_ENCODING
2349 xmlGenericError(xmlGenericErrorContext
,
2350 "converted %d bytes to %d bytes of input, %d left\n",
2351 toconv
, written
, in
->use
);
2357 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2358 in
->content
[0], in
->content
[1],
2359 in
->content
[2], in
->content
[3]);
2361 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2362 "input conversion failed due to input error, bytes %s\n",
2367 * Ignore when input buffer is not on a boundary
2371 return (written
? written
: ret
);
2374 #ifdef LIBXML_OUTPUT_ENABLED
2377 * @output: a parser output buffer
2378 * @init: is this an initialization call without data
2380 * Generic front-end for the encoding handler on parser output
2381 * a first call with @init == 1 has to be made first to initiate the
2382 * output in case of non-stateless encoding needing to initiate their
2383 * state or the output (like the BOM in UTF16).
2384 * In case of UTF8 sequence conversion errors for the given encoder,
2385 * the content will be automatically remapped to a CharRef sequence.
2387 * Returns the number of bytes written on success, or
2389 * -2 if the transcoding fails (for *in is not valid utf8 string or
2390 * the result of transformation can't fit into the encoding we want), or
2393 xmlCharEncOutput(xmlOutputBufferPtr output
, int init
)
2397 size_t writtentot
= 0;
2404 if ((output
== NULL
) || (output
->encoder
== NULL
) ||
2405 (output
->buffer
== NULL
) || (output
->conv
== NULL
))
2408 in
= output
->buffer
;
2412 written
= xmlBufAvail(out
);
2414 written
--; /* count '\0' */
2417 * First specific handling of the initialization call
2422 /* TODO: Check return value. */
2423 xmlEncOutputChunk(output
->encoder
, xmlBufEnd(out
), &c_out
,
2425 xmlBufAddLen(out
, c_out
);
2426 #ifdef DEBUG_ENCODING
2427 xmlGenericError(xmlGenericErrorContext
,
2428 "initialized encoder\n");
2434 * Conversion itself.
2436 toconv
= xmlBufUse(in
);
2439 if (toconv
> 64 * 1024)
2441 if (toconv
* 4 >= written
) {
2442 xmlBufGrow(out
, toconv
* 4);
2443 written
= xmlBufAvail(out
) - 1;
2445 if (written
> 256 * 1024)
2446 written
= 256 * 1024;
2450 ret
= xmlEncOutputChunk(output
->encoder
, xmlBufEnd(out
), &c_out
,
2451 xmlBufContent(in
), &c_in
);
2452 xmlBufShrink(in
, c_in
);
2453 xmlBufAddLen(out
, c_out
);
2454 writtentot
+= c_out
;
2457 /* Can be a limitation of iconv or uconv */
2464 * Attempt to handle error cases
2468 #ifdef DEBUG_ENCODING
2469 xmlGenericError(xmlGenericErrorContext
,
2470 "converted %d bytes to %d bytes of output\n",
2475 #ifdef DEBUG_ENCODING
2476 xmlGenericError(xmlGenericErrorContext
,
2477 "output conversion failed by lack of space\n");
2481 #ifdef DEBUG_ENCODING
2482 xmlGenericError(xmlGenericErrorContext
,"converted %d bytes to %d bytes of output %d left\n",
2483 c_in
, c_out
, (int) xmlBufUse(in
));
2487 xmlEncodingErr(XML_I18N_NO_OUTPUT
,
2488 "xmlCharEncOutFunc: no output function !\n", NULL
);
2492 xmlChar charref
[20];
2493 int len
= (int) xmlBufUse(in
);
2494 xmlChar
*content
= xmlBufContent(in
);
2495 int cur
, charrefLen
;
2497 cur
= xmlGetUTF8Char(content
, &len
);
2501 #ifdef DEBUG_ENCODING
2502 xmlGenericError(xmlGenericErrorContext
,
2503 "handling output conversion error\n");
2504 xmlGenericError(xmlGenericErrorContext
,
2505 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2506 content
[0], content
[1],
2507 content
[2], content
[3]);
2510 * Remove the UTF-8 sequence, replace it with a charref,
2511 * and continue the transcoding phase, hoping the error
2512 * did not mangle the encoder state.
2514 charrefLen
= snprintf((char *) &charref
[0], sizeof(charref
),
2516 xmlBufShrink(in
, len
);
2517 xmlBufGrow(out
, charrefLen
* 4);
2518 c_out
= xmlBufAvail(out
) - 1;
2520 ret
= xmlEncOutputChunk(output
->encoder
, xmlBufEnd(out
), &c_out
,
2523 if ((ret
< 0) || (c_in
!= charrefLen
)) {
2526 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2527 content
[0], content
[1],
2528 content
[2], content
[3]);
2530 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2531 "output conversion failed due to conv error, bytes %s\n",
2533 if (xmlBufGetAllocationScheme(in
) != XML_BUFFER_ALLOC_IMMUTABLE
)
2538 xmlBufAddLen(out
, c_out
);
2539 writtentot
+= c_out
;
2548 * xmlCharEncOutFunc:
2549 * @handler: char encoding transformation data structure
2550 * @out: an xmlBuffer for the output.
2551 * @in: an xmlBuffer for the input
2553 * Generic front-end for the encoding handler output function
2554 * a first call with @in == NULL has to be made first to initiate the
2555 * output in case of non-stateless encoding needing to initiate their
2556 * state or the output (like the BOM in UTF16).
2557 * In case of UTF8 sequence conversion errors for the given encoder,
2558 * the content will be automatically remapped to a CharRef sequence.
2560 * Returns the number of bytes written on success, or
2562 * -2 if the transcoding fails (for *in is not valid utf8 string or
2563 * the result of transformation can't fit into the encoding we want), or
2566 xmlCharEncOutFunc(xmlCharEncodingHandler
*handler
, xmlBufferPtr out
,
2574 if (handler
== NULL
) return(-1);
2575 if (out
== NULL
) return(-1);
2579 written
= out
->size
- out
->use
;
2582 written
--; /* Gennady: count '/0' */
2585 * First specific handling of in = NULL, i.e. the initialization call
2589 /* TODO: Check return value. */
2590 xmlEncOutputChunk(handler
, &out
->content
[out
->use
], &written
,
2592 out
->use
+= written
;
2593 out
->content
[out
->use
] = 0;
2594 #ifdef DEBUG_ENCODING
2595 xmlGenericError(xmlGenericErrorContext
,
2596 "initialized encoder\n");
2602 * Conversion itself.
2607 if (toconv
* 4 >= written
) {
2608 xmlBufferGrow(out
, toconv
* 4);
2609 written
= out
->size
- out
->use
- 1;
2611 ret
= xmlEncOutputChunk(handler
, &out
->content
[out
->use
], &written
,
2612 in
->content
, &toconv
);
2613 xmlBufferShrink(in
, toconv
);
2614 out
->use
+= written
;
2615 writtentot
+= written
;
2616 out
->content
[out
->use
] = 0;
2619 /* Can be a limitation of iconv or uconv */
2625 if (ret
>= 0) output
+= ret
;
2628 * Attempt to handle error cases
2632 #ifdef DEBUG_ENCODING
2633 xmlGenericError(xmlGenericErrorContext
,
2634 "converted %d bytes to %d bytes of output\n",
2639 #ifdef DEBUG_ENCODING
2640 xmlGenericError(xmlGenericErrorContext
,
2641 "output conversion failed by lack of space\n");
2645 #ifdef DEBUG_ENCODING
2646 xmlGenericError(xmlGenericErrorContext
,"converted %d bytes to %d bytes of output %d left\n",
2647 toconv
, written
, in
->use
);
2651 xmlEncodingErr(XML_I18N_NO_OUTPUT
,
2652 "xmlCharEncOutFunc: no output function !\n", NULL
);
2656 xmlChar charref
[20];
2658 const xmlChar
*utf
= (const xmlChar
*) in
->content
;
2659 int cur
, charrefLen
;
2661 cur
= xmlGetUTF8Char(utf
, &len
);
2665 #ifdef DEBUG_ENCODING
2666 xmlGenericError(xmlGenericErrorContext
,
2667 "handling output conversion error\n");
2668 xmlGenericError(xmlGenericErrorContext
,
2669 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2670 in
->content
[0], in
->content
[1],
2671 in
->content
[2], in
->content
[3]);
2674 * Removes the UTF8 sequence, and replace it by a charref
2675 * and continue the transcoding phase, hoping the error
2676 * did not mangle the encoder state.
2678 charrefLen
= snprintf((char *) &charref
[0], sizeof(charref
),
2680 xmlBufferShrink(in
, len
);
2681 xmlBufferGrow(out
, charrefLen
* 4);
2682 written
= out
->size
- out
->use
- 1;
2683 toconv
= charrefLen
;
2684 ret
= xmlEncOutputChunk(handler
, &out
->content
[out
->use
], &written
,
2687 if ((ret
< 0) || (toconv
!= charrefLen
)) {
2690 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2691 in
->content
[0], in
->content
[1],
2692 in
->content
[2], in
->content
[3]);
2694 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2695 "output conversion failed due to conv error, bytes %s\n",
2697 if (in
->alloc
!= XML_BUFFER_ALLOC_IMMUTABLE
)
2698 in
->content
[0] = ' ';
2702 out
->use
+= written
;
2703 writtentot
+= written
;
2704 out
->content
[out
->use
] = 0;
2712 * xmlCharEncCloseFunc:
2713 * @handler: char encoding transformation data structure
2715 * Generic front-end for encoding handler close function
2717 * Returns 0 if success, or -1 in case of error
2720 xmlCharEncCloseFunc(xmlCharEncodingHandler
*handler
) {
2723 int i
, handler_in_list
= 0;
2725 if (handler
== NULL
) return(-1);
2726 if (handler
->name
== NULL
) return(-1);
2727 if (handlers
!= NULL
) {
2728 for (i
= 0;i
< nbCharEncodingHandler
; i
++) {
2729 if (handler
== handlers
[i
]) {
2730 handler_in_list
= 1;
2735 #ifdef LIBXML_ICONV_ENABLED
2737 * Iconv handlers can be used only once, free the whole block.
2738 * and the associated iconv resources.
2740 if ((handler_in_list
== 0) &&
2741 ((handler
->iconv_out
!= NULL
) || (handler
->iconv_in
!= NULL
))) {
2743 if (handler
->iconv_out
!= NULL
) {
2744 if (iconv_close(handler
->iconv_out
))
2746 handler
->iconv_out
= NULL
;
2748 if (handler
->iconv_in
!= NULL
) {
2749 if (iconv_close(handler
->iconv_in
))
2751 handler
->iconv_in
= NULL
;
2754 #endif /* LIBXML_ICONV_ENABLED */
2755 #ifdef LIBXML_ICU_ENABLED
2756 if ((handler_in_list
== 0) &&
2757 ((handler
->uconv_out
!= NULL
) || (handler
->uconv_in
!= NULL
))) {
2759 if (handler
->uconv_out
!= NULL
) {
2760 closeIcuConverter(handler
->uconv_out
);
2761 handler
->uconv_out
= NULL
;
2763 if (handler
->uconv_in
!= NULL
) {
2764 closeIcuConverter(handler
->uconv_in
);
2765 handler
->uconv_in
= NULL
;
2770 /* free up only dynamic handlers iconv/uconv */
2771 if (handler
->name
!= NULL
)
2772 xmlFree(handler
->name
);
2773 handler
->name
= NULL
;
2776 #ifdef DEBUG_ENCODING
2778 xmlGenericError(xmlGenericErrorContext
,
2779 "failed to close the encoding handler\n");
2781 xmlGenericError(xmlGenericErrorContext
,
2782 "closed the encoding handler\n");
2790 * @ctxt: an XML parser context
2792 * This function provides the current index of the parser relative
2793 * to the start of the current entity. This function is computed in
2794 * bytes from the beginning starting at zero and finishing at the
2795 * size in byte of the file if parsing a file. The function is
2796 * of constant cost if the input is UTF-8 but can be costly if run
2797 * on non-UTF-8 input.
2799 * Returns the index in bytes from the beginning of the entity or -1
2800 * in case the index could not be computed.
2803 xmlByteConsumed(xmlParserCtxtPtr ctxt
) {
2804 xmlParserInputPtr in
;
2806 if (ctxt
== NULL
) return(-1);
2808 if (in
== NULL
) return(-1);
2809 if ((in
->buf
!= NULL
) && (in
->buf
->encoder
!= NULL
)) {
2810 unsigned int unused
= 0;
2811 xmlCharEncodingHandler
* handler
= in
->buf
->encoder
;
2813 * Encoding conversion, compute the number of unused original
2814 * bytes from the input not consumed and subtract that from
2815 * the raw consumed value, this is not a cheap operation
2817 if (in
->end
- in
->cur
> 0) {
2818 unsigned char convbuf
[32000];
2819 const unsigned char *cur
= (const unsigned char *)in
->cur
;
2820 int toconv
= in
->end
- in
->cur
, written
= 32000;
2825 toconv
= in
->end
- cur
;
2827 ret
= xmlEncOutputChunk(handler
, &convbuf
[0], &written
,
2837 } while (ret
== -2);
2839 if (in
->buf
->rawconsumed
< unused
)
2841 return(in
->buf
->rawconsumed
- unused
);
2843 return(in
->consumed
+ (in
->cur
- in
->base
));
2846 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2847 #ifdef LIBXML_ISO8859X_ENABLED
2851 * @out: a pointer to an array of bytes to store the result
2852 * @outlen: the length of @out
2853 * @in: a pointer to an array of UTF-8 chars
2854 * @inlen: the length of @in
2855 * @xlattable: the 2-level transcoding table
2857 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2858 * block of chars out.
2860 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2861 * The value of @inlen after return is the number of octets consumed
2862 * as the return value is positive, else unpredictable.
2863 * The value of @outlen after return is the number of octets produced.
2866 UTF8ToISO8859x(unsigned char* out
, int *outlen
,
2867 const unsigned char* in
, int *inlen
,
2868 unsigned char const *xlattable
) {
2869 const unsigned char* outstart
= out
;
2870 const unsigned char* inend
;
2871 const unsigned char* instart
= in
;
2872 const unsigned char* processed
= in
;
2874 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
) ||
2875 (xlattable
== NULL
))
2879 * initialization nothing to do
2885 inend
= in
+ (*inlen
);
2886 while (in
< inend
) {
2887 unsigned char d
= *in
++;
2890 } else if (d
< 0xC0) {
2891 /* trailing byte in leading position */
2892 *outlen
= out
- outstart
;
2893 *inlen
= processed
- instart
;
2895 } else if (d
< 0xE0) {
2897 if (!(in
< inend
)) {
2898 /* trailing byte not in input buffer */
2899 *outlen
= out
- outstart
;
2900 *inlen
= processed
- instart
;
2904 if ((c
& 0xC0) != 0x80) {
2905 /* not a trailing byte */
2906 *outlen
= out
- outstart
;
2907 *inlen
= processed
- instart
;
2912 d
= xlattable
[48 + c
+ xlattable
[d
] * 64];
2914 /* not in character set */
2915 *outlen
= out
- outstart
;
2916 *inlen
= processed
- instart
;
2920 } else if (d
< 0xF0) {
2923 if (!(in
< inend
- 1)) {
2924 /* trailing bytes not in input buffer */
2925 *outlen
= out
- outstart
;
2926 *inlen
= processed
- instart
;
2930 if ((c1
& 0xC0) != 0x80) {
2931 /* not a trailing byte (c1) */
2932 *outlen
= out
- outstart
;
2933 *inlen
= processed
- instart
;
2937 if ((c2
& 0xC0) != 0x80) {
2938 /* not a trailing byte (c2) */
2939 *outlen
= out
- outstart
;
2940 *inlen
= processed
- instart
;
2946 d
= xlattable
[48 + c2
+ xlattable
[48 + c1
+
2947 xlattable
[32 + d
] * 64] * 64];
2949 /* not in character set */
2950 *outlen
= out
- outstart
;
2951 *inlen
= processed
- instart
;
2956 /* cannot transcode >= U+010000 */
2957 *outlen
= out
- outstart
;
2958 *inlen
= processed
- instart
;
2963 *outlen
= out
- outstart
;
2964 *inlen
= processed
- instart
;
2970 * @out: a pointer to an array of bytes to store the result
2971 * @outlen: the length of @out
2972 * @in: a pointer to an array of ISO 8859-* chars
2973 * @inlen: the length of @in
2975 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
2976 * block of chars out.
2977 * Returns 0 if success, or -1 otherwise
2978 * The value of @inlen after return is the number of octets consumed
2979 * The value of @outlen after return is the number of octets produced.
2982 ISO8859xToUTF8(unsigned char* out
, int *outlen
,
2983 const unsigned char* in
, int *inlen
,
2984 unsigned short const *unicodetable
) {
2985 unsigned char* outstart
= out
;
2986 unsigned char* outend
;
2987 const unsigned char* instart
= in
;
2988 const unsigned char* inend
;
2989 const unsigned char* instop
;
2992 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
) ||
2993 (in
== NULL
) || (unicodetable
== NULL
))
2995 outend
= out
+ *outlen
;
2996 inend
= in
+ *inlen
;
2999 while ((in
< inend
) && (out
< outend
- 2)) {
3001 c
= unicodetable
[*in
- 0x80];
3003 /* undefined code point */
3004 *outlen
= out
- outstart
;
3005 *inlen
= in
- instart
;
3009 *out
++ = ((c
>> 6) & 0x1F) | 0xC0;
3010 *out
++ = (c
& 0x3F) | 0x80;
3012 *out
++ = ((c
>> 12) & 0x0F) | 0xE0;
3013 *out
++ = ((c
>> 6) & 0x3F) | 0x80;
3014 *out
++ = (c
& 0x3F) | 0x80;
3018 if (instop
- in
> outend
- out
) instop
= in
+ (outend
- out
);
3019 while ((*in
< 0x80) && (in
< instop
)) {
3023 if ((in
< inend
) && (out
< outend
) && (*in
< 0x80)) {
3026 if ((in
< inend
) && (out
< outend
) && (*in
< 0x80)) {
3029 *outlen
= out
- outstart
;
3030 *inlen
= in
- instart
;
3035 /************************************************************************
3036 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
3037 ************************************************************************/
3039 static unsigned short const xmlunicodetable_ISO8859_2
[128] = {
3040 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3041 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3042 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3043 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3044 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3045 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3046 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3047 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3048 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3049 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3050 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3051 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3052 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3053 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3054 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3055 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3058 static unsigned char const xmltranscodetable_ISO8859_2
[48 + 6 * 64] = {
3059 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3060 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3062 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3063 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3064 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3065 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3066 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3067 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3068 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3069 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3070 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3071 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3072 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3073 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3074 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3075 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3076 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3077 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3078 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3079 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3080 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3081 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3082 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3083 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3084 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3085 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3088 static unsigned short const xmlunicodetable_ISO8859_3
[128] = {
3089 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3090 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3091 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3092 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3093 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3094 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3095 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3096 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3097 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3098 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3099 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3100 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3101 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3102 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3103 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3104 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3107 static unsigned char const xmltranscodetable_ISO8859_3
[48 + 7 * 64] = {
3108 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3109 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3112 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3113 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3114 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3115 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3116 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3117 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3118 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3119 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3120 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3121 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3122 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3123 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3124 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3125 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3126 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3127 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3128 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3129 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3130 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3131 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3132 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3133 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3134 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3135 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3136 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3137 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3138 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3141 static unsigned short const xmlunicodetable_ISO8859_4
[128] = {
3142 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3143 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3144 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3145 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3146 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3147 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3148 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3149 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3150 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3151 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3152 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3153 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3154 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3155 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3156 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3157 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3160 static unsigned char const xmltranscodetable_ISO8859_4
[48 + 6 * 64] = {
3161 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3162 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3169 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3170 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3171 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3172 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3173 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3174 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3175 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3176 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3177 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3178 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3179 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3180 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3181 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3182 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3183 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3185 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3186 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3187 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3190 static unsigned short const xmlunicodetable_ISO8859_5
[128] = {
3191 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3192 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3193 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3194 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3195 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3196 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3197 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3198 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3199 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3200 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3201 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3202 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3203 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3204 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3205 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3206 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3209 static unsigned char const xmltranscodetable_ISO8859_5
[48 + 6 * 64] = {
3210 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3216 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3217 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3218 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3219 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3220 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3221 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3222 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3223 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3224 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3225 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3226 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3227 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3234 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3235 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3236 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3239 static unsigned short const xmlunicodetable_ISO8859_6
[128] = {
3240 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3241 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3242 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3243 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3244 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3245 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3246 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3247 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3248 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3249 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3250 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3251 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3252 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3253 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3254 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3255 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3258 static unsigned char const xmltranscodetable_ISO8859_6
[48 + 5 * 64] = {
3259 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3261 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3264 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3265 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3266 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3267 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3268 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3269 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3271 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3272 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3273 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3274 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3275 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3276 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3277 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3278 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3279 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3280 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3281 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3284 static unsigned short const xmlunicodetable_ISO8859_7
[128] = {
3285 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3286 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3287 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3288 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3289 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3290 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3291 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3292 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3293 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3294 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3295 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3296 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3297 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3298 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3299 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3300 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3303 static unsigned char const xmltranscodetable_ISO8859_7
[48 + 7 * 64] = {
3304 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3305 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3312 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3313 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3314 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3315 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3321 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3325 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3326 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3327 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3328 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3329 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3330 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3331 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3332 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3337 static unsigned short const xmlunicodetable_ISO8859_8
[128] = {
3338 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3339 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3340 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3341 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3342 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3343 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3344 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3345 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3346 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3347 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3348 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3349 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3350 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3351 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3352 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3353 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
/*
 * xmltranscodetable_ISO8859_8: byte lookup table that UTF8ToISO8859_8
 * hands to UTF8ToISO8859x as its last argument.
 *
 * Declared with 48 + 7 * 64 = 496 elements and initialised from 31
 * adjacent string literals of 16 bytes each, i.e. exactly 496 bytes, so
 * the literal's terminating NUL has no room and is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * seven 64-byte pages; confirm against UTF8ToISO8859x before editing.
 */
3356 static unsigned char const xmltranscodetable_ISO8859_8
[48 + 7 * 64] = {
3357 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3358 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3359 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3365 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3366 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3367 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3368 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3374 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3376 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3377 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3378 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3381 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3382 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3386 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3387 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_9: 128 unsigned short values that
 * ISO8859_9ToUTF8 hands to ISO8859xToUTF8 as its last argument.
 *
 * The first 32 entries are 0x0080..0x009f in order.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; confirm against ISO8859xToUTF8.
 */
3390 static unsigned short const xmlunicodetable_ISO8859_9
[128] = {
3391 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3392 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3393 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3394 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3395 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3396 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3397 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3398 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3399 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3400 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3401 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3402 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3403 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3404 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3405 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3406 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
/*
 * xmltranscodetable_ISO8859_9: byte lookup table that UTF8ToISO8859_9
 * hands to UTF8ToISO8859x as its last argument.
 *
 * Declared with 48 + 5 * 64 = 368 elements and initialised from 23
 * adjacent string literals of 16 bytes each, i.e. exactly 368 bytes, so
 * the literal's terminating NUL has no room and is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * five 64-byte pages; confirm against UTF8ToISO8859x before editing.
 */
3409 static unsigned char const xmltranscodetable_ISO8859_9
[48 + 5 * 64] = {
3410 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3418 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3419 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3420 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3421 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3422 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3423 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3424 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3425 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3427 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3428 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3431 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_10: 128 unsigned short values that
 * ISO8859_10ToUTF8 hands to ISO8859xToUTF8 as its last argument.
 *
 * The first 32 entries are 0x0080..0x009f in order.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; confirm against ISO8859xToUTF8.
 */
3435 static unsigned short const xmlunicodetable_ISO8859_10
[128] = {
3436 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3437 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3438 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3439 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3440 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3441 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3442 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3443 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3444 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3445 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3446 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3447 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3448 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3449 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3450 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3451 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
/*
 * xmltranscodetable_ISO8859_10: byte lookup table that UTF8ToISO8859_10
 * hands to UTF8ToISO8859x as its last argument.
 *
 * Declared with 48 + 7 * 64 = 496 elements and initialised from 31
 * adjacent string literals of 16 bytes each, i.e. exactly 496 bytes, so
 * the literal's terminating NUL has no room and is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * seven 64-byte pages; confirm against UTF8ToISO8859x before editing.
 */
3454 static unsigned char const xmltranscodetable_ISO8859_10
[48 + 7 * 64] = {
3455 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3460 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3461 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3463 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3464 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3465 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3466 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3467 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3468 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3469 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3470 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3471 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3473 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3474 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3480 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3481 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3482 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3483 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3484 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3485 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
/*
 * xmlunicodetable_ISO8859_11: 128 unsigned short values that
 * ISO8859_11ToUTF8 hands to ISO8859xToUTF8 as its last argument.
 *
 * The first 32 entries are 0x0080..0x009f in order.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i, and the 0x0000 entries mark bytes with no assigned
 * character; confirm against ISO8859xToUTF8.
 */
3488 static unsigned short const xmlunicodetable_ISO8859_11
[128] = {
3489 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3490 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3491 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3492 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3493 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3494 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3495 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3496 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3497 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3498 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3499 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3500 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3501 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3502 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3503 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3504 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
/*
 * xmltranscodetable_ISO8859_11: byte lookup table that UTF8ToISO8859_11
 * hands to UTF8ToISO8859x as its last argument.
 *
 * Declared with 48 + 6 * 64 = 432 elements and initialised from 27
 * adjacent string literals of 16 bytes each, i.e. exactly 432 bytes, so
 * the literal's terminating NUL has no room and is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * six 64-byte pages; confirm against UTF8ToISO8859x before editing.
 */
3507 static unsigned char const xmltranscodetable_ISO8859_11
[48 + 6 * 64] = {
3508 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3513 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3514 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3516 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3517 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3520 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3521 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3522 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3523 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3524 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3525 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3526 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3527 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3528 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3529 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3530 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3532 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3533 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_13: 128 unsigned short values that
 * ISO8859_13ToUTF8 hands to ISO8859xToUTF8 as its last argument.
 *
 * The first 32 entries are 0x0080..0x009f in order.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; confirm against ISO8859xToUTF8.
 */
3537 static unsigned short const xmlunicodetable_ISO8859_13
[128] = {
3538 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3539 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3540 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3541 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3542 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3543 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3544 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3545 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3546 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3547 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3548 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3549 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3550 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3551 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3552 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3553 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
/*
 * xmltranscodetable_ISO8859_13: byte lookup table that UTF8ToISO8859_13
 * hands to UTF8ToISO8859x as its last argument.
 *
 * Declared with 48 + 7 * 64 = 496 elements and initialised from 31
 * adjacent string literals of 16 bytes each, i.e. exactly 496 bytes, so
 * the literal's terminating NUL has no room and is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * seven 64-byte pages; confirm against UTF8ToISO8859x before editing.
 */
3556 static unsigned char const xmltranscodetable_ISO8859_13
[48 + 7 * 64] = {
3557 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3562 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3564 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3565 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3566 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3567 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3568 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3571 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3574 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3575 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3577 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3578 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3579 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3580 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3581 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3582 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3583 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3584 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3585 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3586 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3587 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_14: 128 unsigned short values that
 * ISO8859_14ToUTF8 hands to ISO8859xToUTF8 as its last argument.
 *
 * The first 32 entries are 0x0080..0x009f in order.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; confirm against ISO8859xToUTF8.
 */
3590 static unsigned short const xmlunicodetable_ISO8859_14
[128] = {
3591 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3592 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3593 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3594 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3595 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3596 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3597 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3598 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3599 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3600 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3601 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3602 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3603 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3604 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3605 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3606 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
/*
 * xmltranscodetable_ISO8859_14: byte lookup table that UTF8ToISO8859_14
 * hands to UTF8ToISO8859x as its last argument.
 *
 * Declared with 48 + 10 * 64 = 688 elements and initialised from 43
 * adjacent string literals of 16 bytes each, i.e. exactly 688 bytes, so
 * the literal's terminating NUL has no room and is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * ten 64-byte pages; confirm against UTF8ToISO8859x before editing.
 */
3609 static unsigned char const xmltranscodetable_ISO8859_14
[48 + 10 * 64] = {
3610 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3617 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3618 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3619 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3620 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3624 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3625 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3626 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3627 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3628 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3630 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3633 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3634 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3635 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3636 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3637 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3638 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3639 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3640 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3641 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3642 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3644 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3645 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3646 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3647 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3648 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3649 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3650 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3651 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3652 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
/*
 * xmlunicodetable_ISO8859_15: 128 unsigned short values that
 * ISO8859_15ToUTF8 hands to ISO8859xToUTF8 as its last argument.
 *
 * The first 32 entries are 0x0080..0x009f in order.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; confirm against ISO8859xToUTF8.
 */
3655 static unsigned short const xmlunicodetable_ISO8859_15
[128] = {
3656 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3657 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3658 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3659 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3660 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3661 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3662 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3663 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3664 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3665 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3666 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3667 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3668 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3669 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3670 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3671 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
/*
 * xmltranscodetable_ISO8859_15: byte lookup table that UTF8ToISO8859_15
 * hands to UTF8ToISO8859x as its last argument.
 *
 * Declared with 48 + 6 * 64 = 432 elements and initialised from 27
 * adjacent string literals of 16 bytes each, i.e. exactly 432 bytes, so
 * the literal's terminating NUL has no room and is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * six 64-byte pages; confirm against UTF8ToISO8859x before editing.
 */
3674 static unsigned char const xmltranscodetable_ISO8859_15
[48 + 6 * 64] = {
3675 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3680 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3681 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3682 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3683 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3684 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3685 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3686 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3688 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3692 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3693 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3695 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3697 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3698 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3699 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3700 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3701 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
/*
 * xmlunicodetable_ISO8859_16: 128 unsigned short values that
 * ISO8859_16ToUTF8 hands to ISO8859xToUTF8 as its last argument.
 *
 * The first 32 entries are 0x0080..0x009f in order.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; confirm against ISO8859xToUTF8.
 */
3704 static unsigned short const xmlunicodetable_ISO8859_16
[128] = {
3705 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3706 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3707 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3708 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3709 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3710 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3711 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3712 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3713 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3714 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3715 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3716 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3717 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3718 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3719 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3720 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
/*
 * xmltranscodetable_ISO8859_16: byte lookup table that UTF8ToISO8859_16
 * hands to UTF8ToISO8859x as its last argument.
 *
 * Declared with 48 + 9 * 64 = 624 elements and initialised from 39
 * adjacent string literals of 16 bytes each, i.e. exactly 624 bytes, so
 * the literal's terminating NUL has no room and is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * nine 64-byte pages; confirm against UTF8ToISO8859x before editing.
 */
3723 static unsigned char const xmltranscodetable_ISO8859_16
[48 + 9 * 64] = {
3724 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3725 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3726 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3727 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3728 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3729 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3730 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3731 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3732 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3733 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3734 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3735 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3736 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3737 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3738 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3740 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3741 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3743 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3746 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3747 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3748 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3749 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3750 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3751 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3752 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3753 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3754 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3755 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3756 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3757 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3758 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3759 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3760 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3761 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3762 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3767 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3770 static int ISO8859_2ToUTF8 (unsigned char* out
, int *outlen
,
3771 const unsigned char* in
, int *inlen
) {
3772 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_2
);
3774 static int UTF8ToISO8859_2 (unsigned char* out
, int *outlen
,
3775 const unsigned char* in
, int *inlen
) {
3776 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_2
);
3779 static int ISO8859_3ToUTF8 (unsigned char* out
, int *outlen
,
3780 const unsigned char* in
, int *inlen
) {
3781 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_3
);
3783 static int UTF8ToISO8859_3 (unsigned char* out
, int *outlen
,
3784 const unsigned char* in
, int *inlen
) {
3785 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_3
);
3788 static int ISO8859_4ToUTF8 (unsigned char* out
, int *outlen
,
3789 const unsigned char* in
, int *inlen
) {
3790 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_4
);
3792 static int UTF8ToISO8859_4 (unsigned char* out
, int *outlen
,
3793 const unsigned char* in
, int *inlen
) {
3794 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_4
);
3797 static int ISO8859_5ToUTF8 (unsigned char* out
, int *outlen
,
3798 const unsigned char* in
, int *inlen
) {
3799 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_5
);
3801 static int UTF8ToISO8859_5 (unsigned char* out
, int *outlen
,
3802 const unsigned char* in
, int *inlen
) {
3803 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_5
);
3806 static int ISO8859_6ToUTF8 (unsigned char* out
, int *outlen
,
3807 const unsigned char* in
, int *inlen
) {
3808 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_6
);
3810 static int UTF8ToISO8859_6 (unsigned char* out
, int *outlen
,
3811 const unsigned char* in
, int *inlen
) {
3812 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_6
);
3815 static int ISO8859_7ToUTF8 (unsigned char* out
, int *outlen
,
3816 const unsigned char* in
, int *inlen
) {
3817 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_7
);
3819 static int UTF8ToISO8859_7 (unsigned char* out
, int *outlen
,
3820 const unsigned char* in
, int *inlen
) {
3821 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_7
);
3824 static int ISO8859_8ToUTF8 (unsigned char* out
, int *outlen
,
3825 const unsigned char* in
, int *inlen
) {
3826 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_8
);
3828 static int UTF8ToISO8859_8 (unsigned char* out
, int *outlen
,
3829 const unsigned char* in
, int *inlen
) {
3830 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_8
);
3833 static int ISO8859_9ToUTF8 (unsigned char* out
, int *outlen
,
3834 const unsigned char* in
, int *inlen
) {
3835 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_9
);
3837 static int UTF8ToISO8859_9 (unsigned char* out
, int *outlen
,
3838 const unsigned char* in
, int *inlen
) {
3839 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_9
);
3842 static int ISO8859_10ToUTF8 (unsigned char* out
, int *outlen
,
3843 const unsigned char* in
, int *inlen
) {
3844 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_10
);
3846 static int UTF8ToISO8859_10 (unsigned char* out
, int *outlen
,
3847 const unsigned char* in
, int *inlen
) {
3848 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_10
);
3851 static int ISO8859_11ToUTF8 (unsigned char* out
, int *outlen
,
3852 const unsigned char* in
, int *inlen
) {
3853 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_11
);
3855 static int UTF8ToISO8859_11 (unsigned char* out
, int *outlen
,
3856 const unsigned char* in
, int *inlen
) {
3857 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_11
);
3860 static int ISO8859_13ToUTF8 (unsigned char* out
, int *outlen
,
3861 const unsigned char* in
, int *inlen
) {
3862 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_13
);
3864 static int UTF8ToISO8859_13 (unsigned char* out
, int *outlen
,
3865 const unsigned char* in
, int *inlen
) {
3866 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_13
);
3869 static int ISO8859_14ToUTF8 (unsigned char* out
, int *outlen
,
3870 const unsigned char* in
, int *inlen
) {
3871 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_14
);
3873 static int UTF8ToISO8859_14 (unsigned char* out
, int *outlen
,
3874 const unsigned char* in
, int *inlen
) {
3875 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_14
);
3878 static int ISO8859_15ToUTF8 (unsigned char* out
, int *outlen
,
3879 const unsigned char* in
, int *inlen
) {
3880 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_15
);
3882 static int UTF8ToISO8859_15 (unsigned char* out
, int *outlen
,
3883 const unsigned char* in
, int *inlen
) {
3884 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_15
);
3887 static int ISO8859_16ToUTF8 (unsigned char* out
, int *outlen
,
3888 const unsigned char* in
, int *inlen
) {
3889 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_16
);
3891 static int UTF8ToISO8859_16 (unsigned char* out
, int *outlen
,
3892 const unsigned char* in
, int *inlen
) {
3893 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_16
);
3897 xmlRegisterCharEncodingHandlersISO8859x (void) {
3898 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8
, UTF8ToISO8859_2
);
3899 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8
, UTF8ToISO8859_3
);
3900 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8
, UTF8ToISO8859_4
);
3901 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8
, UTF8ToISO8859_5
);
3902 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8
, UTF8ToISO8859_6
);
3903 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8
, UTF8ToISO8859_7
);
3904 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8
, UTF8ToISO8859_8
);
3905 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8
, UTF8ToISO8859_9
);
3906 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8
, UTF8ToISO8859_10
);
3907 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8
, UTF8ToISO8859_11
);
3908 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8
, UTF8ToISO8859_13
);
3909 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8
, UTF8ToISO8859_14
);
3910 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8
, UTF8ToISO8859_15
);
3911 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8
, UTF8ToISO8859_16
);
3917 #define bottom_encoding
3918 #include "elfgcchack.h"