d7b96226f0feeccd8b1cc897c530785019dcd8e1
[reactos.git] / reactos / lib / 3rdparty / libwin-iconv / win_iconv.c
1 /*
2 * iconv implementation using Win32 API to convert.
3 *
4 * This file is placed in the public domain.
5 */
6
7 /* for WC_NO_BEST_FIT_CHARS */
8 #ifndef WINVER
9 # define WINVER 0x0500
10 #endif
11
12 #define STRICT
13 #include <windows.h>
14 #include <errno.h>
15 #include <string.h>
16 #include <stdlib.h>
17
/*
 * WORKAROUND: this file calls GetProcAddressA(); unless building for
 * Windows CE (UNDER_CE) that name is mapped onto GetProcAddress().
 */
#ifndef UNDER_CE
#define GetProcAddressA GetProcAddress
#endif

/*
 * Optional build switches.  They are disabled here; define them from the
 * build system (or flip the "#if 0") to enable the matching code.
 */
#if 0
# define MAKE_EXE
# define MAKE_DLL
# define USE_LIBICONV_DLL
#endif

/* Fallback used when the build does not supply DEFAULT_LIBICONV_DLL. */
#if !defined(DEFAULT_LIBICONV_DLL)
# define DEFAULT_LIBICONV_DLL ""
#endif

/* Number of UTF-16 units in the per-character scratch buffer of win_iconv(). */
#define MB_CHAR_MAX 16

/*
 * Bits for csconv_t.mode.
 * NOTE(review): presumably maintained by the UTF-16/UTF-32 converters
 * defined elsewhere in this file -- confirm there.
 */
#define UNICODE_MODE_BOM_DONE 1
#define UNICODE_MODE_SWAPPED 2

/* Bits for csconv_t.flags; make_csconv() sets them from the charset name. */
#define FLAG_USE_BOM 1
#define FLAG_TRANSLIT 2 /* //TRANSLIT */
#define FLAG_IGNORE 4 /* //IGNORE (not implemented) */
41
/* Short aliases for the unsigned integer types used throughout this file. */
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;

/*
 * Opaque conversion descriptor.  iconv_open() hands out a heap-allocated
 * rec_iconv_t cast to this type, or (iconv_t)(-1) on failure.
 */
typedef void* iconv_t;

/* Public iconv interface implemented by this file. */
iconv_t iconv_open(const char *tocode, const char *fromcode);
int iconv_close(iconv_t cd);
size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
51
/* libiconv interface for vim */
#if defined(MAKE_DLL)
/*
 * Stub for libiconv's iconvctl().  No request is supported: all three
 * arguments are ignored and 0 is returned unconditionally.
 */
int
iconvctl (iconv_t cd, int request, void* argument)
{
    /* not supported */
    (void)cd;
    (void)request;
    (void)argument;

    return 0;
}
#endif
61
/* Forward declarations of the record types defined below. */
typedef struct compat_t compat_t;
typedef struct csconv_t csconv_t;
typedef struct rec_iconv_t rec_iconv_t;

/* Signatures of the iconv entry points a rec_iconv_t dispatches to. */
typedef iconv_t (*f_iconv_open)(const char *tocode, const char *fromcode);
typedef int (*f_iconv_close)(iconv_t cd);
typedef size_t (*f_iconv)(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
/* Returns the address of the errno value the backend reports errors through. */
typedef int* (*f_errno)(void);
/*
 * Per-charset callbacks stored in csconv_t.  As used by win_iconv():
 *  - f_mbtowc decodes from buf into wbuf; *wbufsize holds the capacity of
 *    wbuf on entry and the number of UTF-16 units produced on return (0 is
 *    allowed).  Returns the number of input bytes consumed, or -1.
 *  - f_wctomb encodes wbufsize UTF-16 units into buf; returns the number
 *    of bytes written, or -1.
 *  - f_flush writes pending output into buf; returns the number of bytes
 *    written, or -1.
 *  - f_mblen is not called in the part of the file visible here.
 */
typedef int (*f_mbtowc)(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
typedef int (*f_wctomb)(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
typedef int (*f_mblen)(csconv_t *cv, const uchar *buf, int bufsize);
typedef int (*f_flush)(csconv_t *cv, uchar *buf, int bufsize);
74
/* Direction bits for compat_t.flag, as interpreted by win_iconv(). */
#define COMPAT_IN 1 /* after decoding: a character equal to .out is replaced by .in */
#define COMPAT_OUT 2 /* before encoding: a character equal to .in is replaced by .out */

/* unicode mapping for compatibility with other conversion table. */
/* Tables of these entries are terminated by an entry whose .in is 0. */
struct compat_t {
    uint in;    /* code point on the "in" side of the pair */
    uint out;   /* code point on the "out" side of the pair */
    uint flag;  /* COMPAT_IN and/or COMPAT_OUT */
};
84
/* One side (source or target) of a conversion; filled in by make_csconv(). */
struct csconv_t {
    int codepage;       /* code page number returned by name_to_codepage() */
    int flags;          /* FLAG_* bits */
    f_mbtowc mbtowc;    /* decodes one character of this charset to UTF-16 */
    f_wctomb wctomb;    /* encodes UTF-16 into this charset */
    f_mblen mblen;      /* length helper; left NULL by make_csconv() for some charsets */
    f_flush flush;      /* called by win_iconv() on a reset request; may be NULL */
    DWORD mode;         /* converter state; win_iconv() restores it after a failed
                         * callback and zeroes it on a reset request */
    compat_t *compat;   /* optional code point substitution table, or NULL */
};
95
/*
 * The object behind an iconv_t returned by iconv_open().  The public
 * iconv()/iconv_close() wrappers dispatch through the function pointers.
 */
struct rec_iconv_t {
    iconv_t cd;                 /* handle passed to the backend functions;
                                 * win_iconv_open() stores the record itself */
    f_iconv_close iconv_close;  /* backend close function */
    f_iconv iconv;              /* backend conversion function */
    f_errno _errno;             /* yields the address of the backend's errno */
    csconv_t from;              /* source charset */
    csconv_t to;                /* target charset */
#if defined(USE_LIBICONV_DLL)
    HMODULE hlibiconv;          /* released by iconv_close() when non-NULL */
#endif
};
107
/* Built-in backend: the functions win_iconv_open() installs in rec_iconv_t. */
static int win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode);
static int win_iconv_close(iconv_t cd);
static size_t win_iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);

/* Internal helpers. */
static int load_mlang();
static int make_csconv(const char *name, csconv_t *cv);
static int name_to_codepage(const char *name);
static uint utf16_to_ucs4(const ushort *wbuf);
static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize);
static int mbtowc_flags(int codepage);
static int must_use_null_useddefaultchar(int codepage);
static char *strrstr(const char *str, const char *token);
static char *xstrndup(const char *s, size_t n);
static int seterror(int err);

/* Optional backend that forwards to an external libiconv DLL. */
#if defined(USE_LIBICONV_DLL)
static int libiconv_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode);
static PVOID MyImageDirectoryEntryToData(LPVOID Base, BOOLEAN MappedAsImage, USHORT DirectoryEntry, PULONG Size);
static HMODULE find_imported_module_by_funcname(HMODULE hModule, const char *funcname);

static HMODULE hwiniconv;
#endif

/* f_mblen implementations. */
static int sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
static int dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
static int mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
static int utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize);
static int eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize);

/* f_mbtowc / f_wctomb / f_flush implementations, selected by make_csconv(). */
static int kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
static int kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
static int mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
static int mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
static int utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
static int utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
static int utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
static int utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
static int iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
static int iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
static int iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize);
148
/*
 * Charset name -> Windows code page number.
 *
 * name_to_codepage() first tries a numeric interpretation of names that
 * start with "cp", "xx" or a digit, and only then scans this table from
 * the top, returning the first case-insensitive match.  Consequently a
 * name that appears more than once (ignoring case) only ever resolves to
 * its first entry.  The table is terminated by a NULL name.
 */
static struct {
    int codepage;
    const char *name;
} codepage_alias[] = {
    {65001, "CP65001"},
    {65001, "UTF8"},
    {65001, "UTF-8"},

    {1200, "CP1200"},
    {1200, "UTF16LE"},
    {1200, "UTF-16LE"},
    {1200, "UCS2LE"},
    {1200, "UCS-2LE"},

    {1201, "CP1201"},
    {1201, "UTF16BE"},
    {1201, "UTF-16BE"},
    {1201, "UCS2BE"},
    {1201, "UCS-2BE"},
    {1201, "unicodeFFFE"},

    {12000, "CP12000"},
    {12000, "UTF32LE"},
    {12000, "UTF-32LE"},
    {12000, "UCS4LE"},
    {12000, "UCS-4LE"},

    {12001, "CP12001"},
    {12001, "UTF32BE"},
    {12001, "UTF-32BE"},
    {12001, "UCS4BE"},
    {12001, "UCS-4BE"},

#ifndef GLIB_COMPILATION
    /*
     * Default is big endian.
     * See rfc2781 4.3 Interpreting text labelled as UTF-16.
     */
    {1201, "UTF16"},
    {1201, "UTF-16"},
    {1201, "UCS2"},
    {1201, "UCS-2"},
    {12001, "UTF32"},
    {12001, "UTF-32"},
    {12001, "UCS-4"},
    {12001, "UCS4"},
#else
    /* Default is little endian, because the platform is */
    {1200, "UTF16"},
    {1200, "UTF-16"},
    {1200, "UCS2"},
    {1200, "UCS-2"},
    {12000, "UTF32"},
    {12000, "UTF-32"},
    {12000, "UCS4"},
    {12000, "UCS-4"},
#endif

    /* copy from libiconv `iconv -l` */
    /* !IsValidCodePage(367) */
    {20127, "ANSI_X3.4-1968"},
    {20127, "ANSI_X3.4-1986"},
    {20127, "ASCII"},
    {20127, "CP367"},
    {20127, "IBM367"},
    {20127, "ISO-IR-6"},
    {20127, "ISO646-US"},
    {20127, "ISO_646.IRV:1991"},
    {20127, "US"},
    {20127, "US-ASCII"},
    {20127, "CSASCII"},

    /* !IsValidCodePage(819) */
    {1252, "CP819"},
    {1252, "IBM819"},
    {28591, "ISO-8859-1"},
    {28591, "ISO-IR-100"},
    {28591, "ISO8859-1"},
    {28591, "ISO_8859-1"},
    {28591, "ISO_8859-1:1987"},
    {28591, "L1"},
    {28591, "LATIN1"},
    {28591, "CSISOLATIN1"},

    {1250, "CP1250"},
    {1250, "MS-EE"},
    {1250, "WINDOWS-1250"},

    {1251, "CP1251"},
    {1251, "MS-CYRL"},
    {1251, "WINDOWS-1251"},

    {1252, "CP1252"},
    {1252, "MS-ANSI"},
    {1252, "WINDOWS-1252"},

    {1253, "CP1253"},
    {1253, "MS-GREEK"},
    {1253, "WINDOWS-1253"},

    {1254, "CP1254"},
    {1254, "MS-TURK"},
    {1254, "WINDOWS-1254"},

    {1255, "CP1255"},
    {1255, "MS-HEBR"},
    {1255, "WINDOWS-1255"},

    {1256, "CP1256"},
    {1256, "MS-ARAB"},
    {1256, "WINDOWS-1256"},

    {1257, "CP1257"},
    {1257, "WINBALTRIM"},
    {1257, "WINDOWS-1257"},

    {1258, "CP1258"},
    {1258, "WINDOWS-1258"},

    {850, "850"},
    {850, "CP850"},
    {850, "IBM850"},
    {850, "CSPC850MULTILINGUAL"},

    /* !IsValidCodePage(862) */
    {862, "862"},
    {862, "CP862"},
    {862, "IBM862"},
    {862, "CSPC862LATINHEBREW"},

    {866, "866"},
    {866, "CP866"},
    {866, "IBM866"},
    {866, "CSIBM866"},

    /* !IsValidCodePage(154) */
    {154, "CP154"},
    {154, "CYRILLIC-ASIAN"},
    {154, "PT154"},
    {154, "PTCP154"},
    {154, "CSPTCP154"},

    /* !IsValidCodePage(1133) */
    {1133, "CP1133"},
    {1133, "IBM-CP1133"},

    {874, "CP874"},
    {874, "WINDOWS-874"},

    /* !IsValidCodePage(51932) */
    {51932, "CP51932"},
    {51932, "MS51932"},
    {51932, "WINDOWS-51932"},
    {51932, "EUC-JP"},

    {932, "CP932"},
    {932, "MS932"},
    /* NOTE(review): "SHIFFT" looks like a misspelling of "SHIFT"; the
     * correctly spelled "shift_jis" is listed further down. */
    {932, "SHIFFT_JIS"},
    {932, "SHIFFT_JIS-MS"},
    {932, "SJIS"},
    {932, "SJIS-MS"},
    {932, "SJIS-OPEN"},
    {932, "SJIS-WIN"},
    {932, "WINDOWS-31J"},
    {932, "WINDOWS-932"},
    {932, "CSWINDOWS31J"},

    {50221, "CP50221"},
    {50221, "ISO-2022-JP"},
    {50221, "ISO-2022-JP-MS"},
    {50221, "ISO2022-JP"},
    {50221, "ISO2022-JP-MS"},
    {50221, "MS50221"},
    {50221, "WINDOWS-50221"},

    {936, "CP936"},
    {936, "GBK"},
    {936, "MS936"},
    {936, "WINDOWS-936"},

    {950, "CP950"},
    {950, "BIG5"},
    {950, "BIG5HKSCS"},
    {950, "BIG5-HKSCS"},

    {949, "CP949"},
    {949, "UHC"},
    {949, "EUC-KR"},

    {1361, "CP1361"},
    {1361, "JOHAB"},

    {437, "437"},
    {437, "CP437"},
    {437, "IBM437"},
    {437, "CSPC8CODEPAGE437"},

    {737, "CP737"},

    {775, "CP775"},
    {775, "IBM775"},
    {775, "CSPC775BALTIC"},

    {852, "852"},
    {852, "CP852"},
    {852, "IBM852"},
    {852, "CSPCP852"},

    /* !IsValidCodePage(853) */
    {853, "CP853"},

    {855, "855"},
    {855, "CP855"},
    {855, "IBM855"},
    {855, "CSIBM855"},

    {857, "857"},
    {857, "CP857"},
    {857, "IBM857"},
    {857, "CSIBM857"},

    /* !IsValidCodePage(858) */
    {858, "CP858"},

    {860, "860"},
    {860, "CP860"},
    {860, "IBM860"},
    {860, "CSIBM860"},

    {861, "861"},
    {861, "CP-IS"},
    {861, "CP861"},
    {861, "IBM861"},
    {861, "CSIBM861"},

    {863, "863"},
    {863, "CP863"},
    {863, "IBM863"},
    {863, "CSIBM863"},

    {864, "CP864"},
    {864, "IBM864"},
    {864, "CSIBM864"},

    {865, "865"},
    {865, "CP865"},
    {865, "IBM865"},
    {865, "CSIBM865"},

    {869, "869"},
    {869, "CP-GR"},
    {869, "CP869"},
    {869, "IBM869"},
    {869, "CSIBM869"},

    /* !IsValidCodePage(1125) -- this comment originally said 1152,
     * presumably a typo for the code page listed below */
    {1125, "CP1125"},

    /*
     * Code Page Identifiers
     * http://msdn2.microsoft.com/en-us/library/ms776446.aspx
     */
    {37, "IBM037"}, /* IBM EBCDIC US-Canada */
    {437, "IBM437"}, /* OEM United States */
    {500, "IBM500"}, /* IBM EBCDIC International */
    {708, "ASMO-708"}, /* Arabic (ASMO 708) */
    /* 709 Arabic (ASMO-449+, BCON V4) */
    /* 710 Arabic - Transparent Arabic */
    {720, "DOS-720"}, /* Arabic (Transparent ASMO); Arabic (DOS) */
    {737, "ibm737"}, /* OEM Greek (formerly 437G); Greek (DOS) */
    {775, "ibm775"}, /* OEM Baltic; Baltic (DOS) */
    {850, "ibm850"}, /* OEM Multilingual Latin 1; Western European (DOS) */
    {852, "ibm852"}, /* OEM Latin 2; Central European (DOS) */
    {855, "IBM855"}, /* OEM Cyrillic (primarily Russian) */
    {857, "ibm857"}, /* OEM Turkish; Turkish (DOS) */
    {858, "IBM00858"}, /* OEM Multilingual Latin 1 + Euro symbol */
    {860, "IBM860"}, /* OEM Portuguese; Portuguese (DOS) */
    {861, "ibm861"}, /* OEM Icelandic; Icelandic (DOS) */
    {862, "DOS-862"}, /* OEM Hebrew; Hebrew (DOS) */
    {863, "IBM863"}, /* OEM French Canadian; French Canadian (DOS) */
    {864, "IBM864"}, /* OEM Arabic; Arabic (864) */
    {865, "IBM865"}, /* OEM Nordic; Nordic (DOS) */
    {866, "cp866"}, /* OEM Russian; Cyrillic (DOS) */
    {869, "ibm869"}, /* OEM Modern Greek; Greek, Modern (DOS) */
    {870, "IBM870"}, /* IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 */
    {874, "windows-874"}, /* ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows) */
    {875, "cp875"}, /* IBM EBCDIC Greek Modern */
    {932, "shift_jis"}, /* ANSI/OEM Japanese; Japanese (Shift-JIS) */
    {932, "shift-jis"}, /* alternative name for it */
    {936, "gb2312"}, /* ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) */
    {949, "ks_c_5601-1987"}, /* ANSI/OEM Korean (Unified Hangul Code) */
    {950, "big5"}, /* ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) */
    {950, "big5hkscs"}, /* ANSI/OEM Traditional Chinese (Hong Kong SAR); Chinese Traditional (Big5-HKSCS) */
    {950, "big5-hkscs"}, /* alternative name for it */
    {1026, "IBM1026"}, /* IBM EBCDIC Turkish (Latin 5) */
    {1047, "IBM01047"}, /* IBM EBCDIC Latin 1/Open System */
    {1140, "IBM01140"}, /* IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) */
    {1141, "IBM01141"}, /* IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) */
    {1142, "IBM01142"}, /* IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) */
    {1143, "IBM01143"}, /* IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) */
    {1144, "IBM01144"}, /* IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) */
    {1145, "IBM01145"}, /* IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) */
    {1146, "IBM01146"}, /* IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) */
    {1147, "IBM01147"}, /* IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) */
    {1148, "IBM01148"}, /* IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) */
    {1149, "IBM01149"}, /* IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) */
    {1250, "windows-1250"}, /* ANSI Central European; Central European (Windows) */
    {1251, "windows-1251"}, /* ANSI Cyrillic; Cyrillic (Windows) */
    {1252, "windows-1252"}, /* ANSI Latin 1; Western European (Windows) */
    {1253, "windows-1253"}, /* ANSI Greek; Greek (Windows) */
    {1254, "windows-1254"}, /* ANSI Turkish; Turkish (Windows) */
    {1255, "windows-1255"}, /* ANSI Hebrew; Hebrew (Windows) */
    {1256, "windows-1256"}, /* ANSI Arabic; Arabic (Windows) */
    {1257, "windows-1257"}, /* ANSI Baltic; Baltic (Windows) */
    {1258, "windows-1258"}, /* ANSI/OEM Vietnamese; Vietnamese (Windows) */
    {1361, "Johab"}, /* Korean (Johab) */
    {10000, "macintosh"}, /* MAC Roman; Western European (Mac) */
    {10001, "x-mac-japanese"}, /* Japanese (Mac) */
    {10002, "x-mac-chinesetrad"}, /* MAC Traditional Chinese (Big5); Chinese Traditional (Mac) */
    {10003, "x-mac-korean"}, /* Korean (Mac) */
    {10004, "x-mac-arabic"}, /* Arabic (Mac) */
    {10005, "x-mac-hebrew"}, /* Hebrew (Mac) */
    {10006, "x-mac-greek"}, /* Greek (Mac) */
    {10007, "x-mac-cyrillic"}, /* Cyrillic (Mac) */
    {10008, "x-mac-chinesesimp"}, /* MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) */
    {10010, "x-mac-romanian"}, /* Romanian (Mac) */
    {10017, "x-mac-ukrainian"}, /* Ukrainian (Mac) */
    {10021, "x-mac-thai"}, /* Thai (Mac) */
    {10029, "x-mac-ce"}, /* MAC Latin 2; Central European (Mac) */
    {10079, "x-mac-icelandic"}, /* Icelandic (Mac) */
    {10081, "x-mac-turkish"}, /* Turkish (Mac) */
    {10082, "x-mac-croatian"}, /* Croatian (Mac) */
    {20000, "x-Chinese_CNS"}, /* CNS Taiwan; Chinese Traditional (CNS) */
    {20001, "x-cp20001"}, /* TCA Taiwan */
    {20002, "x_Chinese-Eten"}, /* Eten Taiwan; Chinese Traditional (Eten) */
    {20003, "x-cp20003"}, /* IBM5550 Taiwan */
    {20004, "x-cp20004"}, /* TeleText Taiwan */
    {20005, "x-cp20005"}, /* Wang Taiwan */
    {20105, "x-IA5"}, /* IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) */
    {20106, "x-IA5-German"}, /* IA5 German (7-bit) */
    {20107, "x-IA5-Swedish"}, /* IA5 Swedish (7-bit) */
    {20108, "x-IA5-Norwegian"}, /* IA5 Norwegian (7-bit) */
    {20127, "us-ascii"}, /* US-ASCII (7-bit) */
    {20261, "x-cp20261"}, /* T.61 */
    {20269, "x-cp20269"}, /* ISO 6937 Non-Spacing Accent */
    {20273, "IBM273"}, /* IBM EBCDIC Germany */
    {20277, "IBM277"}, /* IBM EBCDIC Denmark-Norway */
    {20278, "IBM278"}, /* IBM EBCDIC Finland-Sweden */
    {20280, "IBM280"}, /* IBM EBCDIC Italy */
    {20284, "IBM284"}, /* IBM EBCDIC Latin America-Spain */
    {20285, "IBM285"}, /* IBM EBCDIC United Kingdom */
    {20290, "IBM290"}, /* IBM EBCDIC Japanese Katakana Extended */
    {20297, "IBM297"}, /* IBM EBCDIC France */
    {20420, "IBM420"}, /* IBM EBCDIC Arabic */
    {20423, "IBM423"}, /* IBM EBCDIC Greek */
    {20424, "IBM424"}, /* IBM EBCDIC Hebrew */
    {20833, "x-EBCDIC-KoreanExtended"}, /* IBM EBCDIC Korean Extended */
    {20838, "IBM-Thai"}, /* IBM EBCDIC Thai */
    {20866, "koi8-r"}, /* Russian (KOI8-R); Cyrillic (KOI8-R) */
    {20871, "IBM871"}, /* IBM EBCDIC Icelandic */
    {20880, "IBM880"}, /* IBM EBCDIC Cyrillic Russian */
    {20905, "IBM905"}, /* IBM EBCDIC Turkish */
    {20924, "IBM00924"}, /* IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) */
    /* NOTE(review): shadowed by the earlier {51932, "EUC-JP"} entry. */
    {20932, "EUC-JP"}, /* Japanese (JIS 0208-1990 and 0121-1990) */
    {20936, "x-cp20936"}, /* Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) */
    {20949, "x-cp20949"}, /* Korean Wansung */
    {21025, "cp1025"}, /* IBM EBCDIC Cyrillic Serbian-Bulgarian */
    /* 21027 (deprecated) */
    {21866, "koi8-u"}, /* Ukrainian (KOI8-U); Cyrillic (KOI8-U) */
    {28591, "iso-8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */
    {28591, "iso8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */
    {28592, "iso-8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */
    {28592, "iso8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */
    {28593, "iso-8859-3"}, /* ISO 8859-3 Latin 3 */
    {28593, "iso8859-3"}, /* ISO 8859-3 Latin 3 */
    {28594, "iso-8859-4"}, /* ISO 8859-4 Baltic */
    {28594, "iso8859-4"}, /* ISO 8859-4 Baltic */
    {28595, "iso-8859-5"}, /* ISO 8859-5 Cyrillic */
    {28595, "iso8859-5"}, /* ISO 8859-5 Cyrillic */
    {28596, "iso-8859-6"}, /* ISO 8859-6 Arabic */
    {28596, "iso8859-6"}, /* ISO 8859-6 Arabic */
    {28597, "iso-8859-7"}, /* ISO 8859-7 Greek */
    {28597, "iso8859-7"}, /* ISO 8859-7 Greek */
    {28598, "iso-8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */
    {28598, "iso8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */
    {28599, "iso-8859-9"}, /* ISO 8859-9 Turkish */
    {28599, "iso8859-9"}, /* ISO 8859-9 Turkish */
    {28603, "iso-8859-13"}, /* ISO 8859-13 Estonian */
    {28603, "iso8859-13"}, /* ISO 8859-13 Estonian */
    {28605, "iso-8859-15"}, /* ISO 8859-15 Latin 9 */
    {28605, "iso8859-15"}, /* ISO 8859-15 Latin 9 */
    {29001, "x-Europa"}, /* Europa 3 */
    {38598, "iso-8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */
    {38598, "iso8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */
    /* NOTE(review): both "iso-2022-jp" entries below are shadowed by the
     * earlier {50221, "ISO-2022-JP"} entry. */
    {50220, "iso-2022-jp"}, /* ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) */
    {50221, "csISO2022JP"}, /* ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) */
    {50222, "iso-2022-jp"}, /* ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) */
    {50225, "iso-2022-kr"}, /* ISO 2022 Korean */
    {50225, "iso2022-kr"}, /* ISO 2022 Korean */
    {50227, "x-cp50227"}, /* ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) */
    /* 50229 ISO 2022 Traditional Chinese */
    /* 50930 EBCDIC Japanese (Katakana) Extended */
    /* 50931 EBCDIC US-Canada and Japanese */
    /* 50933 EBCDIC Korean Extended and Korean */
    /* 50935 EBCDIC Simplified Chinese Extended and Simplified Chinese */
    /* 50936 EBCDIC Simplified Chinese */
    /* 50937 EBCDIC US-Canada and Traditional Chinese */
    /* 50939 EBCDIC Japanese (Latin) Extended and Japanese */
    {51932, "euc-jp"}, /* EUC Japanese */
    {51936, "EUC-CN"}, /* EUC Simplified Chinese; Chinese Simplified (EUC) */
    {51949, "euc-kr"}, /* EUC Korean */
    /* 51950 EUC Traditional Chinese */
    {52936, "hz-gb-2312"}, /* HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) */
    {54936, "GB18030"}, /* Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) */
    {57002, "x-iscii-de"}, /* ISCII Devanagari */
    {57003, "x-iscii-be"}, /* ISCII Bengali */
    {57004, "x-iscii-ta"}, /* ISCII Tamil */
    {57005, "x-iscii-te"}, /* ISCII Telugu */
    {57006, "x-iscii-as"}, /* ISCII Assamese */
    {57007, "x-iscii-or"}, /* ISCII Oriya */
    {57008, "x-iscii-ka"}, /* ISCII Kannada */
    {57009, "x-iscii-ma"}, /* ISCII Malayalam */
    {57010, "x-iscii-gu"}, /* ISCII Gujarati */
    {57011, "x-iscii-pa"}, /* ISCII Punjabi */

    {0, NULL}
};
576
/*
 * SJIS SHIFTJIS table               CP932 table
 * ---- ---------------------------  --------------------------------
 *   5C U+00A5 YEN SIGN              U+005C REVERSE SOLIDUS
 *   7E U+203E OVERLINE              U+007E TILDE
 * 815C U+2014 EM DASH               U+2015 HORIZONTAL BAR
 * 815F U+005C REVERSE SOLIDUS       U+FF3C FULLWIDTH REVERSE SOLIDUS
 * 8160 U+301C WAVE DASH             U+FF5E FULLWIDTH TILDE
 * 8161 U+2016 DOUBLE VERTICAL LINE  U+2225 PARALLEL TO
 * 817C U+2212 MINUS SIGN            U+FF0D FULLWIDTH HYPHEN-MINUS
 * 8191 U+00A2 CENT SIGN             U+FFE0 FULLWIDTH CENT SIGN
 * 8192 U+00A3 POUND SIGN            U+FFE1 FULLWIDTH POUND SIGN
 * 81CA U+00AC NOT SIGN              U+FFE2 FULLWIDTH NOT SIGN
 *
 * EUC-JP and ISO-2022-JP should be compatible with CP932.
 *
 * Kernel and MLang have different Unicode mapping table. Make sure
 * which API is used.
 */
/*
 * Substitutions applied before encoding to code page 932: each entry is
 * {in, out, flag} and, being COMPAT_OUT only, replaces the code point in
 * the first column by the one in the second.  Terminated by in == 0.
 */
static compat_t cp932_compat[] = {
    {0x00A5, 0x005C, COMPAT_OUT},
    {0x203E, 0x007E, COMPAT_OUT},
    {0x2014, 0x2015, COMPAT_OUT},
    {0x301C, 0xFF5E, COMPAT_OUT},
    {0x2016, 0x2225, COMPAT_OUT},
    {0x2212, 0xFF0D, COMPAT_OUT},
    {0x00A2, 0xFFE0, COMPAT_OUT},
    {0x00A3, 0xFFE1, COMPAT_OUT},
    {0x00AC, 0xFFE2, COMPAT_OUT},
    {0, 0, 0}
};
608
/*
 * Substitutions for code page 20932.  Entries flagged COMPAT_OUT replace
 * the first column by the second before encoding; entries that also carry
 * COMPAT_IN replace the second column by the first after decoding.
 * Terminated by in == 0.
 */
static compat_t cp20932_compat[] = {
    {0x00A5, 0x005C, COMPAT_OUT},
    {0x203E, 0x007E, COMPAT_OUT},
    {0x2014, 0x2015, COMPAT_OUT},
    {0xFF5E, 0x301C, COMPAT_OUT|COMPAT_IN},
    {0x2225, 0x2016, COMPAT_OUT|COMPAT_IN},
    {0xFF0D, 0x2212, COMPAT_OUT|COMPAT_IN},
    {0xFFE0, 0x00A2, COMPAT_OUT|COMPAT_IN},
    {0xFFE1, 0x00A3, COMPAT_OUT|COMPAT_IN},
    {0xFFE2, 0x00AC, COMPAT_OUT|COMPAT_IN},
    {0, 0, 0}
};
621
/* Code page 51932 shares the CP932 substitution table. */
static compat_t *cp51932_compat = cp932_compat;

/* cp20932_compat for kernel. cp932_compat for mlang. */
/* Used by make_csconv() for code pages 50220, 50221 and 50222. */
static compat_t *cp5022x_compat = cp932_compat;
626
/*
 * Function-pointer types for entry points of mlang.dll.  The library is
 * loaded at run time by load_mlang(), which fills in the file-scope
 * pointers declared after the typedefs.
 */
typedef HRESULT (WINAPI *CONVERTINETSTRING)(
    LPDWORD lpdwMode,
    DWORD dwSrcEncoding,
    DWORD dwDstEncoding,
    LPCSTR lpSrcStr,
    LPINT lpnSrcSize,
    LPBYTE lpDstStr,
    LPINT lpnDstSize
);
typedef HRESULT (WINAPI *CONVERTINETMULTIBYTETOUNICODE)(
    LPDWORD lpdwMode,
    DWORD dwSrcEncoding,
    LPCSTR lpSrcStr,
    LPINT lpnMultiCharCount,
    LPWSTR lpDstStr,
    LPINT lpnWideCharCount
);
typedef HRESULT (WINAPI *CONVERTINETUNICODETOMULTIBYTE)(
    LPDWORD lpdwMode,
    DWORD dwEncoding,
    LPCWSTR lpSrcStr,
    LPINT lpnWideCharCount,
    LPSTR lpDstStr,
    LPINT lpnMultiCharCount
);
typedef HRESULT (WINAPI *ISCONVERTINETSTRINGAVAILABLE)(
    DWORD dwSrcEncoding,
    DWORD dwDstEncoding
);
typedef HRESULT (WINAPI *LCIDTORFC1766A)(
    LCID Locale,
    LPSTR pszRfc1766,
    int nChar
);
/* No pointer variable of this type is declared in this part of the file. */
typedef HRESULT (WINAPI *LCIDTORFC1766W)(
    LCID Locale,
    LPWSTR pszRfc1766,
    int nChar
);
typedef HRESULT (WINAPI *RFC1766TOLCIDA)(
    LCID *pLocale,
    LPSTR pszRfc1766
);
/* No pointer variable of this type is declared in this part of the file. */
typedef HRESULT (WINAPI *RFC1766TOLCIDW)(
    LCID *pLocale,
    LPWSTR pszRfc1766
);
/* Resolved by load_mlang(); NULL until then. */
static CONVERTINETSTRING ConvertINetString;
static CONVERTINETMULTIBYTETOUNICODE ConvertINetMultiByteToUnicode;
static CONVERTINETUNICODETOMULTIBYTE ConvertINetUnicodeToMultiByte;
static ISCONVERTINETSTRINGAVAILABLE IsConvertINetStringAvailable;
static LCIDTORFC1766A LcidToRfc1766A;
static RFC1766TOLCIDA Rfc1766ToLcidA;
680
681 static int
682 load_mlang()
683 {
684 HMODULE h;
685 if (ConvertINetString != NULL)
686 return TRUE;
687 h = LoadLibrary(TEXT("mlang.dll"));
688 if (!h)
689 return FALSE;
690 ConvertINetString = (CONVERTINETSTRING)GetProcAddressA(h, "ConvertINetString");
691 ConvertINetMultiByteToUnicode = (CONVERTINETMULTIBYTETOUNICODE)GetProcAddressA(h, "ConvertINetMultiByteToUnicode");
692 ConvertINetUnicodeToMultiByte = (CONVERTINETUNICODETOMULTIBYTE)GetProcAddressA(h, "ConvertINetUnicodeToMultiByte");
693 IsConvertINetStringAvailable = (ISCONVERTINETSTRINGAVAILABLE)GetProcAddressA(h, "IsConvertINetStringAvailable");
694 LcidToRfc1766A = (LCIDTORFC1766A)GetProcAddressA(h, "LcidToRfc1766A");
695 Rfc1766ToLcidA = (RFC1766TOLCIDA)GetProcAddressA(h, "Rfc1766ToLcidA");
696 return TRUE;
697 }
698
699 iconv_t
700 iconv_open(const char *tocode, const char *fromcode)
701 {
702 rec_iconv_t *cd;
703
704 cd = (rec_iconv_t *)calloc(1, sizeof(rec_iconv_t));
705 if (cd == NULL)
706 return (iconv_t)(-1);
707
708 #if defined(USE_LIBICONV_DLL)
709 errno = 0;
710 if (libiconv_iconv_open(cd, tocode, fromcode))
711 return (iconv_t)cd;
712 #endif
713
714 /* reset the errno to prevent reporting wrong error code.
715 * 0 for unsorted error. */
716 errno = 0;
717 if (win_iconv_open(cd, tocode, fromcode))
718 return (iconv_t)cd;
719
720 free(cd);
721
722 return (iconv_t)(-1);
723 }
724
725 int
726 iconv_close(iconv_t _cd)
727 {
728 rec_iconv_t *cd = (rec_iconv_t *)_cd;
729 int r = cd->iconv_close(cd->cd);
730 int e = *(cd->_errno());
731 #if defined(USE_LIBICONV_DLL)
732 if (cd->hlibiconv != NULL)
733 FreeLibrary(cd->hlibiconv);
734 #endif
735 free(cd);
736 errno = e;
737 return r;
738 }
739
740 size_t
741 iconv(iconv_t _cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
742 {
743 rec_iconv_t *cd = (rec_iconv_t *)_cd;
744 size_t r = cd->iconv(cd->cd, inbuf, inbytesleft, outbuf, outbytesleft);
745 errno = *(cd->_errno());
746 return r;
747 }
748
749 static int
750 win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode)
751 {
752 if (!make_csconv(fromcode, &cd->from) || !make_csconv(tocode, &cd->to))
753 return FALSE;
754 cd->iconv_close = win_iconv_close;
755 cd->iconv = win_iconv;
756 cd->_errno = _errno;
757 cd->cd = (iconv_t)cd;
758 return TRUE;
759 }
760
761 static int
762 win_iconv_close(iconv_t cd)
763 {
764 return 0;
765 }
766
767 static size_t
768 win_iconv(iconv_t _cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
769 {
770 rec_iconv_t *cd = (rec_iconv_t *)_cd;
771 ushort wbuf[MB_CHAR_MAX]; /* enough room for one character */
772 int insize;
773 int outsize;
774 int wsize;
775 DWORD frommode;
776 DWORD tomode;
777 uint wc;
778 compat_t *cp;
779 int i;
780
781 if (inbuf == NULL || *inbuf == NULL)
782 {
783 if (outbuf != NULL && *outbuf != NULL && cd->to.flush != NULL)
784 {
785 tomode = cd->to.mode;
786 outsize = cd->to.flush(&cd->to, (uchar *)*outbuf, *outbytesleft);
787 if (outsize == -1)
788 {
789 cd->to.mode = tomode;
790 return (size_t)(-1);
791 }
792 *outbuf += outsize;
793 *outbytesleft -= outsize;
794 }
795 cd->from.mode = 0;
796 cd->to.mode = 0;
797 return 0;
798 }
799
800 while (*inbytesleft != 0)
801 {
802 frommode = cd->from.mode;
803 tomode = cd->to.mode;
804 wsize = MB_CHAR_MAX;
805
806 insize = cd->from.mbtowc(&cd->from, (const uchar *)*inbuf, *inbytesleft, wbuf, &wsize);
807 if (insize == -1)
808 {
809 cd->from.mode = frommode;
810 return (size_t)(-1);
811 }
812
813 if (wsize == 0)
814 {
815 *inbuf += insize;
816 *inbytesleft -= insize;
817 continue;
818 }
819
820 if (cd->from.compat != NULL)
821 {
822 wc = utf16_to_ucs4(wbuf);
823 cp = cd->from.compat;
824 for (i = 0; cp[i].in != 0; ++i)
825 {
826 if ((cp[i].flag & COMPAT_IN) && cp[i].out == wc)
827 {
828 ucs4_to_utf16(cp[i].in, wbuf, &wsize);
829 break;
830 }
831 }
832 }
833
834 if (cd->to.compat != NULL)
835 {
836 wc = utf16_to_ucs4(wbuf);
837 cp = cd->to.compat;
838 for (i = 0; cp[i].in != 0; ++i)
839 {
840 if ((cp[i].flag & COMPAT_OUT) && cp[i].in == wc)
841 {
842 ucs4_to_utf16(cp[i].out, wbuf, &wsize);
843 break;
844 }
845 }
846 }
847
848 outsize = cd->to.wctomb(&cd->to, wbuf, wsize, (uchar *)*outbuf, *outbytesleft);
849 if (outsize == -1)
850 {
851 cd->from.mode = frommode;
852 cd->to.mode = tomode;
853 return (size_t)(-1);
854 }
855
856 *inbuf += insize;
857 *outbuf += outsize;
858 *inbytesleft -= insize;
859 *outbytesleft -= outsize;
860 }
861
862 return 0;
863 }
864
865 static int
866 make_csconv(const char *_name, csconv_t *cv)
867 {
868 CPINFO cpinfo;
869 int use_compat = TRUE;
870 int flag = 0;
871 char *name;
872 char *p;
873
874 name = xstrndup(_name, strlen(_name));
875 if (name == NULL)
876 return FALSE;
877
878 /* check for option "enc_name//opt1//opt2" */
879 while ((p = strrstr(name, "//")) != NULL)
880 {
881 if (_stricmp(p + 2, "nocompat") == 0)
882 use_compat = FALSE;
883 else if (_stricmp(p + 2, "translit") == 0)
884 flag |= FLAG_TRANSLIT;
885 else if (_stricmp(p + 2, "ignore") == 0)
886 flag |= FLAG_IGNORE;
887 *p = 0;
888 }
889
890 cv->mode = 0;
891 cv->flags = flag;
892 cv->mblen = NULL;
893 cv->flush = NULL;
894 cv->compat = NULL;
895 cv->codepage = name_to_codepage(name);
896 if (cv->codepage == 1200 || cv->codepage == 1201)
897 {
898 cv->mbtowc = utf16_mbtowc;
899 cv->wctomb = utf16_wctomb;
900 if (_stricmp(name, "UTF-16") == 0 || _stricmp(name, "UTF16") == 0 ||
901 _stricmp(name, "UCS-2") == 0 || _stricmp(name, "UCS2") == 0)
902 cv->flags |= FLAG_USE_BOM;
903 }
904 else if (cv->codepage == 12000 || cv->codepage == 12001)
905 {
906 cv->mbtowc = utf32_mbtowc;
907 cv->wctomb = utf32_wctomb;
908 if (_stricmp(name, "UTF-32") == 0 || _stricmp(name, "UTF32") == 0 ||
909 _stricmp(name, "UCS-4") == 0 || _stricmp(name, "UCS4") == 0)
910 cv->flags |= FLAG_USE_BOM;
911 }
912 else if (cv->codepage == 65001)
913 {
914 cv->mbtowc = kernel_mbtowc;
915 cv->wctomb = kernel_wctomb;
916 cv->mblen = utf8_mblen;
917 }
918 else if ((cv->codepage == 50220 || cv->codepage == 50221 || cv->codepage == 50222) && load_mlang())
919 {
920 cv->mbtowc = iso2022jp_mbtowc;
921 cv->wctomb = iso2022jp_wctomb;
922 cv->flush = iso2022jp_flush;
923 }
924 else if (cv->codepage == 51932 && load_mlang())
925 {
926 cv->mbtowc = mlang_mbtowc;
927 cv->wctomb = mlang_wctomb;
928 cv->mblen = eucjp_mblen;
929 }
930 else if (IsValidCodePage(cv->codepage)
931 && GetCPInfo(cv->codepage, &cpinfo) != 0)
932 {
933 cv->mbtowc = kernel_mbtowc;
934 cv->wctomb = kernel_wctomb;
935 if (cpinfo.MaxCharSize == 1)
936 cv->mblen = sbcs_mblen;
937 else if (cpinfo.MaxCharSize == 2)
938 cv->mblen = dbcs_mblen;
939 else
940 cv->mblen = mbcs_mblen;
941 }
942 else
943 {
944 /* not supported */
945 free(name);
946 errno = EINVAL;
947 return FALSE;
948 }
949
950 if (use_compat)
951 {
952 switch (cv->codepage)
953 {
954 case 932: cv->compat = cp932_compat; break;
955 case 20932: cv->compat = cp20932_compat; break;
956 case 51932: cv->compat = cp51932_compat; break;
957 case 50220: case 50221: case 50222: cv->compat = cp5022x_compat; break;
958 }
959 }
960
961 free(name);
962
963 return TRUE;
964 }
965
966 static int
967 name_to_codepage(const char *name)
968 {
969 int i;
970
971 if (*name == '\0' ||
972 strcmp(name, "char") == 0)
973 return GetACP();
974 else if (strcmp(name, "wchar_t") == 0)
975 return 1200;
976 else if (_strnicmp(name, "cp", 2) == 0)
977 return atoi(name + 2); /* CP123 */
978 else if ('0' <= name[0] && name[0] <= '9')
979 return atoi(name); /* 123 */
980 else if (_strnicmp(name, "xx", 2) == 0)
981 return atoi(name + 2); /* XX123 for debug */
982
983 for (i = 0; codepage_alias[i].name != NULL; ++i)
984 if (_stricmp(name, codepage_alias[i].name) == 0)
985 return codepage_alias[i].codepage;
986 return -1;
987 }
988
/*
 * Decode one code point from UTF-16 code units
 * (http://www.faqs.org/rfcs/rfc2781.html).
 *
 * If wbuf[0] is a high surrogate (0xD800..0xDBFF) it is combined with
 * wbuf[1]; wbuf[1] is not validated.  Any other value of wbuf[0] is
 * returned unchanged.
 */
static uint
utf16_to_ucs4(const ushort *wbuf)
{
    const ushort lead = wbuf[0];

    if (lead < 0xD800 || lead > 0xDBFF)
        return lead;
    return ((lead & 0x3FF) << 10) + (wbuf[1] & 0x3FF) + 0x10000;
}
1000
/*
 * Encode the code point wc as UTF-16 into wbuf and store the number of
 * code units written (1 or 2) in *wbufsize.  Values of 0x10000 and above
 * become a surrogate pair; wc itself is not range-checked here.
 */
static void
ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize)
{
    uint offset;

    if (wc < 0x10000)
    {
        wbuf[0] = wc;
        *wbufsize = 1;
        return;
    }

    offset = wc - 0x10000;
    wbuf[0] = 0xD800 | ((offset >> 10) & 0x3FF);
    wbuf[1] = 0xDC00 | (offset & 0x3FF);
    *wbufsize = 2;
}
1017
1018 /*
1019 * Check if codepage is one of those for which the dwFlags parameter
1020 * to MultiByteToWideChar() must be zero. Return zero or
1021 * MB_ERR_INVALID_CHARS. The docs in Platform SDK for for Windows
1022 * Server 2003 R2 claims that also codepage 65001 is one of these, but
1023 * that doesn't seem to be the case. The MSDN docs for MSVS2008 leave
1024 * out 65001 (UTF-8), and that indeed seems to be the case on XP, it
1025 * works fine to pass MB_ERR_INVALID_CHARS in dwFlags when converting
1026 * from UTF-8.
1027 */
1028 static int
1029 mbtowc_flags(int codepage)
1030 {
1031 return (codepage == 50220 || codepage == 50221 ||
1032 codepage == 50222 || codepage == 50225 ||
1033 codepage == 50227 || codepage == 50229 ||
1034 codepage == 52936 || codepage == 54936 ||
1035 (codepage >= 57002 && codepage <= 57011) ||
1036 codepage == 65000 || codepage == 42) ? 0 : MB_ERR_INVALID_CHARS;
1037 }
1038
/*
 * Report whether the lpUsedDefaultChar argument of WideCharToMultiByte()
 * must be NULL for the given code page (non-zero = must be NULL).
 *
 * The Platform SDK docs for Windows Server 2003 R2 give the list below,
 * while the MSDN docs for MSVS2008 claim the restriction applies only to
 * 65000 (UTF-7) and 65001 (UTF-8).  This time the earlier Platform SDK
 * seems to be correct, at least for XP.
 */
static int
must_use_null_useddefaultchar(int codepage)
{
    if (codepage >= 57002 && codepage <= 57011)
        return 1;

    switch (codepage)
    {
    case 42:
    case 50220:
    case 50221:
    case 50222:
    case 50225:
    case 50227:
    case 50229:
    case 52936:
    case 54936:
    case 65000:
    case 65001:
        return 1;
    default:
        return 0;
    }
}
1058
/*
 * Find the last occurrence of token inside str.
 *
 * Returns a pointer to the start of the match, or NULL when there is no
 * match.  An empty token never matches (NULL is returned), which is what
 * the previous implementation did as well.
 *
 * The search walks indices downward instead of decrementing a pointer
 * below the start of str, which would be undefined behaviour, and keeps
 * lengths in size_t rather than truncating them to int.
 */
static char *
strrstr(const char *str, const char *token)
{
    size_t str_len = strlen(str);
    size_t token_len = strlen(token);
    size_t pos;

    if (token_len == 0 || token_len > str_len)
        return NULL;

    /* start at the last position where token can still fit */
    for (pos = str_len - token_len + 1; pos-- > 0; )
    {
        if (str[pos] == token[0] && strncmp(str + pos, token, token_len) == 0)
            return (char *)(str + pos);
    }
    return NULL;
}
1070
/*
 * Return a freshly malloc()ed, NUL-terminated copy of the first n bytes
 * of s, or NULL if the allocation fails.  Exactly n bytes are copied, so
 * s must provide at least n readable bytes.  The caller frees the result.
 */
static char *
xstrndup(const char *s, size_t n)
{
    char *copy = malloc(n + 1);

    if (copy != NULL)
    {
        memcpy(copy, s, n);
        copy[n] = '\0';
    }
    return copy;
}
1083
/*
 * Store err in errno and return -1, so that a failure can be reported
 * with a single "return seterror(code);".
 */
static int
seterror(int err)
{
    return (errno = err, -1);
}
1090
1091 #if defined(USE_LIBICONV_DLL)
1092 static int
1093 libiconv_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode)
1094 {
1095 HMODULE hlibiconv = NULL;
1096 HMODULE hmsvcrt = NULL;
1097 char *dllname;
1098 const char *p;
1099 const char *e;
1100 f_iconv_open _iconv_open;
1101
1102 /*
1103 * always try to load dll, so that we can switch dll in runtime.
1104 */
1105
1106 /* XXX: getenv() can't get variable set by SetEnvironmentVariable() */
1107 p = getenv("WINICONV_LIBICONV_DLL");
1108 if (p == NULL)
1109 p = DEFAULT_LIBICONV_DLL;
1110 /* parse comma separated value */
1111 for ( ; *p != 0; p = (*e == ',') ? e + 1 : e)
1112 {
1113 e = strchr(p, ',');
1114 if (p == e)
1115 continue;
1116 else if (e == NULL)
1117 e = p + strlen(p);
1118 dllname = xstrndup(p, e - p);
1119 if (dllname == NULL)
1120 return FALSE;
1121 hlibiconv = LoadLibraryA(dllname);
1122 free(dllname);
1123 if (hlibiconv != NULL)
1124 {
1125 if (hlibiconv == hwiniconv)
1126 {
1127 FreeLibrary(hlibiconv);
1128 hlibiconv = NULL;
1129 continue;
1130 }
1131 break;
1132 }
1133 }
1134
1135 if (hlibiconv == NULL)
1136 goto failed;
1137
1138 hmsvcrt = find_imported_module_by_funcname(hlibiconv, "_errno");
1139 if (hmsvcrt == NULL)
1140 goto failed;
1141
1142 _iconv_open = (f_iconv_open)GetProcAddressA(hlibiconv, "libiconv_open");
1143 if (_iconv_open == NULL)
1144 _iconv_open = (f_iconv_open)GetProcAddressA(hlibiconv, "iconv_open");
1145 cd->iconv_close = (f_iconv_close)GetProcAddressA(hlibiconv, "libiconv_close");
1146 if (cd->iconv_close == NULL)
1147 cd->iconv_close = (f_iconv_close)GetProcAddressA(hlibiconv, "iconv_close");
1148 cd->iconv = (f_iconv)GetProcAddressA(hlibiconv, "libiconv");
1149 if (cd->iconv == NULL)
1150 cd->iconv = (f_iconv)GetProcAddressA(hlibiconv, "iconv");
1151 cd->_errno = (f_errno)GetProcAddressA(hmsvcrt, "_errno");
1152 if (_iconv_open == NULL || cd->iconv_close == NULL
1153 || cd->iconv == NULL || cd->_errno == NULL)
1154 goto failed;
1155
1156 cd->cd = _iconv_open(tocode, fromcode);
1157 if (cd->cd == (iconv_t)(-1))
1158 goto failed;
1159
1160 cd->hlibiconv = hlibiconv;
1161 return TRUE;
1162
1163 failed:
1164 if (hlibiconv != NULL)
1165 FreeLibrary(hlibiconv);
1166 /* do not free hmsvcrt which is obtained by GetModuleHandle() */
1167 return FALSE;
1168 }
1169
1170 /*
1171 * Reference:
1172 * http://forums.belution.com/ja/vc/000/234/78s.shtml
1173 * http://nienie.com/~masapico/api_ImageDirectoryEntryToData.html
1174 *
1175 * The formal way is
1176 * imagehlp.h or dbghelp.h
1177 * imagehlp.lib or dbghelp.lib
1178 * ImageDirectoryEntryToData()
1179 */
1180 #define TO_DOS_HEADER(base) ((PIMAGE_DOS_HEADER)(base))
1181 #define TO_NT_HEADERS(base) ((PIMAGE_NT_HEADERS)((LPBYTE)(base) + TO_DOS_HEADER(base)->e_lfanew))
1182 static PVOID
1183 MyImageDirectoryEntryToData(LPVOID Base, BOOLEAN MappedAsImage, USHORT DirectoryEntry, PULONG Size)
1184 {
1185 /* TODO: MappedAsImage? */
1186 PIMAGE_DATA_DIRECTORY p;
1187 p = TO_NT_HEADERS(Base)->OptionalHeader.DataDirectory + DirectoryEntry;
1188 if (p->VirtualAddress == 0) {
1189 *Size = 0;
1190 return NULL;
1191 }
1192 *Size = p->Size;
1193 return (PVOID)((LPBYTE)Base + p->VirtualAddress);
1194 }
1195
1196 static HMODULE
1197 find_imported_module_by_funcname(HMODULE hModule, const char *funcname)
1198 {
1199 size_t Base;
1200 ULONG Size;
1201 PIMAGE_IMPORT_DESCRIPTOR Imp;
1202 PIMAGE_THUNK_DATA Name; /* Import Name Table */
1203 PIMAGE_IMPORT_BY_NAME ImpName;
1204
1205 Base = (size_t)hModule;
1206 Imp = MyImageDirectoryEntryToData(
1207 (LPVOID)Base,
1208 TRUE,
1209 IMAGE_DIRECTORY_ENTRY_IMPORT,
1210 &Size);
1211 if (Imp == NULL)
1212 return NULL;
1213 for ( ; Imp->OriginalFirstThunk != 0; ++Imp)
1214 {
1215 Name = (PIMAGE_THUNK_DATA)(Base + Imp->OriginalFirstThunk);
1216 for ( ; Name->u1.Ordinal != 0; ++Name)
1217 {
1218 if (!IMAGE_SNAP_BY_ORDINAL(Name->u1.Ordinal))
1219 {
1220 ImpName = (PIMAGE_IMPORT_BY_NAME)
1221 (Base + (size_t)Name->u1.AddressOfData);
1222 if (strcmp((char *)ImpName->Name, funcname) == 0)
1223 return GetModuleHandleA((char *)(Base + Imp->Name));
1224 }
1225 }
1226 }
1227 return NULL;
1228 }
1229 #endif
1230
1231 static int
1232 sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize)
1233 {
1234 return 1;
1235 }
1236
1237 static int
1238 dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize)
1239 {
1240 int len = IsDBCSLeadByteEx(cv->codepage, buf[0]) ? 2 : 1;
1241 if (bufsize < len)
1242 return seterror(EINVAL);
1243 return len;
1244 }
1245
1246 static int
1247 mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize)
1248 {
1249 int len = 0;
1250
1251 if (cv->codepage == 54936) {
1252 if (buf[0] <= 0x7F) len = 1;
1253 else if (buf[0] >= 0x81 && buf[0] <= 0xFE &&
1254 bufsize >= 2 &&
1255 ((buf[1] >= 0x40 && buf[1] <= 0x7E) ||
1256 (buf[1] >= 0x80 && buf[1] <= 0xFE))) len = 2;
1257 else if (buf[0] >= 0x81 && buf[0] <= 0xFE &&
1258 bufsize >= 4 &&
1259 buf[1] >= 0x30 && buf[1] <= 0x39) len = 4;
1260 else
1261 return seterror(EINVAL);
1262 return len;
1263 }
1264 else
1265 return seterror(EINVAL);
1266 }
1267
1268 static int
1269 utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize)
1270 {
1271 int len = 0;
1272
1273 if (buf[0] < 0x80) len = 1;
1274 else if ((buf[0] & 0xE0) == 0xC0) len = 2;
1275 else if ((buf[0] & 0xF0) == 0xE0) len = 3;
1276 else if ((buf[0] & 0xF8) == 0xF0) len = 4;
1277 else if ((buf[0] & 0xFC) == 0xF8) len = 5;
1278 else if ((buf[0] & 0xFE) == 0xFC) len = 6;
1279
1280 if (len == 0)
1281 return seterror(EILSEQ);
1282 else if (bufsize < len)
1283 return seterror(EINVAL);
1284 return len;
1285 }
1286
1287 static int
1288 eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize)
1289 {
1290 if (buf[0] < 0x80) /* ASCII */
1291 return 1;
1292 else if (buf[0] == 0x8E) /* JIS X 0201 */
1293 {
1294 if (bufsize < 2)
1295 return seterror(EINVAL);
1296 else if (!(0xA1 <= buf[1] && buf[1] <= 0xDF))
1297 return seterror(EILSEQ);
1298 return 2;
1299 }
1300 else if (buf[0] == 0x8F) /* JIS X 0212 */
1301 {
1302 if (bufsize < 3)
1303 return seterror(EINVAL);
1304 else if (!(0xA1 <= buf[1] && buf[1] <= 0xFE)
1305 || !(0xA1 <= buf[2] && buf[2] <= 0xFE))
1306 return seterror(EILSEQ);
1307 return 3;
1308 }
1309 else /* JIS X 0208 */
1310 {
1311 if (bufsize < 2)
1312 return seterror(EINVAL);
1313 else if (!(0xA1 <= buf[0] && buf[0] <= 0xFE)
1314 || !(0xA1 <= buf[1] && buf[1] <= 0xFE))
1315 return seterror(EILSEQ);
1316 return 2;
1317 }
1318 }
1319
1320 static int
1321 kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1322 {
1323 int len;
1324
1325 len = cv->mblen(cv, buf, bufsize);
1326 if (len == -1)
1327 return -1;
1328 *wbufsize = MultiByteToWideChar(cv->codepage, mbtowc_flags (cv->codepage),
1329 (const char *)buf, len, (wchar_t *)wbuf, *wbufsize);
1330 if (*wbufsize == 0)
1331 return seterror(EILSEQ);
1332 return len;
1333 }
1334
1335 static int
1336 kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1337 {
1338 BOOL usedDefaultChar = 0;
1339 BOOL *p = NULL;
1340 int flags = 0;
1341 int len;
1342
1343 if (bufsize == 0)
1344 return seterror(E2BIG);
1345 if (!must_use_null_useddefaultchar(cv->codepage))
1346 {
1347 p = &usedDefaultChar;
1348 #ifdef WC_NO_BEST_FIT_CHARS
1349 if (!(cv->flags & FLAG_TRANSLIT))
1350 flags |= WC_NO_BEST_FIT_CHARS;
1351 #endif
1352 }
1353 len = WideCharToMultiByte(cv->codepage, flags,
1354 (const wchar_t *)wbuf, wbufsize, (char *)buf, bufsize, NULL, p);
1355 if (len == 0)
1356 {
1357 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
1358 return seterror(E2BIG);
1359 return seterror(EILSEQ);
1360 }
1361 else if (usedDefaultChar && !(cv->flags & FLAG_TRANSLIT))
1362 return seterror(EILSEQ);
1363 else if (cv->mblen(cv, buf, len) != len) /* validate result */
1364 return seterror(EILSEQ);
1365 return len;
1366 }
1367
1368 /*
1369 * It seems that the mode (cv->mode) is fixnum.
1370 * For example, when converting iso-2022-jp(cp50221) to unicode:
1371 * in ascii sequence: mode=0xC42C0000
1372 * in jisx0208 sequence: mode=0xC42C0001
1373 * "C42C" is same for each convert session.
1374 * It should be: ((codepage-1)<<16)|state
1375 */
1376 static int
1377 mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1378 {
1379 int len;
1380 int insize;
1381 HRESULT hr;
1382
1383 len = cv->mblen(cv, buf, bufsize);
1384 if (len == -1)
1385 return -1;
1386 insize = len;
1387 hr = ConvertINetMultiByteToUnicode(&cv->mode, cv->codepage,
1388 (const char *)buf, &insize, (wchar_t *)wbuf, wbufsize);
1389 if (hr != S_OK || insize != len)
1390 return seterror(EILSEQ);
1391 return len;
1392 }
1393
1394 static int
1395 mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1396 {
1397 char tmpbuf[MB_CHAR_MAX]; /* enough room for one character */
1398 int tmpsize = MB_CHAR_MAX;
1399 int insize = wbufsize;
1400 HRESULT hr;
1401
1402 hr = ConvertINetUnicodeToMultiByte(&cv->mode, cv->codepage,
1403 (const wchar_t *)wbuf, &wbufsize, tmpbuf, &tmpsize);
1404 if (hr != S_OK || insize != wbufsize)
1405 return seterror(EILSEQ);
1406 else if (bufsize < tmpsize)
1407 return seterror(E2BIG);
1408 else if (cv->mblen(cv, (uchar *)tmpbuf, tmpsize) != tmpsize)
1409 return seterror(EILSEQ);
1410 memcpy(buf, tmpbuf, tmpsize);
1411 return tmpsize;
1412 }
1413
1414 static int
1415 utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1416 {
1417 int codepage = cv->codepage;
1418
1419 /* swap endian: 1200 <-> 1201 */
1420 if (cv->mode & UNICODE_MODE_SWAPPED)
1421 codepage ^= 1;
1422
1423 if (bufsize < 2)
1424 return seterror(EINVAL);
1425 if (codepage == 1200) /* little endian */
1426 wbuf[0] = (buf[1] << 8) | buf[0];
1427 else if (codepage == 1201) /* big endian */
1428 wbuf[0] = (buf[0] << 8) | buf[1];
1429
1430 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1431 {
1432 cv->mode |= UNICODE_MODE_BOM_DONE;
1433 if (wbuf[0] == 0xFFFE)
1434 {
1435 cv->mode |= UNICODE_MODE_SWAPPED;
1436 *wbufsize = 0;
1437 return 2;
1438 }
1439 else if (wbuf[0] == 0xFEFF)
1440 {
1441 *wbufsize = 0;
1442 return 2;
1443 }
1444 }
1445
1446 if (0xDC00 <= wbuf[0] && wbuf[0] <= 0xDFFF)
1447 return seterror(EILSEQ);
1448 if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF)
1449 {
1450 if (bufsize < 4)
1451 return seterror(EINVAL);
1452 if (codepage == 1200) /* little endian */
1453 wbuf[1] = (buf[3] << 8) | buf[2];
1454 else if (codepage == 1201) /* big endian */
1455 wbuf[1] = (buf[2] << 8) | buf[3];
1456 if (!(0xDC00 <= wbuf[1] && wbuf[1] <= 0xDFFF))
1457 return seterror(EILSEQ);
1458 *wbufsize = 2;
1459 return 4;
1460 }
1461 *wbufsize = 1;
1462 return 2;
1463 }
1464
1465 static int
1466 utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1467 {
1468 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1469 {
1470 int r;
1471
1472 cv->mode |= UNICODE_MODE_BOM_DONE;
1473 if (bufsize < 2)
1474 return seterror(E2BIG);
1475 if (cv->codepage == 1200) /* little endian */
1476 memcpy(buf, "\xFF\xFE", 2);
1477 else if (cv->codepage == 1201) /* big endian */
1478 memcpy(buf, "\xFE\xFF", 2);
1479
1480 r = utf16_wctomb(cv, wbuf, wbufsize, buf + 2, bufsize - 2);
1481 if (r == -1)
1482 return -1;
1483 return r + 2;
1484 }
1485
1486 if (bufsize < 2)
1487 return seterror(E2BIG);
1488 if (cv->codepage == 1200) /* little endian */
1489 {
1490 buf[0] = (wbuf[0] & 0x00FF);
1491 buf[1] = (wbuf[0] & 0xFF00) >> 8;
1492 }
1493 else if (cv->codepage == 1201) /* big endian */
1494 {
1495 buf[0] = (wbuf[0] & 0xFF00) >> 8;
1496 buf[1] = (wbuf[0] & 0x00FF);
1497 }
1498 if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF)
1499 {
1500 if (bufsize < 4)
1501 return seterror(E2BIG);
1502 if (cv->codepage == 1200) /* little endian */
1503 {
1504 buf[2] = (wbuf[1] & 0x00FF);
1505 buf[3] = (wbuf[1] & 0xFF00) >> 8;
1506 }
1507 else if (cv->codepage == 1201) /* big endian */
1508 {
1509 buf[2] = (wbuf[1] & 0xFF00) >> 8;
1510 buf[3] = (wbuf[1] & 0x00FF);
1511 }
1512 return 4;
1513 }
1514 return 2;
1515 }
1516
1517 static int
1518 utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1519 {
1520 int codepage = cv->codepage;
1521 uint wc;
1522
1523 /* swap endian: 12000 <-> 12001 */
1524 if (cv->mode & UNICODE_MODE_SWAPPED)
1525 codepage ^= 1;
1526
1527 if (bufsize < 4)
1528 return seterror(EINVAL);
1529 if (codepage == 12000) /* little endian */
1530 wc = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0];
1531 else if (codepage == 12001) /* big endian */
1532 wc = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
1533
1534 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1535 {
1536 cv->mode |= UNICODE_MODE_BOM_DONE;
1537 if (wc == 0xFFFE0000)
1538 {
1539 cv->mode |= UNICODE_MODE_SWAPPED;
1540 *wbufsize = 0;
1541 return 4;
1542 }
1543 else if (wc == 0x0000FEFF)
1544 {
1545 *wbufsize = 0;
1546 return 4;
1547 }
1548 }
1549
1550 if ((0xD800 <= wc && wc <= 0xDFFF) || 0x10FFFF < wc)
1551 return seterror(EILSEQ);
1552 ucs4_to_utf16(wc, wbuf, wbufsize);
1553 return 4;
1554 }
1555
1556 static int
1557 utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1558 {
1559 uint wc;
1560
1561 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1562 {
1563 int r;
1564
1565 cv->mode |= UNICODE_MODE_BOM_DONE;
1566 if (bufsize < 4)
1567 return seterror(E2BIG);
1568 if (cv->codepage == 12000) /* little endian */
1569 memcpy(buf, "\xFF\xFE\x00\x00", 4);
1570 else if (cv->codepage == 12001) /* big endian */
1571 memcpy(buf, "\x00\x00\xFE\xFF", 4);
1572
1573 r = utf32_wctomb(cv, wbuf, wbufsize, buf + 4, bufsize - 4);
1574 if (r == -1)
1575 return -1;
1576 return r + 4;
1577 }
1578
1579 if (bufsize < 4)
1580 return seterror(E2BIG);
1581 wc = utf16_to_ucs4(wbuf);
1582 if (cv->codepage == 12000) /* little endian */
1583 {
1584 buf[0] = wc & 0x000000FF;
1585 buf[1] = (wc & 0x0000FF00) >> 8;
1586 buf[2] = (wc & 0x00FF0000) >> 16;
1587 buf[3] = (wc & 0xFF000000) >> 24;
1588 }
1589 else if (cv->codepage == 12001) /* big endian */
1590 {
1591 buf[0] = (wc & 0xFF000000) >> 24;
1592 buf[1] = (wc & 0x00FF0000) >> 16;
1593 buf[2] = (wc & 0x0000FF00) >> 8;
1594 buf[3] = wc & 0x000000FF;
1595 }
1596 return 4;
1597 }
1598
1599 /*
1600 * 50220: ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
1601 * 50221: ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow
1602 * 1 byte Kana)
1603 * 50222: ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte
1604 * Kana - SO/SI)
1605 *
1606 * MultiByteToWideChar() and WideCharToMultiByte() behave differently
1607 * depending on Windows version. On XP, WideCharToMultiByte() doesn't
1608 * terminate result sequence with ascii escape. But Vista does.
1609 * Use MLang instead.
1610 */
1611
/*
 * The ISO-2022 conversion state is packed into a single integer:
 * the character set number is shifted left by 8 bits and the shift
 * state occupies the low 8 bits.  ISO2022_MODE() builds such a value,
 * ISO2022_MODE_CS() and ISO2022_MODE_SHIFT() extract the two parts.
 */
#define ISO2022_MODE(cs, shift) (((cs) << 8) | (shift))
#define ISO2022_MODE_CS(mode) (((mode) >> 8) & 0xFF)
#define ISO2022_MODE_SHIFT(mode) ((mode) & 0xFF)

/* shift state values stored in the low byte of the mode */
#define ISO2022_SI 0
#define ISO2022_SO 1

/* shift in */
static const char iso2022_SI_seq[] = "\x0F";
/* shift out */
static const char iso2022_SO_seq[] = "\x0E";
1623
/* Describes one escape sequence and the character set it selects. */
typedef struct iso2022_esc_t iso2022_esc_t;
struct iso2022_esc_t {
    const char *esc;    /* bytes of the escape sequence */
    int esc_len;        /* number of bytes in esc */
    int len;            /* number of bytes per character in this set */
    int cs;             /* character set number stored in the mode */
};
1631
/* character set numbers kept in the CS part of the conversion mode */
#define ISO2022JP_CS_ASCII 0
#define ISO2022JP_CS_JISX0201_ROMAN 1
#define ISO2022JP_CS_JISX0201_KANA 2
#define ISO2022JP_CS_JISX0208_1978 3
#define ISO2022JP_CS_JISX0208_1983 4
#define ISO2022JP_CS_JISX0212 5

/*
 * Escape sequences understood for ISO-2022-JP, terminated by an entry
 * whose esc is NULL.
 *
 * The conversion functions scan this table for a matching escape
 * sequence and also index it directly with a character set number
 * (iesc[cs]), so the position of each entry has to stay in step with
 * the ISO2022JP_CS_* values above.  The entry at index 3 (the
 * "\x1B\x24\x40" escape) carries ISO2022JP_CS_JISX0208_1983 as its cs.
 */
static iso2022_esc_t iso2022jp_esc[] = {
    {"\x1B\x28\x42", 3, 1, ISO2022JP_CS_ASCII},
    {"\x1B\x28\x4A", 3, 1, ISO2022JP_CS_JISX0201_ROMAN},
    {"\x1B\x28\x49", 3, 1, ISO2022JP_CS_JISX0201_KANA},
    {"\x1B\x24\x40", 3, 2, ISO2022JP_CS_JISX0208_1983}, /* unify 1978 with 1983 */
    {"\x1B\x24\x42", 3, 2, ISO2022JP_CS_JISX0208_1983},
    {"\x1B\x24\x28\x44", 4, 2, ISO2022JP_CS_JISX0212},
    {NULL, 0, 0, 0}
};
1648
/*
 * Consume one item of ISO-2022-JP input.
 *
 * An item is either
 *   - an escape sequence from iso2022jp_esc[] or a shift-out / shift-in
 *     byte: cv->mode is updated, *wbufsize is set to 0 and the length of
 *     the sequence is returned; or
 *   - one character of the current character set: it is prefixed with
 *     the matching escape sequence in a scratch buffer and converted
 *     with ConvertINetMultiByteToUnicode(); the character's byte length
 *     is returned.
 *
 * Returns -1 with errno EINVAL when the input ends inside an escape
 * sequence or a character, and with errno EILSEQ for an unknown escape
 * sequence, a byte >= 0x80, or a failed conversion.
 */
static int
iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
{
    iso2022_esc_t *iesc = iso2022jp_esc;
    char tmp[MB_CHAR_MAX];
    int insize;
    HRESULT hr;
    DWORD dummy = 0;    /* zeroed mode handed to MLang on every call */
    int len;
    int esc_len;
    int cs;
    int shift;
    int i;

    if (buf[0] == 0x1B)
    {
        for (i = 0; iesc[i].esc != NULL; ++i)
        {
            esc_len = iesc[i].esc_len;
            if (bufsize < esc_len)
            {
                /* the input so far is a prefix of this escape sequence */
                if (strncmp((char *)buf, iesc[i].esc, bufsize) == 0)
                    return seterror(EINVAL);
            }
            else
            {
                if (strncmp((char *)buf, iesc[i].esc, esc_len) == 0)
                {
                    /* a new character set always starts shifted-in */
                    cv->mode = ISO2022_MODE(iesc[i].cs, ISO2022_SI);
                    *wbufsize = 0;
                    return esc_len;
                }
            }
        }
        /* not supported escape sequence */
        return seterror(EILSEQ);
    }
    else if (buf[0] == iso2022_SO_seq[0])
    {
        /* keep the character set, switch the shift state only */
        cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SO);
        *wbufsize = 0;
        return 1;
    }
    else if (buf[0] == iso2022_SI_seq[0])
    {
        cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SI);
        *wbufsize = 0;
        return 1;
    }

    cs = ISO2022_MODE_CS(cv->mode);
    shift = ISO2022_MODE_SHIFT(cv->mode);

    /* reset the mode for informal sequence */
    if (buf[0] < 0x20)
    {
        cs = ISO2022JP_CS_ASCII;
        shift = ISO2022_SI;
    }

    /* the table is indexed by character set number here */
    len = iesc[cs].len;
    if (bufsize < len)
        return seterror(EINVAL);
    /* every byte of the character must be below 0x80 */
    for (i = 0; i < len; ++i)
        if (!(buf[i] < 0x80))
            return seterror(EILSEQ);
    /* build "escape sequence [+ shift-out] + character" in tmp */
    esc_len = iesc[cs].esc_len;
    memcpy(tmp, iesc[cs].esc, esc_len);
    if (shift == ISO2022_SO)
    {
        memcpy(tmp + esc_len, iso2022_SO_seq, 1);
        esc_len += 1;
    }
    memcpy(tmp + esc_len, buf, len);

    if ((cv->codepage == 50220 || cv->codepage == 50221
                || cv->codepage == 50222) && shift == ISO2022_SO)
    {
        /* XXX: shift-out cannot be used for mbtowc (both kernel and
         * mlang) */
        /* rebuild tmp with the JIS X 0201 kana escape sequence instead */
        esc_len = iesc[ISO2022JP_CS_JISX0201_KANA].esc_len;
        memcpy(tmp, iesc[ISO2022JP_CS_JISX0201_KANA].esc, esc_len);
        memcpy(tmp + esc_len, buf, len);
    }

    insize = len + esc_len;
    hr = ConvertINetMultiByteToUnicode(&dummy, cv->codepage,
            (const char *)tmp, &insize, (wchar_t *)wbuf, wbufsize);
    /* MLang must succeed and consume the whole scratch buffer */
    if (hr != S_OK || insize != len + esc_len)
        return seterror(EILSEQ);

    /* Check for conversion error.  Assuming defaultChar is 0x3F. */
    /* ascii should be converted from ascii */
    /* NOTE(review): this compares against cv->mode, i.e. the mode before
     * the control-character reset above, not against cs/shift -- confirm
     * that this is intended. */
    if (wbuf[0] == buf[0]
            && cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI))
        return seterror(EILSEQ);

    /* reset the mode for informal sequence */
    if (cv->mode != ISO2022_MODE(cs, shift))
        cv->mode = ISO2022_MODE(cs, shift);

    return len;
}
1752
/*
 * Convert wbufsize UTF-16 units to ISO-2022-JP.
 *
 * The character is converted by ConvertINetUnicodeToMultiByte() with a
 * zeroed mode.  The escape sequence at the start of MLang's output
 * identifies the character set.  That sequence is then removed when
 * cv->mode already matches, or adjusted when it does not, and the result
 * is copied to buf.  cv->mode is updated to the new character set and
 * shift state.
 *
 * Returns the number of bytes written to buf, or -1 with errno EILSEQ
 * (conversion failed or produced something unexpected) or E2BIG (buf is
 * too small).
 */
static int
iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
{
    iso2022_esc_t *iesc = iso2022jp_esc;
    char tmp[MB_CHAR_MAX];
    int tmpsize = MB_CHAR_MAX;
    int insize = wbufsize;
    HRESULT hr;
    DWORD dummy = 0;    /* zeroed mode handed to MLang on every call */
    int len;
    int esc_len;
    int cs;
    int shift;
    int i;

    /*
     * MultiByte = [escape sequence] + character + [escape sequence]
     *
     * Whether trailing escape sequence is added depends on which API is
     * used (kernel or MLang, and its version).
     */
    hr = ConvertINetUnicodeToMultiByte(&dummy, cv->codepage,
            (const wchar_t *)wbuf, &wbufsize, tmp, &tmpsize);
    /* MLang must succeed and consume every input unit */
    if (hr != S_OK || insize != wbufsize)
        return seterror(EILSEQ);
    /* checked against the raw MLang output, before any escape sequence
     * is stripped below */
    else if (bufsize < tmpsize)
        return seterror(E2BIG);

    if (tmpsize == 1)
    {
        /* a single byte of output: ASCII without an escape sequence */
        cs = ISO2022JP_CS_ASCII;
        esc_len = 0;
    }
    else
    {
        /* identify the character set from the leading escape sequence;
         * the scan starts at index 1, skipping the ASCII entry */
        for (i = 1; iesc[i].esc != NULL; ++i)
        {
            esc_len = iesc[i].esc_len;
            if (strncmp(tmp, iesc[i].esc, esc_len) == 0)
            {
                cs = iesc[i].cs;
                break;
            }
        }
        if (iesc[i].esc == NULL)
            /* not supported escape sequence */
            return seterror(EILSEQ);
    }

    /* a shift-out byte may follow the escape sequence */
    shift = ISO2022_SI;
    if (tmp[esc_len] == iso2022_SO_seq[0])
    {
        shift = ISO2022_SO;
        esc_len += 1;
    }

    /* the table is indexed by character set number here */
    len = iesc[cs].len;

    /* Check for converting error.  Assuming defaultChar is 0x3F. */
    /* ascii should be converted from ascii */
    if (cs == ISO2022JP_CS_ASCII && !(wbuf[0] < 0x80))
        return seterror(EILSEQ);
    else if (tmpsize < esc_len + len)
        return seterror(EILSEQ);

    if (cv->mode == ISO2022_MODE(cs, shift))
    {
        /* remove escape sequence */
        if (esc_len != 0)
            memmove(tmp, tmp + esc_len, len);
        esc_len = 0;
    }
    else
    {
        if (cs == ISO2022JP_CS_ASCII)
        {
            /* put the ASCII escape sequence in front of the character */
            esc_len = iesc[ISO2022JP_CS_ASCII].esc_len;
            memmove(tmp + esc_len, tmp, len);
            memcpy(tmp, iesc[ISO2022JP_CS_ASCII].esc, esc_len);
        }
        if (ISO2022_MODE_SHIFT(cv->mode) == ISO2022_SO)
        {
            /* shift-in before changing to other mode */
            memmove(tmp + 1, tmp, len + esc_len);
            memcpy(tmp, iso2022_SI_seq, 1);
            esc_len += 1;
        }
    }

    if (bufsize < len + esc_len)
        return seterror(E2BIG);
    memcpy(buf, tmp, len + esc_len);
    cv->mode = ISO2022_MODE(cs, shift);
    return len + esc_len;
}
1848
1849 static int
1850 iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize)
1851 {
1852 iso2022_esc_t *iesc = iso2022jp_esc;
1853 int esc_len;
1854
1855 if (cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI))
1856 {
1857 esc_len = 0;
1858 if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI)
1859 esc_len += 1;
1860 if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII)
1861 esc_len += iesc[ISO2022JP_CS_ASCII].esc_len;
1862 if (bufsize < esc_len)
1863 return seterror(E2BIG);
1864
1865 esc_len = 0;
1866 if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI)
1867 {
1868 memcpy(buf, iso2022_SI_seq, 1);
1869 esc_len += 1;
1870 }
1871 if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII)
1872 {
1873 memcpy(buf + esc_len, iesc[ISO2022JP_CS_ASCII].esc,
1874 iesc[ISO2022JP_CS_ASCII].esc_len);
1875 esc_len += iesc[ISO2022JP_CS_ASCII].esc_len;
1876 }
1877 return esc_len;
1878 }
1879 return 0;
1880 }
1881
1882 #if defined(MAKE_DLL) && defined(USE_LIBICONV_DLL)
1883 BOOL WINAPI
1884 DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved)
1885 {
1886 switch( fdwReason )
1887 {
1888 case DLL_PROCESS_ATTACH:
1889 hwiniconv = (HMODULE)hinstDLL;
1890 break;
1891 case DLL_THREAD_ATTACH:
1892 case DLL_THREAD_DETACH:
1893 case DLL_PROCESS_DETACH:
1894 break;
1895 }
1896 return TRUE;
1897 }
1898 #endif
1899
1900 #if defined(MAKE_EXE)
1901 #include <stdio.h>
1902 #include <fcntl.h>
1903 #include <io.h>
1904 int
1905 main(int argc, char **argv)
1906 {
1907 char *fromcode = NULL;
1908 char *tocode = NULL;
1909 int i;
1910 char inbuf[BUFSIZ];
1911 char outbuf[BUFSIZ];
1912 char *pin;
1913 char *pout;
1914 size_t inbytesleft;
1915 size_t outbytesleft;
1916 size_t rest = 0;
1917 iconv_t cd;
1918 size_t r;
1919 FILE *in = stdin;
1920
1921 _setmode(_fileno(stdin), _O_BINARY);
1922 _setmode(_fileno(stdout), _O_BINARY);
1923
1924 for (i = 1; i < argc; ++i)
1925 {
1926 if (strcmp(argv[i], "-l") == 0)
1927 {
1928 for (i = 0; codepage_alias[i].name != NULL; ++i)
1929 printf("%s\n", codepage_alias[i].name);
1930 return 0;
1931 }
1932
1933 if (strcmp(argv[i], "-f") == 0)
1934 fromcode = argv[++i];
1935 else if (strcmp(argv[i], "-t") == 0)
1936 tocode = argv[++i];
1937 else
1938 {
1939 in = fopen(argv[i], "rb");
1940 if (in == NULL)
1941 {
1942 fprintf(stderr, "cannot open %s\n", argv[i]);
1943 return 1;
1944 }
1945 break;
1946 }
1947 }
1948
1949 if (fromcode == NULL || tocode == NULL)
1950 {
1951 printf("usage: %s -f from-enc -t to-enc [file]\n", argv[0]);
1952 return 0;
1953 }
1954
1955 cd = iconv_open(tocode, fromcode);
1956 if (cd == (iconv_t)(-1))
1957 {
1958 perror("iconv_open error");
1959 return 1;
1960 }
1961
1962 while ((inbytesleft = fread(inbuf + rest, 1, sizeof(inbuf) - rest, in)) != 0
1963 || rest != 0)
1964 {
1965 inbytesleft += rest;
1966 pin = inbuf;
1967 pout = outbuf;
1968 outbytesleft = sizeof(outbuf);
1969 r = iconv(cd, &pin, &inbytesleft, &pout, &outbytesleft);
1970 fwrite(outbuf, 1, sizeof(outbuf) - outbytesleft, stdout);
1971 if (r == (size_t)(-1) && errno != E2BIG && (errno != EINVAL || feof(in)))
1972 {
1973 perror("conversion error");
1974 return 1;
1975 }
1976 memmove(inbuf, pin, inbytesleft);
1977 rest = inbytesleft;
1978 }
1979 pout = outbuf;
1980 outbytesleft = sizeof(outbuf);
1981 r = iconv(cd, NULL, NULL, &pout, &outbytesleft);
1982 fwrite(outbuf, 1, sizeof(outbuf) - outbytesleft, stdout);
1983 if (r == (size_t)(-1))
1984 {
1985 perror("conversion error");
1986 return 1;
1987 }
1988
1989 iconv_close(cd);
1990
1991 return 0;
1992 }
1993 #endif
1994