[MKHIVE] Remove key name in our custom registry tree; use cell index instead
[reactos.git] / reactos / tools / unicode / mbtowc.c
1 /*
2 * MultiByteToWideChar implementation
3 *
4 * Copyright 2000 Alexandre Julliard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21 #include <string.h>
22
23 #include "wine/unicode.h"
24
25 /* get the decomposition of a Unicode char */
26 static int get_decomposition( WCHAR src, WCHAR *dst, unsigned int dstlen )
27 {
28 extern const WCHAR unicode_decompose_table[];
29 const WCHAR *ptr = unicode_decompose_table;
30 int res;
31
32 *dst = src;
33 ptr = unicode_decompose_table + ptr[src >> 8];
34 ptr = unicode_decompose_table + ptr[(src >> 4) & 0x0f] + 2 * (src & 0x0f);
35 if (!*ptr) return 1;
36 if (dstlen <= 1) return 0;
37 /* apply the decomposition recursively to the first char */
38 if ((res = get_decomposition( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1];
39 return res;
40 }
41
42 /* check the code whether it is in Unicode Private Use Area (PUA). */
43 /* MB_ERR_INVALID_CHARS raises an error converting from 1-byte character to PUA. */
44 static inline int is_private_use_area_char(WCHAR code)
45 {
46 return (code >= 0xe000 && code <= 0xf8ff);
47 }
48
49 /* check src string for invalid chars; return non-zero if invalid char found */
50 static inline int check_invalid_chars_sbcs( const struct sbcs_table *table, int flags,
51 const unsigned char *src, unsigned int srclen )
52 {
53 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
54 const WCHAR def_unicode_char = table->info.def_unicode_char;
55 const unsigned char def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
56 + (def_unicode_char & 0xff)];
57 while (srclen)
58 {
59 if ((cp2uni[*src] == def_unicode_char && *src != def_char) ||
60 is_private_use_area_char(cp2uni[*src])) break;
61 src++;
62 srclen--;
63 }
64 return srclen;
65 }
66
67 /* mbstowcs for single-byte code page */
68 /* all lengths are in characters, not bytes */
69 static inline int mbstowcs_sbcs( const struct sbcs_table *table, int flags,
70 const unsigned char *src, unsigned int srclen,
71 WCHAR *dst, unsigned int dstlen )
72 {
73 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
74 int ret = srclen;
75
76 if (dstlen < srclen)
77 {
78 /* buffer too small: fill it up to dstlen and return error */
79 srclen = dstlen;
80 ret = -1;
81 }
82
83 for (;;)
84 {
85 switch(srclen)
86 {
87 default:
88 case 16: dst[15] = cp2uni[src[15]];
89 case 15: dst[14] = cp2uni[src[14]];
90 case 14: dst[13] = cp2uni[src[13]];
91 case 13: dst[12] = cp2uni[src[12]];
92 case 12: dst[11] = cp2uni[src[11]];
93 case 11: dst[10] = cp2uni[src[10]];
94 case 10: dst[9] = cp2uni[src[9]];
95 case 9: dst[8] = cp2uni[src[8]];
96 case 8: dst[7] = cp2uni[src[7]];
97 case 7: dst[6] = cp2uni[src[6]];
98 case 6: dst[5] = cp2uni[src[5]];
99 case 5: dst[4] = cp2uni[src[4]];
100 case 4: dst[3] = cp2uni[src[3]];
101 case 3: dst[2] = cp2uni[src[2]];
102 case 2: dst[1] = cp2uni[src[1]];
103 case 1: dst[0] = cp2uni[src[0]];
104 case 0: break;
105 }
106 if (srclen < 16) return ret;
107 dst += 16;
108 src += 16;
109 srclen -= 16;
110 }
111 }
112
113 /* mbstowcs for single-byte code page with char decomposition */
114 static int mbstowcs_sbcs_decompose( const struct sbcs_table *table, int flags,
115 const unsigned char *src, unsigned int srclen,
116 WCHAR *dst, unsigned int dstlen )
117 {
118 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
119 unsigned int len;
120
121 if (!dstlen) /* compute length */
122 {
123 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
124 for (len = 0; srclen; srclen--, src++)
125 len += get_decomposition( cp2uni[*src], dummy, 4 );
126 return len;
127 }
128
129 for (len = dstlen; srclen && len; srclen--, src++)
130 {
131 int res = get_decomposition( cp2uni[*src], dst, len );
132 if (!res) break;
133 len -= res;
134 dst += res;
135 }
136 if (srclen) return -1; /* overflow */
137 return dstlen - len;
138 }
139
140 /* query necessary dst length for src string */
141 static inline int get_length_dbcs( const struct dbcs_table *table,
142 const unsigned char *src, unsigned int srclen )
143 {
144 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
145 int len;
146
147 for (len = 0; srclen; srclen--, src++, len++)
148 {
149 if (cp2uni_lb[*src])
150 {
151 if (!--srclen) break; /* partial char, ignore it */
152 src++;
153 }
154 }
155 return len;
156 }
157
158 /* check src string for invalid chars; return non-zero if invalid char found */
159 static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
160 const unsigned char *src, unsigned int srclen )
161 {
162 const WCHAR * const cp2uni = table->cp2uni;
163 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
164 const WCHAR def_unicode_char = table->info.def_unicode_char;
165 const unsigned short def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
166 + (def_unicode_char & 0xff)];
167 while (srclen)
168 {
169 unsigned char off = cp2uni_lb[*src];
170 if (off) /* multi-byte char */
171 {
172 if (srclen == 1) break; /* partial char, error */
173 if (cp2uni[(off << 8) + src[1]] == def_unicode_char &&
174 ((src[0] << 8) | src[1]) != def_char) break;
175 src++;
176 srclen--;
177 }
178 else if ((cp2uni[*src] == def_unicode_char && *src != def_char) ||
179 is_private_use_area_char(cp2uni[*src])) break;
180 src++;
181 srclen--;
182 }
183 return srclen;
184 }
185
186 /* mbstowcs for double-byte code page */
187 /* all lengths are in characters, not bytes */
188 static inline int mbstowcs_dbcs( const struct dbcs_table *table,
189 const unsigned char *src, unsigned int srclen,
190 WCHAR *dst, unsigned int dstlen )
191 {
192 const WCHAR * const cp2uni = table->cp2uni;
193 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
194 unsigned int len;
195
196 if (!dstlen) return get_length_dbcs( table, src, srclen );
197
198 for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
199 {
200 unsigned char off = cp2uni_lb[*src];
201 if (off)
202 {
203 if (!--srclen) break; /* partial char, ignore it */
204 src++;
205 *dst = cp2uni[(off << 8) + *src];
206 }
207 else *dst = cp2uni[*src];
208 }
209 if (srclen) return -1; /* overflow */
210 return dstlen - len;
211 }
212
213
214 /* mbstowcs for double-byte code page with character decomposition */
215 static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
216 const unsigned char *src, unsigned int srclen,
217 WCHAR *dst, unsigned int dstlen )
218 {
219 const WCHAR * const cp2uni = table->cp2uni;
220 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
221 unsigned int len;
222 WCHAR ch;
223 int res;
224
225 if (!dstlen) /* compute length */
226 {
227 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
228 for (len = 0; srclen; srclen--, src++)
229 {
230 unsigned char off = cp2uni_lb[*src];
231 if (off)
232 {
233 if (!--srclen) break; /* partial char, ignore it */
234 src++;
235 ch = cp2uni[(off << 8) + *src];
236 }
237 else ch = cp2uni[*src];
238 len += get_decomposition( ch, dummy, 4 );
239 }
240 return len;
241 }
242
243 for (len = dstlen; srclen && len; srclen--, src++)
244 {
245 unsigned char off = cp2uni_lb[*src];
246 if (off)
247 {
248 if (!--srclen) break; /* partial char, ignore it */
249 src++;
250 ch = cp2uni[(off << 8) + *src];
251 }
252 else ch = cp2uni[*src];
253 if (!(res = get_decomposition( ch, dst, len ))) break;
254 dst += res;
255 len -= res;
256 }
257 if (srclen) return -1; /* overflow */
258 return dstlen - len;
259 }
260
261
262 /* return -1 on dst buffer overflow, -2 on invalid input char */
263 int wine_cp_mbstowcs( const union cptable *table, int flags,
264 const char *s, int srclen,
265 WCHAR *dst, int dstlen )
266 {
267 const unsigned char *src = (const unsigned char*) s;
268
269 if (table->info.char_size == 1)
270 {
271 if (flags & MB_ERR_INVALID_CHARS)
272 {
273 if (check_invalid_chars_sbcs( &table->sbcs, flags, src, srclen )) return -2;
274 }
275 if (!(flags & MB_COMPOSITE))
276 {
277 if (!dstlen) return srclen;
278 return mbstowcs_sbcs( &table->sbcs, flags, src, srclen, dst, dstlen );
279 }
280 return mbstowcs_sbcs_decompose( &table->sbcs, flags, src, srclen, dst, dstlen );
281 }
282 else /* mbcs */
283 {
284 if (flags & MB_ERR_INVALID_CHARS)
285 {
286 if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2;
287 }
288 if (!(flags & MB_COMPOSITE))
289 return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
290 else
291 return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen );
292 }
293 }
294
295 /* CP_SYMBOL implementation */
296 /* return -1 on dst buffer overflow */
297 int wine_cpsymbol_mbstowcs( const char *src, int srclen, WCHAR *dst, int dstlen)
298 {
299 int len, i;
300 if( dstlen == 0) return srclen;
301 len = dstlen > srclen ? srclen : dstlen;
302 for( i = 0; i < len; i++)
303 {
304 unsigned char c = src [ i ];
305 if( c < 0x20 )
306 dst[i] = c;
307 else
308 dst[i] = c + 0xf000;
309 }
310 if( srclen > len) return -1;
311 return len;
312 }