1c85b2429d8ef8bc6ca656cdfa017fae13739055
[reactos.git] / reactos / tools / unicode / mbtowc.c
1 /*
2 * MultiByteToWideChar implementation
3 *
4 * Copyright 2000 Alexandre Julliard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21 #include <string.h>
22
23 #include "wine/unicode.h"
24
25 typedef unsigned char uchar;
26
27 /* get the decomposition of a Unicode char */
28 int get_decomposition( WCHAR src, WCHAR *dst, unsigned int dstlen )
29 {
30 extern const WCHAR unicode_decompose_table[];
31 const WCHAR *ptr = unicode_decompose_table;
32 int res;
33
34 *dst = src;
35 ptr = unicode_decompose_table + ptr[src >> 8];
36 ptr = unicode_decompose_table + ptr[(src >> 4) & 0x0f] + 2 * (src & 0x0f);
37 if (!*ptr) return 1;
38 if (dstlen <= 1) return 0;
39 /* apply the decomposition recursively to the first char */
40 if ((res = get_decomposition( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1];
41 return res;
42 }
43
44 /* check src string for invalid chars; return non-zero if invalid char found */
45 static inline int check_invalid_chars_sbcs( const struct sbcs_table *table,
46 const unsigned char *src, unsigned int srclen )
47 {
48 const WCHAR * const cp2uni = table->cp2uni;
49 while (srclen)
50 {
51 if (cp2uni[*src] == table->info.def_unicode_char && *src != table->info.def_char)
52 break;
53 src++;
54 srclen--;
55 }
56 return srclen;
57 }
58
59 /* mbstowcs for single-byte code page */
60 /* all lengths are in characters, not bytes */
61 static inline int mbstowcs_sbcs( const struct sbcs_table *table,
62 const unsigned char *src, unsigned int srclen,
63 WCHAR *dst, unsigned int dstlen )
64 {
65 const WCHAR * const cp2uni = table->cp2uni;
66 int ret = srclen;
67
68 if (dstlen < srclen)
69 {
70 /* buffer too small: fill it up to dstlen and return error */
71 srclen = dstlen;
72 ret = -1;
73 }
74
75 for (;;)
76 {
77 switch(srclen)
78 {
79 default:
80 case 16: dst[15] = cp2uni[src[15]];
81 case 15: dst[14] = cp2uni[src[14]];
82 case 14: dst[13] = cp2uni[src[13]];
83 case 13: dst[12] = cp2uni[src[12]];
84 case 12: dst[11] = cp2uni[src[11]];
85 case 11: dst[10] = cp2uni[src[10]];
86 case 10: dst[9] = cp2uni[src[9]];
87 case 9: dst[8] = cp2uni[src[8]];
88 case 8: dst[7] = cp2uni[src[7]];
89 case 7: dst[6] = cp2uni[src[6]];
90 case 6: dst[5] = cp2uni[src[5]];
91 case 5: dst[4] = cp2uni[src[4]];
92 case 4: dst[3] = cp2uni[src[3]];
93 case 3: dst[2] = cp2uni[src[2]];
94 case 2: dst[1] = cp2uni[src[1]];
95 case 1: dst[0] = cp2uni[src[0]];
96 case 0: break;
97 }
98 if (srclen < 16) return ret;
99 dst += 16;
100 src += 16;
101 srclen -= 16;
102 }
103 }
104
105 /* mbstowcs for single-byte code page with char decomposition */
106 static int mbstowcs_sbcs_decompose( const struct sbcs_table *table,
107 const unsigned char *src, unsigned int srclen,
108 WCHAR *dst, unsigned int dstlen )
109 {
110 const WCHAR * const cp2uni = table->cp2uni;
111 unsigned int len;
112
113 if (!dstlen) /* compute length */
114 {
115 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
116 for (len = 0; srclen; srclen--, src++)
117 len += get_decomposition( cp2uni[*src], dummy, 4 );
118 return len;
119 }
120
121 for (len = dstlen; srclen && len; srclen--, src++)
122 {
123 int res = get_decomposition( cp2uni[*src], dst, len );
124 if (!res) break;
125 len -= res;
126 dst += res;
127 }
128 if (srclen) return -1; /* overflow */
129 return dstlen - len;
130 }
131
132 /* query necessary dst length for src string */
133 static inline int get_length_dbcs( const struct dbcs_table *table,
134 const unsigned char *src, unsigned int srclen )
135 {
136 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
137 int len;
138
139 for (len = 0; srclen; srclen--, src++, len++)
140 {
141 if (cp2uni_lb[*src])
142 {
143 if (!--srclen) break; /* partial char, ignore it */
144 src++;
145 }
146 }
147 return len;
148 }
149
150 /* check src string for invalid chars; return non-zero if invalid char found */
151 static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
152 const unsigned char *src, unsigned int srclen )
153 {
154 const WCHAR * const cp2uni = table->cp2uni;
155 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
156
157 while (srclen)
158 {
159 unsigned char off = cp2uni_lb[*src];
160 if (off) /* multi-byte char */
161 {
162 if (srclen == 1) break; /* partial char, error */
163 if (cp2uni[(off << 8) + src[1]] == table->info.def_unicode_char &&
164 ((src[0] << 8) | src[1]) != table->info.def_char) break;
165 src++;
166 srclen--;
167 }
168 else if (cp2uni[*src] == table->info.def_unicode_char &&
169 *src != table->info.def_char) break;
170 src++;
171 srclen--;
172 }
173 return srclen;
174 }
175
176 /* mbstowcs for double-byte code page */
177 /* all lengths are in characters, not bytes */
178 static inline int mbstowcs_dbcs( const struct dbcs_table *table,
179 const unsigned char *src, unsigned int srclen,
180 WCHAR *dst, unsigned int dstlen )
181 {
182 const WCHAR * const cp2uni = table->cp2uni;
183 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
184 unsigned int len;
185
186 if (!dstlen) return get_length_dbcs( table, src, srclen );
187
188 for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
189 {
190 unsigned char off = cp2uni_lb[*src];
191 if (off)
192 {
193 if (!--srclen) break; /* partial char, ignore it */
194 src++;
195 *dst = cp2uni[(off << 8) + *src];
196 }
197 else *dst = cp2uni[*src];
198 }
199 if (srclen) return -1; /* overflow */
200 return dstlen - len;
201 }
202
203
204 /* mbstowcs for double-byte code page with character decomposition */
205 static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
206 const unsigned char *src, unsigned int srclen,
207 WCHAR *dst, unsigned int dstlen )
208 {
209 const WCHAR * const cp2uni = table->cp2uni;
210 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
211 unsigned int len;
212 WCHAR ch;
213 int res;
214
215 if (!dstlen) /* compute length */
216 {
217 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
218 for (len = 0; srclen; srclen--, src++)
219 {
220 unsigned char off = cp2uni_lb[*src];
221 if (off)
222 {
223 if (!--srclen) break; /* partial char, ignore it */
224 src++;
225 ch = cp2uni[(off << 8) + *src];
226 }
227 else ch = cp2uni[*src];
228 len += get_decomposition( ch, dummy, 4 );
229 }
230 return len;
231 }
232
233 for (len = dstlen; srclen && len; srclen--, src++)
234 {
235 unsigned char off = cp2uni_lb[*src];
236 if (off)
237 {
238 if (!--srclen) break; /* partial char, ignore it */
239 src++;
240 ch = cp2uni[(off << 8) + *src];
241 }
242 else ch = cp2uni[*src];
243 if (!(res = get_decomposition( ch, dst, len ))) break;
244 dst += res;
245 len -= res;
246 }
247 if (srclen) return -1; /* overflow */
248 return dstlen - len;
249 }
250
251
252 /* return -1 on dst buffer overflow, -2 on invalid input char */
253 int wine_cp_mbstowcs( const union cptable *table, int flags,
254 const char *src, int srclen,
255 WCHAR *dst, int dstlen )
256 {
257 if (table->info.char_size == 1)
258 {
259 if (flags & MB_ERR_INVALID_CHARS)
260 {
261 if (check_invalid_chars_sbcs( &table->sbcs, (const uchar*)src, srclen )) return -2;
262 }
263 if (!(flags & MB_COMPOSITE))
264 {
265 if (!dstlen) return srclen;
266 return mbstowcs_sbcs( &table->sbcs, (const uchar*)src, srclen, dst, dstlen );
267 }
268 return mbstowcs_sbcs_decompose( &table->sbcs, (const uchar*)src, srclen, dst, dstlen );
269 }
270 else /* mbcs */
271 {
272 if (flags & MB_ERR_INVALID_CHARS)
273 {
274 if (check_invalid_chars_dbcs( &table->dbcs, (const uchar*)src, srclen )) return -2;
275 }
276 if (!(flags & MB_COMPOSITE))
277 return mbstowcs_dbcs( &table->dbcs, (const uchar*)src, srclen, dst, dstlen );
278 else
279 return mbstowcs_dbcs_decompose( &table->dbcs, (const uchar*)src, srclen, dst, dstlen );
280 }
281 }
282
283 /* CP_SYMBOL implementation */
284 /* return -1 on dst buffer overflow */
285 int wine_cpsymbol_mbstowcs( const char *src, int srclen, WCHAR *dst, int dstlen)
286 {
287 int len, i;
288 if( dstlen == 0) return srclen;
289 len = dstlen > srclen ? srclen : dstlen;
290 for( i = 0; i < len; i++)
291 {
292 unsigned char c = src [ i ];
293 if( c < 0x20 )
294 dst[i] = c;
295 else
296 dst[i] = c + 0xf000;
297 }
298 if( srclen > len) return -1;
299 return len;
300 }