dd3c9242d37e91f3cbf9a5b283f15588ffed41a1
[reactos.git] / reactos / tools / unicode / mbtowc.c
1 /*
2 * MultiByteToWideChar implementation
3 *
4 * Copyright 2000 Alexandre Julliard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21 #include <string.h>
22
23 #include "wine/unicode.h"
24
25 /* get the decomposition of a Unicode char */
26 int get_decomposition( WCHAR src, WCHAR *dst, unsigned int dstlen )
27 {
28 extern const WCHAR unicode_decompose_table[];
29 const WCHAR *ptr = unicode_decompose_table;
30 int res;
31
32 *dst = src;
33 ptr = unicode_decompose_table + ptr[src >> 8];
34 ptr = unicode_decompose_table + ptr[(src >> 4) & 0x0f] + 2 * (src & 0x0f);
35 if (!*ptr) return 1;
36 if (dstlen <= 1) return 0;
37 /* apply the decomposition recursively to the first char */
38 if ((res = get_decomposition( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1];
39 return res;
40 }
41
42 /* check src string for invalid chars; return non-zero if invalid char found */
43 static inline int check_invalid_chars_sbcs( const struct sbcs_table *table,
44 const unsigned char *src, unsigned int srclen )
45 {
46 const WCHAR * const cp2uni = table->cp2uni;
47 while (srclen)
48 {
49 if (cp2uni[*src] == table->info.def_unicode_char && *src != table->info.def_char)
50 break;
51 src++;
52 srclen--;
53 }
54 return srclen;
55 }
56
57 /* mbstowcs for single-byte code page */
58 /* all lengths are in characters, not bytes */
59 static inline int mbstowcs_sbcs( const struct sbcs_table *table,
60 const unsigned char *src, unsigned int srclen,
61 WCHAR *dst, unsigned int dstlen )
62 {
63 const WCHAR * const cp2uni = table->cp2uni;
64 int ret = srclen;
65
66 if (dstlen < srclen)
67 {
68 /* buffer too small: fill it up to dstlen and return error */
69 srclen = dstlen;
70 ret = -1;
71 }
72
73 for (;;)
74 {
75 switch(srclen)
76 {
77 default:
78 case 16: dst[15] = cp2uni[src[15]];
79 case 15: dst[14] = cp2uni[src[14]];
80 case 14: dst[13] = cp2uni[src[13]];
81 case 13: dst[12] = cp2uni[src[12]];
82 case 12: dst[11] = cp2uni[src[11]];
83 case 11: dst[10] = cp2uni[src[10]];
84 case 10: dst[9] = cp2uni[src[9]];
85 case 9: dst[8] = cp2uni[src[8]];
86 case 8: dst[7] = cp2uni[src[7]];
87 case 7: dst[6] = cp2uni[src[6]];
88 case 6: dst[5] = cp2uni[src[5]];
89 case 5: dst[4] = cp2uni[src[4]];
90 case 4: dst[3] = cp2uni[src[3]];
91 case 3: dst[2] = cp2uni[src[2]];
92 case 2: dst[1] = cp2uni[src[1]];
93 case 1: dst[0] = cp2uni[src[0]];
94 case 0: break;
95 }
96 if (srclen < 16) return ret;
97 dst += 16;
98 src += 16;
99 srclen -= 16;
100 }
101 }
102
103 /* mbstowcs for single-byte code page with char decomposition */
104 static int mbstowcs_sbcs_decompose( const struct sbcs_table *table,
105 const unsigned char *src, unsigned int srclen,
106 WCHAR *dst, unsigned int dstlen )
107 {
108 const WCHAR * const cp2uni = table->cp2uni;
109 unsigned int len;
110
111 if (!dstlen) /* compute length */
112 {
113 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
114 for (len = 0; srclen; srclen--, src++)
115 len += get_decomposition( cp2uni[*src], dummy, 4 );
116 return len;
117 }
118
119 for (len = dstlen; srclen && len; srclen--, src++)
120 {
121 int res = get_decomposition( cp2uni[*src], dst, len );
122 if (!res) break;
123 len -= res;
124 dst += res;
125 }
126 if (srclen) return -1; /* overflow */
127 return dstlen - len;
128 }
129
130 /* query necessary dst length for src string */
131 static inline int get_length_dbcs( const struct dbcs_table *table,
132 const unsigned char *src, unsigned int srclen )
133 {
134 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
135 int len;
136
137 for (len = 0; srclen; srclen--, src++, len++)
138 {
139 if (cp2uni_lb[*src])
140 {
141 if (!--srclen) break; /* partial char, ignore it */
142 src++;
143 }
144 }
145 return len;
146 }
147
148 /* check src string for invalid chars; return non-zero if invalid char found */
149 static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
150 const unsigned char *src, unsigned int srclen )
151 {
152 const WCHAR * const cp2uni = table->cp2uni;
153 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
154
155 while (srclen)
156 {
157 unsigned char off = cp2uni_lb[*src];
158 if (off) /* multi-byte char */
159 {
160 if (srclen == 1) break; /* partial char, error */
161 if (cp2uni[(off << 8) + src[1]] == table->info.def_unicode_char &&
162 ((src[0] << 8) | src[1]) != table->info.def_char) break;
163 src++;
164 srclen--;
165 }
166 else if (cp2uni[*src] == table->info.def_unicode_char &&
167 *src != table->info.def_char) break;
168 src++;
169 srclen--;
170 }
171 return srclen;
172 }
173
174 /* mbstowcs for double-byte code page */
175 /* all lengths are in characters, not bytes */
176 static inline int mbstowcs_dbcs( const struct dbcs_table *table,
177 const unsigned char *src, unsigned int srclen,
178 WCHAR *dst, unsigned int dstlen )
179 {
180 const WCHAR * const cp2uni = table->cp2uni;
181 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
182 unsigned int len;
183
184 if (!dstlen) return get_length_dbcs( table, src, srclen );
185
186 for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
187 {
188 unsigned char off = cp2uni_lb[*src];
189 if (off)
190 {
191 if (!--srclen) break; /* partial char, ignore it */
192 src++;
193 *dst = cp2uni[(off << 8) + *src];
194 }
195 else *dst = cp2uni[*src];
196 }
197 if (srclen) return -1; /* overflow */
198 return dstlen - len;
199 }
200
201
202 /* mbstowcs for double-byte code page with character decomposition */
203 static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
204 const unsigned char *src, unsigned int srclen,
205 WCHAR *dst, unsigned int dstlen )
206 {
207 const WCHAR * const cp2uni = table->cp2uni;
208 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
209 unsigned int len;
210 WCHAR ch;
211 int res;
212
213 if (!dstlen) /* compute length */
214 {
215 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
216 for (len = 0; srclen; srclen--, src++)
217 {
218 unsigned char off = cp2uni_lb[*src];
219 if (off)
220 {
221 if (!--srclen) break; /* partial char, ignore it */
222 src++;
223 ch = cp2uni[(off << 8) + *src];
224 }
225 else ch = cp2uni[*src];
226 len += get_decomposition( ch, dummy, 4 );
227 }
228 return len;
229 }
230
231 for (len = dstlen; srclen && len; srclen--, src++)
232 {
233 unsigned char off = cp2uni_lb[*src];
234 if (off)
235 {
236 if (!--srclen) break; /* partial char, ignore it */
237 src++;
238 ch = cp2uni[(off << 8) + *src];
239 }
240 else ch = cp2uni[*src];
241 if (!(res = get_decomposition( ch, dst, len ))) break;
242 dst += res;
243 len -= res;
244 }
245 if (srclen) return -1; /* overflow */
246 return dstlen - len;
247 }
248
249
250 /* return -1 on dst buffer overflow, -2 on invalid input char */
251 int wine_cp_mbstowcs( const union cptable *table, int flags,
252 const char *src, int srclen,
253 WCHAR *dst, int dstlen )
254 {
255 if (table->info.char_size == 1)
256 {
257 if (flags & MB_ERR_INVALID_CHARS)
258 {
259 if (check_invalid_chars_sbcs( &table->sbcs, src, srclen )) return -2;
260 }
261 if (!(flags & MB_COMPOSITE))
262 {
263 if (!dstlen) return srclen;
264 return mbstowcs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
265 }
266 return mbstowcs_sbcs_decompose( &table->sbcs, src, srclen, dst, dstlen );
267 }
268 else /* mbcs */
269 {
270 if (flags & MB_ERR_INVALID_CHARS)
271 {
272 if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2;
273 }
274 if (!(flags & MB_COMPOSITE))
275 return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
276 else
277 return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen );
278 }
279 }
280
281 /* CP_SYMBOL implementation */
282 /* return -1 on dst buffer overflow */
283 int wine_cpsymbol_mbstowcs( const char *src, int srclen, WCHAR *dst, int dstlen)
284 {
285 int len, i;
286 if( dstlen == 0) return srclen;
287 len = dstlen > srclen ? srclen : dstlen;
288 for( i = 0; i < len; i++)
289 {
290 unsigned char c = src [ i ];
291 if( c < 0x20 )
292 dst[i] = c;
293 else
294 dst[i] = c + 0xf000;
295 }
296 if( srclen > len) return -1;
297 return len;
298 }