[ROSAPPS] Add NLS to TXT file converter
[reactos.git] / rosapps / applications / devutils / nls2txt / bestfit.c
1 /*
2 * PROJECT: ReactOS NLS to TXT Converter
3 * LICENSE: GNU General Public License Version 2.0 or any later version
4 * FILE: devutils/nls2txt/bestfit.c
5 * COPYRIGHT: Copyright 2016 Dmitry Chapyshev <dmitry@reactos.org>
6 */
7
8 #include "precomp.h"
9
10 static HANDLE
11 BestFit_CreateFile(const WCHAR *pszFile)
12 {
13 DWORD dwBytesWritten;
14 HANDLE hFile;
15
16 hFile = CreateFileW(pszFile,
17 GENERIC_WRITE,
18 FILE_SHARE_READ | FILE_SHARE_WRITE,
19 NULL,
20 CREATE_ALWAYS,
21 FILE_ATTRIBUTE_NORMAL,
22 NULL);
23 if (hFile != INVALID_HANDLE_VALUE)
24 {
25 /* Write UTF-8 BOM */
26 WriteFile(hFile, "\xEF\xBB\xBF", 3, &dwBytesWritten, NULL);
27 }
28
29 return hFile;
30 }
31
32 static VOID
33 BestFit_CloseFile(HANDLE hFile)
34 {
35 CloseHandle(hFile);
36 }
37
38 static CHAR*
39 UTF8fromUNICODE(const WCHAR *pszInput, PSIZE_T Size)
40 {
41 ULONG Length;
42 CHAR *pszOutput;
43
44 if (!pszInput || !Size) return NULL;
45
46 Length = WideCharToMultiByte(CP_UTF8, 0, pszInput, -1, NULL, 0, NULL, NULL);
47
48 *Size = Length * sizeof(CHAR);
49
50 pszOutput = (CHAR *) malloc(*Size);
51 if (pszOutput)
52 {
53 WideCharToMultiByte(CP_UTF8, 0, pszInput, -1, pszOutput, Length, NULL, NULL);
54 }
55
56 return pszOutput;
57 }
58
59 static VOID
60 BestFit_Write(HANDLE hFile, const WCHAR *pszFormat, ...)
61 {
62 LARGE_INTEGER FileSize;
63 LARGE_INTEGER MoveTo;
64 LARGE_INTEGER NewPos;
65 DWORD dwBytesWritten;
66
67 if (hFile == INVALID_HANDLE_VALUE)
68 return;
69
70 MoveTo.QuadPart = 0;
71 if (!SetFilePointerEx(hFile, MoveTo, &NewPos, FILE_END))
72 return;
73
74 if (!GetFileSizeEx(hFile, &FileSize))
75 return;
76
77 if (LockFile(hFile, (DWORD_PTR)NewPos.QuadPart, 0, (DWORD_PTR)FileSize.QuadPart, 0))
78 {
79 WCHAR *pszString;
80 CHAR *pszUtf8;
81 va_list Args;
82 SIZE_T Size;
83
84 va_start(Args, pszFormat);
85
86 Size = (_vscwprintf(pszFormat, Args) + 1) * sizeof(WCHAR);
87 pszString = (WCHAR*) malloc(Size);
88
89 if (!pszString)
90 {
91 UnlockFile(hFile, (DWORD_PTR)NewPos.QuadPart, 0, (DWORD_PTR)FileSize.QuadPart, 0);
92 va_end(Args);
93 return;
94 }
95
96 StringCbVPrintfW(pszString, Size, pszFormat, Args);
97 va_end(Args);
98
99 pszUtf8 = UTF8fromUNICODE(pszString, &Size);
100 if (pszUtf8)
101 {
102 WriteFile(hFile, pszUtf8, Size - sizeof(CHAR), &dwBytesWritten, NULL);
103 free(pszUtf8);
104 }
105
106 free(pszString);
107
108 UnlockFile(hFile, (DWORD_PTR)NewPos.QuadPart, 0, (DWORD_PTR)FileSize.QuadPart, 0);
109 }
110 }
111
112 BOOL
113 BestFit_FromNLS(const WCHAR *pszNLSFile, const WCHAR *pszBestFitFile)
114 {
115 CPTABLEINFO CodePageTable;
116 PUSHORT CodePage;
117 HANDLE hFile;
118 USHORT CodePageChar;
119 ULONG UnicodeChar;
120
121 CodePage = NLS_ReadFile(pszNLSFile, &CodePageTable);
122 if (CodePage == NULL)
123 return FALSE;
124
125 hFile = BestFit_CreateFile(pszBestFitFile);
126 if (hFile == INVALID_HANDLE_VALUE)
127 {
128 free(CodePage);
129 return FALSE;
130 }
131
132 /* The only field is the decimal windows code page number for this code page. */
133 BestFit_Write(hFile, L"CODEPAGE %u\r\n\r\n", CodePageTable.CodePage);
134
135 BestFit_Write(hFile,
136 L"CPINFO %u 0x%02X 0x%04X\r\n\r\n",
137 /* "1" for a single byte code page, "2" for a double byte code page */
138 CodePageTable.MaximumCharacterSize,
139 /* Replacement characters for unassigned Unicode code points when
140 written to this code page */
141 CodePageTable.DefaultChar,
142 /* Replacement characters for illegal or unassigned code page values
143 when converting to Unicode. */
144 CodePageTable.UniDefaultChar);
145
146 /* This field contains the number of following records of code page to Unicode mappings. */
147 BestFit_Write(hFile, L"MBTABLE %u\r\n\r\n", NLS_RecordsCountForMBTable(&CodePageTable));
148
149 for (CodePageChar = 0; CodePageChar <= 0xFF; CodePageChar++)
150 {
151 if (!NLS_IsDefaultCharForMB(&CodePageTable, CodePageChar))
152 {
153 WCHAR szCharName[MAX_STR_LEN] = { 0 };
154
155 GetUName(CodePageTable.MultiByteTable[CodePageChar], szCharName);
156
157 BestFit_Write(hFile,
158 L"0x%02X 0x%04X ;%s\r\n",
159 CodePageChar,
160 CodePageTable.MultiByteTable[CodePageChar],
161 szCharName);
162 }
163 }
164
165 BestFit_Write(hFile, L"\r\n");
166
167 if (NLS_IsGlyphTablePresent(&CodePageTable))
168 {
169 PUSHORT GlyphTable = CodePageTable.MultiByteTable + 256 + 1;
170
171 BestFit_Write(hFile, L"GLYPHTABLE %u\r\n\r\n", NLS_RecordsCountForGlyphTable(&CodePageTable));
172
173 for (CodePageChar = 0; CodePageChar <= 0xFF; CodePageChar++)
174 {
175 WCHAR szCharName[MAX_STR_LEN] = { 0 };
176
177 GetUName(GlyphTable[CodePageChar], szCharName);
178
179 BestFit_Write(hFile,
180 L"0x%02X 0x%04X ;%s\r\n",
181 CodePageChar,
182 GlyphTable[CodePageChar],
183 szCharName);
184 }
185
186 BestFit_Write(hFile, L"\r\n");
187 }
188
189 if (NLS_IsDBCSCodePage(&CodePageTable))
190 {
191 PUSHORT LeadByteRanges = (PUSHORT)&CodePageTable.LeadByte[0];
192 USHORT Index;
193 USHORT LeadByte;
194
195 BestFit_Write(hFile,
196 L"DBCSRANGE %u ;%u DBCS Lead Byte Ranges\r\n\r\n",
197 CodePageTable.DBCSRanges[0],
198 CodePageTable.DBCSRanges[0]);
199
200 for (Index = 0; Index < MAXIMUM_LEADBYTES / 2; Index++)
201 {
202 if (!LeadByteRanges[Index])
203 continue;
204
205 BestFit_Write(hFile,
206 L"0x%X 0x%X ;Lead Byte Range %u\r\n\r\n",
207 LOBYTE(LeadByteRanges[Index]),
208 HIBYTE(LeadByteRanges[Index]),
209 Index + 1);
210
211 for (LeadByte = LOBYTE(LeadByteRanges[Index]);
212 LeadByte <= HIBYTE(LeadByteRanges[Index]);
213 LeadByte++)
214 {
215 PUSHORT LeadByteInfo = CodePageTable.DBCSOffsets;
216
217 BestFit_Write(hFile,
218 L"DBCSTABLE %u ;Range = %u, LeadByte = 0x%02X\r\n\r\n",
219 NLS_RecordsCountForDBCSTable(&CodePageTable, LeadByte),
220 Index + 1,
221 LeadByte);
222
223 for (CodePageChar = 0; CodePageChar <= 0xFF; CodePageChar++)
224 {
225 USHORT Info = LeadByteInfo[LeadByte];
226
227 if (Info && LeadByteInfo[Info + CodePageChar] != CodePageTable.UniDefaultChar)
228 {
229 BestFit_Write(hFile,
230 L"0x%02X 0x%04X\r\n",
231 CodePageChar,
232 LeadByteInfo[Info + CodePageChar]);
233 }
234 }
235
236 BestFit_Write(hFile, L"\r\n");
237 }
238 }
239 }
240
241 /* This field contains the number of records of Unicode to byte mappings. */
242 BestFit_Write(hFile, L"WCTABLE %u\r\n\r\n", NLS_RecordsCountForUnicodeTable(&CodePageTable));
243
244 for (UnicodeChar = 0; UnicodeChar <= 0xFFFF; UnicodeChar++)
245 {
246 if (!NLS_IsDefaultCharForUnicode(&CodePageTable, UnicodeChar))
247 {
248 WCHAR szCharName[MAX_STR_LEN] = { 0 };
249
250 GetUName(UnicodeChar, szCharName);
251
252 if (NLS_IsDBCSCodePage(&CodePageTable))
253 {
254 PUSHORT MultiByteTable = (PUSHORT)CodePageTable.WideCharTable;
255
256 BestFit_Write(hFile,
257 L"0x%04X 0x%04X ;%s\r\n",
258 UnicodeChar,
259 MultiByteTable[UnicodeChar],
260 szCharName);
261 }
262 else
263 {
264 PUCHAR SingleByteTable = (PUCHAR)CodePageTable.WideCharTable;
265
266 BestFit_Write(hFile,
267 L"0x%04X 0x%02X ;%s\r\n",
268 UnicodeChar,
269 SingleByteTable[UnicodeChar],
270 szCharName);
271 }
272 }
273 }
274
275 /* This tag marks the end of the code page data. Anything after this marker is ignored. */
276 BestFit_Write(hFile, L"\r\nENDCODEPAGE\r\n");
277
278 BestFit_CloseFile(hFile);
279 free(CodePage);
280
281 return TRUE;
282 }