sdk/tools/create_nls/create_nls.c

   1 /*
   2  * Tool for creating NT-like NLS files for Unicode <-> Codepage conversions.
   3  * Tool for creating NT-like l_intl.nls file for case mapping of unicode
   4  * characters.
   5  * Copyright 2000 Timoshkov Dmitry
   6  * Copyright 2001 Matei Alexandru
   7  *
   8  * Sources of information:
   9  * Andrew Kozin's YAW project http://www.chat.ru/~stanson/yaw_en.html
  10  * Ove Kõven's investigations http://www.ping.uio.no/~ovehk/nls
  11  */
  12 #include <windows.h>
  13 #include <stdio.h>
  14 #include <stdlib.h>
  15 #include <malloc.h>
  16 #include <string.h>
  17 #include <ctype.h>
  18
  19 static const WCHAR * const uprtable[256];
  20 static const WCHAR * const lwrtable[256];
  21
  22 #define NLSDIR "../../media/nls"
  23 #define LIBDIR "unicode.org/"
  24
  25 typedef struct {
  26     WORD wSize; /* in words 0x000D */
  27     WORD CodePage;
  28     WORD MaxCharSize; /* 1 or 2 */
  29     BYTE DefaultChar[MAX_DEFAULTCHAR];
  30     WCHAR UnicodeDefaultChar;
  31     WCHAR unknown1;
  32     WCHAR unknown2;
  33     BYTE LeadByte[MAX_LEADBYTES];
  34 } __attribute__((packed)) NLS_FILE_HEADER;
  35
  36 /*
Support for mapping several Unicode characters
onto a single code page character.
  39
  40 002D;HYPHEN-MINUS;Pd;0;ET;;;;;N;;;;;
  41 00AD;SOFT HYPHEN;Pd;0;ON;;;;;N;;;;;
  42 2010;HYPHEN;Pd;0;ON;;;;;N;;;;;
  43 2011;NON-BREAKING HYPHEN;Pd;0;ON;<noBreak> 2010;;;;N;;;;;
  44 2013;EN DASH;Pd;0;ON;;;;;N;;;;;
  45 2014;EM DASH;Pd;0;ON;;;;;N;;;;;
  46 2015;HORIZONTAL BAR;Pd;0;ON;;;;;N;QUOTATION DASH;;;;
  47 */
  48
  49 /* HYPHEN-MINUS aliases */
  50 static WCHAR hyphen_aliases[] = {0x00AD,0x2010,0x2011,0x2013,0x2014,0x2015,0};
  51
  52 static struct {
  53     WCHAR cp_char;
  54     WCHAR *alias; /* must be 0 terminated */
  55 } u2cp_alias[] = {
  56 /* HYPHEN-MINUS aliases */
  57 {0x002D, hyphen_aliases}
  58 };
  59
  60 static void patch_aliases(void *u2cp, CPINFOEXA *cpi)
  61 {
  62     int i, j;
  63     WCHAR *wc, *alias;
  64     BYTE *c;
  65
  66     if(cpi->MaxCharSize == 2) {
  67         wc = (WCHAR *)u2cp;
  68         for(i = 0; i < 65536; i++) {
  69             for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) {
  70                 alias = u2cp_alias[j].alias;
  71                 while(*alias) {
  72                     if(*alias == i && wc[i] == *(WCHAR *)cpi->DefaultChar) {
  73                         wc[i] = u2cp_alias[j].cp_char;
  74                     }
  75                     alias++;
  76                 }
  77             }
  78         }
  79     }
  80     else {
  81         c = (BYTE *)u2cp;
  82         for(i = 0; i < 65536; i++) {
  83             for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) {
  84                 alias = u2cp_alias[j].alias;
  85                 while(*alias) {
  86                     if(*alias == i && c[i] == cpi->DefaultChar[0] && u2cp_alias[j].cp_char < 256) {
  87                         c[i] = (BYTE)u2cp_alias[j].cp_char;
  88                     }
  89                     alias++;
  90                 }
  91             }
  92         }
  93     }
  94 }
  95
  96 static BOOL write_unicode2cp_table(FILE *out, CPINFOEXA *cpi, WCHAR *table)
  97 {
  98     void *u2cp;
  99     WCHAR *wc;
 100     CHAR *c;
 101     int i;
 102     BOOL ret = TRUE;
 103
 104     u2cp = malloc(cpi->MaxCharSize * 65536);
 105     if(!u2cp) {
 106         printf("Not enough memory for Unicode to Codepage table\n");
 107         return FALSE;
 108     }
 109
 110     if(cpi->MaxCharSize == 2) {
 111         wc = (WCHAR *)u2cp;
 112         for(i = 0; i < 65536; i++)
 113             wc[i] = *(WCHAR *)cpi->DefaultChar;
 114
 115         for(i = 0; i < 65536; i++)
 116             if (table[i] != '?')
 117                 wc[table[i]] = (WCHAR)i;
 118     }
 119     else {
 120         c = (CHAR *)u2cp;
 121         for(i = 0; i < 65536; i++)
 122             c[i] = cpi->DefaultChar[0];
 123
 124         for(i = 0; i < 256; i++)
 125             if (table[i] != '?')
 126                 c[table[i]] = (CHAR)i;
 127     }
 128
 129     patch_aliases(u2cp, cpi);
 130
 131     if(fwrite(u2cp, 1, cpi->MaxCharSize * 65536, out) != cpi->MaxCharSize * 65536)
 132         ret = FALSE;
 133
 134     free(u2cp);
 135
 136     return ret;
 137 }
 138
 139 static BOOL write_lb_ranges(FILE *out, CPINFOEXA *cpi, WCHAR *table)
 140 {
 141     WCHAR sub_table[256];
 142     WORD offset, offsets[256];
 143     int i, j, range;
 144
 145     memset(offsets, 0, sizeof(offsets));
 146
 147     offset = 0;
 148
 149     for(i = 0; i < MAX_LEADBYTES; i += 2) {
 150         for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) {
 151             offset += 256;
 152             offsets[range] = offset;
 153         }
 154     }
 155
 156     if(fwrite(offsets, 1, sizeof(offsets), out) != sizeof(offsets))
 157         return FALSE;
 158
 159     for(i = 0; i < MAX_LEADBYTES; i += 2) {
 160         for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) {
 161             /*printf("Writing sub table for LeadByte %02X\n", range);*/
 162             for(j = MAKEWORD(0, range); j <= MAKEWORD(0xFF, range); j++) {
 163                 sub_table[j - MAKEWORD(0, range)] = table[j];
 164             }
 165
 166             if(fwrite(sub_table, 1, sizeof(sub_table), out) != sizeof(sub_table))
 167                 return FALSE;
 168         }
 169     }
 170
 171     return TRUE;
 172 }
 173
 174 static BOOL create_nls_file(char *name, CPINFOEXA *cpi, WCHAR *table, WCHAR *oemtable)
 175 {
 176     FILE *out;
 177     NLS_FILE_HEADER nls;
 178     WORD wValue, number_of_lb_ranges, number_of_lb_subtables, i;
 179
 180     printf("Creating NLS table \"%s\"\n", name);
 181
 182     if(!(out = fopen(name, "wb"))) {
 183         printf("Could not create file \"%s\"\n", name);
 184         return FALSE;
 185     }
 186
 187     memset(&nls, 0, sizeof(nls));
 188
 189     nls.wSize = sizeof(nls) / sizeof(WORD);
 190     nls.CodePage = cpi->CodePage;
 191     nls.MaxCharSize = cpi->MaxCharSize;
 192     memcpy(nls.DefaultChar, cpi->DefaultChar, MAX_DEFAULTCHAR);
 193     nls.UnicodeDefaultChar = cpi->UnicodeDefaultChar;
 194     nls.unknown1 = '?';
 195     nls.unknown2 = '?';
 196     memcpy(nls.LeadByte, cpi->LeadByte, MAX_LEADBYTES);
 197
 198     if(fwrite(&nls, 1, sizeof(nls), out) != sizeof(nls)) {
 199         fclose(out);
 200         printf("Could not write to file \"%s\"\n", name);
 201         return FALSE;
 202     }
 203
 204     number_of_lb_ranges = 0;
 205     number_of_lb_subtables = 0;
 206
 207     for(i = 0; i < MAX_LEADBYTES; i += 2) {
 208         if(cpi->LeadByte[i] != 0 && cpi->LeadByte[i + 1] > cpi->LeadByte[i]) {
 209             number_of_lb_ranges++;
 210             number_of_lb_subtables += cpi->LeadByte[i + 1] - cpi->LeadByte[i] + 1;
 211         }
 212     }
 213
 214     /*printf("Number of LeadByte ranges %d\n", number_of_lb_ranges);*/
 215     /*printf("Number of LeadByte subtables %d\n", number_of_lb_subtables);*/
 216
 217     /* Calculate offset to Unicode to CP table in words:
 218      *  1. (256 * sizeof(WORD)) primary CP to Unicode table +
 219      *  2. (WORD) optional OEM glyph table size in words +
 220      *  3. OEM glyph table size in words * sizeof(WORD) +
 221      *  4. (WORD) Number of DBCS LeadByte ranges +
 222      *  5. if (Number of DBCS LeadByte ranges != 0) 256 * sizeof(WORD) offsets of lead byte sub tables
 223      *  6. (Number of DBCS LeadByte sub tables * 256 * sizeof(WORD)) LeadByte sub tables +
 224      *  7. (WORD) Unknown flag
 225      */
 226
 227     wValue = (256 * sizeof(WORD) + /* 1 */
 228               sizeof(WORD) + /* 2 */
 229               ((oemtable !=NULL) ? (256 * sizeof(WORD)) : 0) + /* 3 */
 230               sizeof(WORD) + /* 4 */
 231               ((number_of_lb_subtables != 0) ? 256 * sizeof(WORD) : 0) + /* 5 */
 232               number_of_lb_subtables * 256 * sizeof(WORD) + /* 6 */
 233               sizeof(WORD) /* 7 */
 234               ) / sizeof(WORD);
 235
 236     /* offset of Unicode to CP table in words */
 237     fwrite(&wValue, 1, sizeof(wValue), out);
 238
 239     /* primary CP to Unicode table */
 240     if(fwrite(table, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) {
 241         fclose(out);
 242         printf("Could not write to file \"%s\"\n", name);
 243         return FALSE;
 244     }
 245
 246     /* optional OEM glyph table size in words */
 247     wValue = (oemtable != NULL) ? (256 * sizeof(WORD)) : 0;
 248     fwrite(&wValue, 1, sizeof(wValue), out);
 249
 250     /* optional OEM to Unicode table */
 251     if (oemtable) {
 252         if(fwrite(oemtable, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) {
 253             fclose(out);
 254             printf("Could not write to file \"%s\"\n", name);
 255             return FALSE;
 256         }
 257     }
 258
 259     /* Number of DBCS LeadByte ranges */
 260     fwrite(&number_of_lb_ranges, 1, sizeof(number_of_lb_ranges), out);
 261
 262     /* offsets of lead byte sub tables and lead byte sub tables */
 263     if(number_of_lb_ranges > 0) {
 264         if(!write_lb_ranges(out, cpi, table)) {
 265             fclose(out);
 266             printf("Could not write to file \"%s\"\n", name);
 267             return FALSE;
 268         }
 269     }
 270
 271     /* Unknown flag */
 272     wValue = 0;
 273     fwrite(&wValue, 1, sizeof(wValue), out);
 274
 275     if(!write_unicode2cp_table(out, cpi, table)) {
 276         fclose(out);
 277         printf("Could not write to file \"%s\"\n", name);
 278         return FALSE;
 279     }
 280
 281     fclose(out);
 282     return TRUE;
 283 }
 284
 285 /* correct the codepage information such as default chars */
 286 static void patch_codepage_info(CPINFOEXA *cpi)
 287 {
 288     /* currently nothing */
 289 }
 290
 291 static WCHAR *Load_CP2Unicode_Table(char *table_name, UINT cp, CPINFOEXA *cpi)
 292 {
 293     char buf[256];
 294     char *p;
 295     DWORD n, value;
 296     FILE *file;
 297     WCHAR *table;
 298     int lb_ranges, lb_range_started, line;
 299
 300     printf("Loading translation table \"%s\"\n", table_name);
 301
 302     /* Init to default values */
 303     memset(cpi, 0, sizeof(CPINFOEXA));
 304     cpi->CodePage = cp;
 305     *(WCHAR *)cpi->DefaultChar = '?';
 306     cpi->MaxCharSize = 1;
 307     cpi->UnicodeDefaultChar = '?';
 308
 309     patch_codepage_info(cpi);
 310
 311     table = (WCHAR *)malloc(sizeof(WCHAR) * 65536);
 312     if(!table) {
 313         printf("Not enough memory for Codepage to Unicode table\n");
 314         return NULL;
 315     }
 316
 317     for(n = 0; n < 256; n++)
 318         table[n] = (WCHAR)n;
 319
 320     for(n = 256; n < 65536; n++)
 321         table[n] = cpi->UnicodeDefaultChar;
 322
 323     file = fopen(table_name, "r");
 324     if(file == NULL) {
 325         free(table);
 326         return NULL;
 327     }
 328
 329     line = 0;
 330     lb_ranges = 0;
 331     lb_range_started = 0;
 332
 333     while(fgets(buf, sizeof(buf), file)) {
 334         line++;
 335         p = buf;
 336         while(isspace(*p)) p++;
 337
 338         if(!*p || p[0] == '#')
 339             continue;
 340
 341         n = strtol(p, &p, 0);
 342         if(n > 0xFFFF) {
 343             printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, n, table_name);
 344             continue;
 345         }
 346
 347         if(n > 0xFF && cpi->MaxCharSize != 2) {
 348             /*printf("Line %d: Entry 0x%04lX: Switching to DBCS\n", line, n);*/
 349             cpi->MaxCharSize = 2;
 350         }
 351
 352         while(isspace(*p)) p++;
 353
 354         if(!*p || p[0] == '#') {
 355             /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
 356         }
 357         else {
 358             value = strtol(p, &p, 0);
 359             if(value > 0xFFFF) {
 360                 printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, n, table_name);
 361             }
 362             table[n] = (WCHAR)value;
 363         }
 364
 365         /* wait for comment */
 366         while(*p && *p != '#') p++;
 367
 368         if(*p == '#' && strstr(p, "DBCS LEAD BYTE")) {
 369             /*printf("Line %d, entry 0x%02lX DBCS LEAD BYTE\n", line, n);*/
 370             if(n > 0xFF) {
 371                 printf("Line %d: Entry 0x%04lX: Error: DBCS lead byte overflowed\n", line, n);
 372                 continue;
 373             }
 374
 375             table[n] = (WCHAR)0;
 376
 377             if(lb_range_started) {
 378                 cpi->LeadByte[(lb_ranges - 1) * 2 + 1] = (BYTE)n;
 379             }
 380             else {
 381                 /*printf("Line %d: Starting new DBCS lead byte range, entry 0x%02lX\n", line, n);*/
 382                 if(lb_ranges < MAX_LEADBYTES/2) {
 383                     lb_ranges++;
 384                     lb_range_started = 1;
 385                     cpi->LeadByte[(lb_ranges - 1) * 2] = (BYTE)n;
 386                 }
 387                 else
 388                     printf("Line %d: Error: could not start new lead byte range\n", line);
 389             }
 390         }
 391         else {
 392             if(lb_range_started)
 393                 lb_range_started = 0;
 394         }
 395     }
 396
 397     fclose(file);
 398
 399     return table;
 400 }
 401
 402 static WCHAR *Load_OEM2Unicode_Table(char *table_name, WCHAR *def_table, UINT cp, CPINFOEXA *cpi)
 403 {
 404     char buf[256];
 405     char *p;
 406     DWORD n, value;
 407     FILE *file;
 408     WCHAR *table;
 409     int line;
 410
 411     printf("Loading oem glyph table \"%s\"\n", table_name);
 412
 413     table = (WCHAR *)malloc(sizeof(WCHAR) * 65536);
 414     if(!table) {
 415         printf("Not enough memory for Codepage to Unicode table\n");
 416         return NULL;
 417     }
 418
 419     memcpy(table, def_table, 65536 * sizeof(WCHAR));
 420
 421     file = fopen(table_name, "r");
 422     if(file == NULL) {
 423         free(table);
 424         return NULL;
 425     }
 426
 427     line = 0;
 428
 429     while(fgets(buf, sizeof(buf), file)) {
 430         line++;
 431         p = buf;
 432         while(isspace(*p)) p++;
 433
 434         if(!*p || p[0] == '#')
 435             continue;
 436
 437         value = strtol(p, &p, 16);
 438         if(value > 0xFFFF) {
 439             printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, value, table_name);
 440             continue;
 441         }
 442
 443         while(isspace(*p)) p++;
 444
 445         if(!*p || p[0] == '#') {
 446             /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
 447             continue;
 448         }
 449         else {
 450             n = strtol(p, &p, 16);
 451             if(n > 0xFFFF) {
 452                 printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, value, table_name);
 453                 continue;
 454             }
 455         }
 456
 457         if (cpi->CodePage == 864) {
 458             while(isspace(*p)) p++;
 459
 460             if(!*p || p[0] == '#' || p[0] == '-') {
 461                 /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
 462                 continue;
 463             }
 464             else {
 465                 n = strtol(p, &p, 16);
 466                 if(n > 0xFFFF) {
 467                     printf("Line %d: Entry 0x%06lX oem value: File \"%s\" corrupted\n", line, value, table_name);
 468                 }
 469                 continue;
 470             }
 471         }
 472
 473         table[n] = (WCHAR)value;
 474     }
 475
 476     fclose(file);
 477
 478     return table;
 479 }
 480
 481 int write_nls_files()
 482 {
 483     WCHAR *table;
 484     WCHAR *oemtable;
 485     char nls_filename[256];
 486     CPINFOEXA cpi;
 487     int i;
 488     struct code_page {
 489         UINT cp;
 490         BOOL oem;
 491         char *table_filename;
 492         char *comment;
 493     } pages[] = {
 494         {37,  FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT", "IBM EBCDIC US Canada"},
 495         {424, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP424.TXT", "IBM EBCDIC Hebrew"},
 496         {437, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT", "OEM United States"},
 497         {500, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT", "IBM EBCDIC International"},
 498         /*{708, FALSE, "", "Arabic ASMO"},*/
 499         /*{720, FALSE, "", "Arabic Transparent ASMO"},*/
 500         {737, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT", "OEM Greek 437G"},
 501         {775, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT", "OEM Baltic"},
 502         {850, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT", "OEM Multilingual Latin 1"},
 503         {852, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT", "OEM Slovak Latin 2"},
 504         {855, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT", "OEM Cyrillic" },
 505         {856, TRUE,  LIBDIR"MAPPINGS/VENDORS/MISC/CP856.TXT", "Hebrew PC"},
 506         {857, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT", "OEM Turkish"},
 507         {860, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT", "OEM Portuguese"},
 508         {861, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT", "OEM Icelandic"},
 509         {862, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT", "OEM Hebrew"},
 510         {863, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP863.TXT", "OEM Canadian French"},
 511         {864, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP864.TXT", "OEM Arabic"},
 512         {865, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP865.TXT", "OEM Nordic"},
 513         {866, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT", "OEM Russian"},
 514         {869, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP869.TXT", "OEM Greek"},
 515         /*{870, FALSE, "", "IBM EBCDIC Multilingual/ROECE (Latin 2)"},*/
 516         {874, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP874.TXT", "ANSI/OEM Thai"},
 517         {875, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT", "IBM EBCDIC Greek"},
 518         {878, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"},
 519         {932, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT", "ANSI/OEM Japanese Shift-JIS"},
 520         {936, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT", "ANSI/OEM Simplified Chinese GBK"},
 521         {949, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT", "ANSI/OEM Korean Unified Hangul"},
 522         {950, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT", "ANSI/OEM Traditional Chinese Big5"},
 523         {1006, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP1006.TXT", "IBM Arabic"},
 524         {1026, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT", "IBM EBCDIC Latin 5 Turkish"},
 525         {1250, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT", "ANSI Eastern Europe"},
 526         {1251, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT", "ANSI Cyrillic"},
 527         {1252, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT", "ANSI Latin 1"},
 528         {1253, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT", "ANSI Greek"},
 529         {1254, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT", "ANSI Turkish"},
 530         {1255, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT", "ANSI Hebrew"},
 531         {1256, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT", "ANSI Arabic"},
 532         {1257, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT", "ANSI Baltic"},
 533         {1258, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT", "ANSI/OEM Viet Nam"},
 534         {10000, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ROMAN.TXT", "Mac Roman"},
 535         {10006, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT", "Mac Greek"},
 536         {10007, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/CYRILLIC.TXT", "Mac Cyrillic"},
 537         {10029, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT", "Mac Latin 2"},
 538         {10079, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT", "Mac Icelandic"},
 539         {10081, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT", "Mac Turkish"},
 540         /*{20000, FALSE, "", "CNS Taiwan"},*/
 541         /*{20001, FALSE, "", "TCA Taiwan"},*/
 542         /*{20002, FALSE, "", "Eten Taiwan"},*/
 543         /*{20003, FALSE, "", "IBM5550 Taiwan"},*/
 544         /*{20004, FALSE, "", "TeleText Taiwan"},*/
 545         /*{20005, FALSE, "", "Wang Taiwan"},*/
 546         /*{20105, FALSE, "", "IA5 IRV International Alphabet No.5"},*/
 547         /*{20106, FALSE, "", "IA5 German"},*/
 548         /*{20107, FALSE, "", "IA5 Swedish"},*/
 549         /*{20108, FALSE, "", "IA5 Norwegian"},*/
 550         /*{20127, FALSE, "", "US ASCII"}, */
 551         /*{20261, FALSE, "", "T.61"},*/
 552         /*{20269, FALSE, "", "ISO 6937 NonSpacing Accent"},*/
 553         /*{20273, FALSE, "", "IBM EBCDIC Germany"},*/
 554         /*{20277, FALSE, "", "IBM EBCDIC Denmark/Norway"},*/
 555         /*{20278, FALSE, "", "IBM EBCDIC Finland/Sweden"},*/
 556         /*{20280, FALSE, "", "IBM EBCDIC Italy"},*/
 557         /*{20284, FALSE, "", "IBM EBCDIC Latin America/Spain"},*/
 558         /*{20285, FALSE, "", "IBM EBCDIC United Kingdom"},*/
 559         /*{20290, FALSE, "", "IBM EBCDIC Japanese Katakana Extended"},*/
 560         /*{20297, FALSE, "", "IBM EBCDIC France"},*/
 561         /*{20420, FALSE, "", "IBM EBCDIC Arabic"},*/
 562         /*{20423, FALSE, "IBM869.TXT", "IBM EBCDIC Greek"},*/
 563         /*{20424, FALSE, "", "IBM EBCDIC Hebrew"},*/
 564         /*{20833, FALSE, "", "IBM EBCDIC Korean Extended"},*/
 565         /*{20838, FALSE, "", "IBM EBCDIC Thai"},*/
 566         {20871, FALSE, "ReactOS/IBMCP861.TXT", "IBM EBCDIC Icelandic"},
 567         /*{20880, FALSE, "", "IBM EBCDIC Cyrillic (Russian)"},*/
 568         {20866, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"},
 569         /*{20905, FALSE, "", "IBM EBCDIC Turkish"},*/
 570         /*{21025, FALSE, "", "IBM EBCDIC Cyrillic (Serbian, Bulgarian)"},*/
 571         /*{21027, FALSE, "", "Ext Alpha Lowercase"},*/
 572         {28591, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-1.TXT", "ISO 8859-1 Latin 1"},
 573         {28592, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-2.TXT", "ISO 8859-2 Eastern Europe"},
 574         {28593, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-3.TXT", "ISO 8859-3 Turkish"},
 575         {28594, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-4.TXT", "ISO 8859-4 Baltic"},
 576         {28595, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-5.TXT", "ISO 8859-5 Cyrillic"},
 577         {28596, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-6.TXT", "ISO 8859-6 Arabic"},
 578         {28597, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-7.TXT", "ISO 8859-7 Greek"},
 579         {28598, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-8.TXT", "ISO 8859-8 Hebrew"},
 580         {28599, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-9.TXT", "ISO 8859-9 Latin 5"}
 581     };
 582
 583     for(i = 0; i < sizeof(pages)/sizeof(pages[0]); i++) {
 584         table = Load_CP2Unicode_Table(pages[i].table_filename, pages[i].cp, &cpi);
 585         if(!table) {
 586             printf("Could not load \"%s\" (%s)\n", pages[i].table_filename, pages[i].comment);
 587             continue;
 588         }
 589
 590         if (pages[i].oem) {
 591             oemtable = Load_OEM2Unicode_Table(LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", table, pages[i].cp, &cpi);
 592             if(!oemtable) {
 593                 printf("Could not load \"%s\" (%s)\n", LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", "IBM OEM glyph table");
 594                 continue;
 595             }
 596         }
 597
 598         sprintf(nls_filename, "%s/c_%03d.nls", NLSDIR, cpi.CodePage);
 599         if(!create_nls_file(nls_filename, &cpi, table, pages[i].oem ? oemtable : NULL)) {
 600             printf("Could not write \"%s\" (%s)\n", nls_filename, pages[i].comment);
 601         }
 602
 603         if (pages[i].oem)
 604             free(oemtable);
 605
 606         free(table);
 607     }
 608
 609     return 0;
 610 }
 611
 612
 613
 614 static WORD *to_upper_org = NULL, *to_lower_org = NULL;
 615
 616 #if 0
 617 static WORD diffs[256];
 618 static int number_of_diffs;
 619 #endif
 620
 621 static WORD number_of_subtables_with_diffs;
 622 /* pointers to subtables with 16 elements in each to the main table */
 623 static WORD *subtables_with_diffs[4096];
 624
 625 static WORD number_of_subtables_with_offsets;
 626 /* subtables with 16 elements  */
 627 static WORD subtables_with_offsets[4096 * 16];
 628
 629 static void test_packed_table(WCHAR *table)
 630 {
 631     WCHAR test_str[] = L"This is an English text. \x0CF\x0EE-\x0F0\x0F3\x0F1\x0F1\x0EA\x0E8 \x0FF \x0EF\x0E8\x0F1\x0E0\x0F2\x0FC \x0F3\x0EC\x0E5\x0FE \x0ED\x0E5\x0EC\x0ED\x0EE\x0E6\x0EA\x0EE. 1234567890";
 632     //WORD diff, off;
 633     //WORD *sub_table;
 634     DWORD i, len;
 635
 636     len = lstrlenW(test_str);
 637
 638     for(i = 0; i < len + 1; i++) {
 639         /*off = table[HIBYTE(test_str[i])];
 640
 641         sub_table = table + off;
 642         off = sub_table[LOBYTE(test_str[i]) >> 4];
 643
 644         sub_table = table + off;
 645         off = LOBYTE(test_str[i]) & 0x0F;
 646
 647         diff = sub_table[off];
 648
 649         test_str[i] += diff;*/
 650         test_str[i] += table[table[table[HIBYTE(test_str[i])] + (LOBYTE(test_str[i]) >> 4)] + (LOBYTE(test_str[i]) & 0x0F)];
 651     }
 652 /*
 653     {
 654         FILE *file;
 655         static int n = 0;
 656         char name[20];
 657
 658         sprintf(name, "text%02d.dat", n++);
 659         file = fopen(name, "wb");
 660         fwrite(test_str, len * sizeof(WCHAR), 1, file);
 661         fclose(file);
 662     }*/
 663 }
 664
 665 static BOOL CreateCaseDiff(char *table_name)
 666 {
 667     char buf[256];
 668     char *p;
 669     WORD code, case_mapping;
 670     FILE *file;
 671     int line;
 672
 673     to_upper_org = (WORD *)calloc(65536, sizeof(WORD));
 674     if(!to_upper_org) {
 675         printf("Not enough memory for to upper table\n");
 676         return FALSE;
 677     }
 678
 679     to_lower_org = (WORD *)calloc(65536, sizeof(WORD));
 680     if(!to_lower_org) {
 681         printf("Not enough memory for to lower table\n");
 682         return FALSE;
 683     }
 684
 685     file = fopen(table_name, "r");
 686     if(file == NULL) {
 687         printf("Could not open file \"%s\"\n", table_name);
 688         return FALSE;
 689     }
 690
 691     line = 0;
 692
 693     while(fgets(buf, sizeof(buf), file)) {
 694         line++;
 695         p = buf;
 696         while(*p && isspace(*p)) p++;
 697
 698         if(!*p)
 699             continue;
 700
 701         /* 0. Code value */
 702         code = (WORD)strtol(p, &p, 16);
 703
 704         //if(code != 0x9A0 && code != 0xBA0)
 705             //continue;
 706
 707         while(*p && *p != ';') p++;
 708         if(!*p)
 709             continue;
 710         p++;
 711
 712         /* 1. Character name */
 713         while(*p && *p != ';') p++;
 714         if(!*p)
 715             continue;
 716         p++;
 717
 718         /* 2. General Category */
 719         while(*p && *p != ';') p++;
 720         if(!*p)
 721             continue;
 722         p++;
 723
 724         /* 3. Canonical Combining Classes */
 725         while(*p && *p != ';') p++;
 726         if(!*p)
 727             continue;
 728         p++;
 729
 730         /* 4. Bidirectional Category */
 731         while(*p && *p != ';') p++;
 732         if(!*p)
 733             continue;
 734         p++;
 735
 736         /* 5. Character Decomposition Mapping */
 737         while(*p && *p != ';') p++;
 738         if(!*p)
 739             continue;
 740         p++;
 741
 742         /* 6. Decimal digit value */
 743         while(*p && *p != ';') p++;
 744         if(!*p)
 745             continue;
 746         p++;
 747
 748         /* 7. Digit value */
 749         while(*p && *p != ';') p++;
 750         if(!*p)
 751             continue;
 752         p++;
 753
 754         /* 8. Numeric value */
 755         while(*p && *p != ';') p++;
 756         if(!*p)
 757             continue;
 758         p++;
 759
 760         /* 9. Mirrored */
 761         while(*p && *p != ';') p++;
 762         if(!*p)
 763             continue;
 764         p++;
 765
 766         /* 10. Unicode 1.0 Name */
 767         while(*p && *p != ';') p++;
 768         if(!*p)
 769             continue;
 770         p++;
 771
 772         /* 11. 10646 comment field */
 773         while(*p && *p != ';') p++;
 774         if(!*p)
 775             continue;
 776         p++;
 777
 778         /* 12. Uppercase Mapping */
 779         while(*p && isspace(*p)) p++;
 780         if(!*p) continue;
 781         if(*p != ';') {
 782             case_mapping = (WORD)strtol(p, &p, 16);
 783             to_upper_org[code] = case_mapping - code;
 784             while(*p && *p != ';') p++;
 785         }
 786         else
 787             p++;
 788
 789         /* 13. Lowercase Mapping */
 790         while(*p && isspace(*p)) p++;
 791         if(!*p) continue;
 792         if(*p != ';') {
 793             case_mapping = (WORD)strtol(p, &p, 16);
 794             to_lower_org[code] = case_mapping - code;
 795             while(*p && *p != ';') p++;
 796         }
 797         else
 798             p++;
 799
 800         /* 14. Titlecase Mapping */
 801         while(*p && *p != ';') p++;
 802         if(!*p)
 803             continue;
 804         p++;
 805     }
 806
 807     fclose(file);
 808
 809     return TRUE;
 810 }
 811
 812 #if 0
 813 static int find_diff(WORD diff)
 814 {
 815     int i;
 816
 817     for(i = 0; i < number_of_diffs; i++) {
 818         if(diffs[i] == diff)
 819             return i;
 820     }
 821
 822     return -1;
 823 }
 824 #endif
 825
 826 static WORD find_subtable_with_diffs(WORD *table, WORD *subtable)
 827 {
 828     WORD index;
 829
 830     for(index = 0; index < number_of_subtables_with_diffs; index++) {
 831         if(memcmp(subtables_with_diffs[index], subtable, 16 * sizeof(WORD)) == 0) {
 832             return index;
 833         }
 834     }
 835
 836     if(number_of_subtables_with_diffs >= 4096) {
 837         printf("Could not add new subtable with diffs, storage is full\n");
 838         return 0;
 839     }
 840
 841     subtables_with_diffs[number_of_subtables_with_diffs] = subtable;
 842     number_of_subtables_with_diffs++;
 843
 844     return index;
 845 }
 846
 847 static WORD find_subtable_with_offsets(WORD *subtable)
 848 {
 849     WORD index;
 850
 851     for(index = 0; index < number_of_subtables_with_offsets; index++) {
 852         if(memcmp(&subtables_with_offsets[index * 16], subtable, 16 * sizeof(WORD)) == 0) {
 853             return index;
 854         }
 855     }
 856
 857     if(number_of_subtables_with_offsets >= 4096) {
 858         printf("Could not add new subtable with offsets, storage is full\n");
 859         return 0;
 860     }
 861
 862     memcpy(&subtables_with_offsets[number_of_subtables_with_offsets * 16], subtable, 16 * sizeof(WORD));
 863     number_of_subtables_with_offsets++;
 864
 865     return index;
 866 }
 867
 868 static WORD *pack_table(WORD *table, WORD *packed_size_in_words)
 869 {
 870     WORD high, low4, index;
 871     WORD main_index[256];
 872     WORD temp_subtable[16];
 873     WORD *packed_table;
 874     WORD *subtable_src, *subtable_dst;
 875
 876     memset(subtables_with_diffs, 0, sizeof(subtables_with_diffs));
 877     number_of_subtables_with_diffs = 0;
 878
 879     memset(subtables_with_offsets, 0, sizeof(subtables_with_offsets));
 880     number_of_subtables_with_offsets = 0;
 881
 882     for(high = 0; high < 256; high++) {
 883         for(low4 = 0; low4 < 256; low4 += 16) {
 884             index = find_subtable_with_diffs(table, &table[MAKEWORD(low4, high)]);
 885
 886             temp_subtable[low4 >> 4] = index;
 887         }
 888
 889         index = find_subtable_with_offsets(temp_subtable);
 890         main_index[high] = index;
 891     }
 892
 893     *packed_size_in_words = 0x100 + number_of_subtables_with_offsets * 16 + number_of_subtables_with_diffs * 16;
 894     packed_table = calloc(*packed_size_in_words, sizeof(WORD));
 895
 896     /* fill main index according to the subtables_with_offsets */
 897     for(high = 0; high < 256; high++) {
 898         packed_table[high] = 0x100 + main_index[high] * 16;
 899     }
 900
 901     //memcpy(sub_table, subtables_with_offsets, number_of_subtables_with_offsets * 16);
 902
 903     /* fill subtable index according to the subtables_with_diffs */
 904     for(index = 0; index < number_of_subtables_with_offsets; index++) {
 905         subtable_dst = packed_table + 0x100 + index * 16;
 906         subtable_src = &subtables_with_offsets[index * 16];
 907
 908         for(low4 = 0; low4 < 16; low4++) {
 909             subtable_dst[low4] = 0x100 + number_of_subtables_with_offsets * 16 + subtable_src[low4] * 16;
 910         }
 911     }
 912
 913
 914     for(index = 0; index < number_of_subtables_with_diffs; index++) {
 915         subtable_dst = packed_table + 0x100 + number_of_subtables_with_offsets * 16 + index * 16;
 916         memcpy(subtable_dst, subtables_with_diffs[index], 16 * sizeof(WORD));
 917
 918     }
 919
 920
 921     test_packed_table(packed_table);
 922
 923     return packed_table;
 924 }
 925
 926 int write_casemap_file(void)
 927 {
 928     WORD packed_size_in_words, offset_to_next_table_in_words;
 929     WORD *packed_table, value;
 930     FILE *file;
 931
 932     if(!CreateCaseDiff(LIBDIR"UnicodeData.txt"))
 933         return -1;
 934
 935     file = fopen(NLSDIR"/l_intl.nls", "wb");
 936
 937     /* write version number */
 938     value = 1;
 939     fwrite(&value, 1, sizeof(WORD), file);
 940
 941     /* pack upper case table */
 942     packed_table = pack_table(to_upper_org, &packed_size_in_words);
 943     offset_to_next_table_in_words = packed_size_in_words + 1;
 944     fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file);
 945     /* write packed upper case table */
 946     fwrite(packed_table, sizeof(WORD), packed_size_in_words, file);
 947     free(packed_table);
 948
 949     /* pack lower case table */
 950     packed_table = pack_table(to_lower_org, &packed_size_in_words);
 951     offset_to_next_table_in_words = packed_size_in_words + 1;
 952     fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file);
 953     /* write packed lower case table */
 954     fwrite(packed_table, sizeof(WORD), packed_size_in_words, file);
 955     free(packed_table);
 956
 957     fclose(file);
 958
 959     free(to_upper_org);
 960     free(to_lower_org);
 961
 962     return 0;
 963 }
 964
 965 int main()
 966 {
 967     write_nls_files();
 968     write_casemap_file();
 969
 970     return 0;
 971 }