Added nls tool.
author Eric Kohl <eric.kohl@reactos.org>
Wed, 2 Jan 2002 00:48:14 +0000 (00:48 +0000)
committer Eric Kohl <eric.kohl@reactos.org>
Wed, 2 Jan 2002 00:48:14 +0000 (00:48 +0000)
svn path=/trunk/; revision=2471

reactos/tools/create_nls/Makefile [new file with mode: 0644]
reactos/tools/create_nls/Readme [new file with mode: 0644]
reactos/tools/create_nls/create_nls.c [new file with mode: 0644]
reactos/tools/create_nls/unicode.org/.cvsignore [new file with mode: 0644]

diff --git a/reactos/tools/create_nls/Makefile b/reactos/tools/create_nls/Makefile
new file mode 100644 (file)
index 0000000..7056ce8
--- /dev/null
@@ -0,0 +1,54 @@
+#
+#  ReactOS/NLS conversion tool
+#
+#  Builds create_nls with the host compiler. The 'nls' target runs the tool
+#  and 'clean_nls' removes the generated tables from media/nls.
+#
+PATH_TO_TOP = ../..
+
+TARGET=create_nls$(EXE_POSTFIX)
+
+all: $(TARGET)
+
+OBJECTS = create_nls.o
+
+CLEAN_FILES = *.o create_nls$(EXE_POSTFIX)
+
+create_nls$(EXE_POSTFIX): $(OBJECTS)
+       $(HOST_CC) $(OBJECTS) -o create_nls$(EXE_POSTFIX)
+
+HOST_CFLAGS = -I.
+
+create_nls.o: create_nls.c
+       $(HOST_CC) $(HOST_CFLAGS) -c create_nls.c -o create_nls.o
+
+ifeq ($(HOST),mingw32-linux)
+clean:
+       rm -f *.o
+       rm -f create_nls$(EXE_POSTFIX)
+endif
+ifeq ($(HOST),mingw32-windows)
+clean:
+       del *.o
+       del create_nls$(EXE_POSTFIX)
+endif
+
+# Special target names are case-sensitive: a lowercase ".phony" would be an
+# ordinary target and would not mark anything as phony.
+.PHONY: clean
+
+nls:
+       create_nls$(EXE_POSTFIX)
+
+.PHONY: nls
+
+ifeq ($(HOST),mingw32-linux)
+clean_nls:
+       rm -f ../../media/nls/*.nls
+endif
+ifeq ($(HOST),mingw32-windows)
+clean_nls:
+       del ..\..\media\nls\*.nls
+endif
+
+.PHONY: clean_nls
+
+
+include $(PATH_TO_TOP)/rules.mak
+
+# EOF
diff --git a/reactos/tools/create_nls/Readme b/reactos/tools/create_nls/Readme
new file mode 100644 (file)
index 0000000..e72804b
--- /dev/null
@@ -0,0 +1,23 @@
+Create_nls - .nls file creation tool
+------------------------------------
+
+Create_nls is a tool to create binary .nls files from codepage mapping
+files provided by Unicode Inc. Due to the confusing license these files
+are not part of the ReactOS source tree.
+
+
+You can download the files from 'http://www.unicode.org/public' or
+'ftp://ftp.unicode.org/pub'. Place the MAPPINGS directory tree and
+UnicodeData.txt in the unicode.org subdirectory; create_nls reads them
+from there and builds the .nls files.
+
+Makefile targets:
+       'make':            builds create_nls tool.
+       'make clean':      deletes all executable and object files
+       'make nls':        generates binary .nls files.
+       'make clean_nls':  deletes binary .nls files.
+
+Since the binary .nls files are part of the ReactOS source tree it is not
+necessary to build these files yourself. Therefore create_nls is not
+integrated with the main makefile. You will have to build and run it
+separately.
diff --git a/reactos/tools/create_nls/create_nls.c b/reactos/tools/create_nls/create_nls.c
new file mode 100644 (file)
index 0000000..c3c31ba
--- /dev/null
@@ -0,0 +1,965 @@
+/*
+ * Tool for creating NT-like NLS files for Unicode <-> Codepage conversions.
+ * Tool for creating NT-like l_intl.nls file for case mapping of unicode
+ * characters.
+ * Copyright 2000 Timoshkov Dmitry
+ * Copyright 2001 Matei Alexandru
+ *
+ * Sources of information:
+ * Andrew Kozin's YAW project http://www.chat.ru/~stanson/yaw_en.html
+ * Ove Kåven's investigations http://www.ping.uio.no/~ovehk/nls
+ */
+#include <windows.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <malloc.h>
+#include <string.h>
+#include <ctype.h>
+
+static const WCHAR * const uprtable[256]; /* tentative definition; not referenced anywhere else in this file */
+static const WCHAR * const lwrtable[256]; /* tentative definition; not referenced anywhere else in this file */
+
+#define NLSDIR "../../media/nls" /* directory the generated .nls files are written to */
+#define LIBDIR "unicode.org/" /* prefix of every input table path (mapping files, UnicodeData.txt) */
+
+typedef struct { /* header written first into every code page .nls file by create_nls_file() */
+    WORD wSize; /* in words 0x000D */
+    WORD CodePage; /* copied from CPINFOEXA.CodePage */
+    WORD MaxCharSize; /* 1 or 2 */
+    BYTE DefaultChar[MAX_DEFAULTCHAR]; /* copied from CPINFOEXA.DefaultChar */
+    WCHAR UnicodeDefaultChar; /* copied from CPINFOEXA.UnicodeDefaultChar */
+    WCHAR unknown1; /* meaning unknown; create_nls_file() stores '?' */
+    WCHAR unknown2; /* meaning unknown; create_nls_file() stores '?' */
+    BYTE LeadByte[MAX_LEADBYTES]; /* DBCS lead byte ranges as (first, last) pairs, copied from CPINFOEXA.LeadByte */
+} __attribute__((packed)) NLS_FILE_HEADER;
+
+/*
+Support for translation from the multiple unicode chars
+to the single code page char.
+
+002D;HYPHEN-MINUS;Pd;0;ET;;;;;N;;;;;
+00AD;SOFT HYPHEN;Pd;0;ON;;;;;N;;;;;
+2010;HYPHEN;Pd;0;ON;;;;;N;;;;;
+2011;NON-BREAKING HYPHEN;Pd;0;ON;<noBreak> 2010;;;;N;;;;;
+2013;EN DASH;Pd;0;ON;;;;;N;;;;;
+2014;EM DASH;Pd;0;ON;;;;;N;;;;;
+2015;HORIZONTAL BAR;Pd;0;ON;;;;;N;QUOTATION DASH;;;;
+*/
+
+/* HYPHEN-MINUS aliases: the other dash code points from the list above, 0 terminated */
+static WCHAR hyphen_aliases[] = {0x00AD,0x2010,0x2011,0x2013,0x2014,0x2015,0};
+
+static struct { /* alias groups applied by patch_aliases() to the Unicode to code page table */
+    WCHAR cp_char; /* value stored in the table for every alias of the group */
+    WCHAR *alias; /* must be 0 terminated */
+} u2cp_alias[] = {
+/* HYPHEN-MINUS aliases */
+{0x002D, hyphen_aliases}
+};
+
+/*
+ * Map alias Unicode characters to their code page character.
+ *
+ * For every alias listed in u2cp_alias whose entry in the Unicode to code
+ * page table still holds the default character, the group's cp_char is
+ * stored instead. Entries that already map to a real character are kept.
+ *
+ * u2cp: Unicode to code page table with 65536 entries; the entries are
+ *       WCHAR sized when cpi->MaxCharSize is 2 and BYTE sized otherwise.
+ * cpi:  code page information; MaxCharSize and DefaultChar are read.
+ */
+static void patch_aliases(void *u2cp, CPINFOEXA *cpi)
+{
+    size_t j;
+    WCHAR *alias;
+
+    /* The table is indexed directly by the alias code point instead of
+     * scanning all 65536 entries for each alias. Every entry still sees
+     * its aliases in the same order, so the result is unchanged. */
+    if(cpi->MaxCharSize == 2) {
+        WCHAR *wc = (WCHAR *)u2cp;
+        WCHAR def = *(WCHAR *)cpi->DefaultChar;
+
+        for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) {
+            for(alias = u2cp_alias[j].alias; *alias; alias++) {
+                if(wc[*alias] == def)
+                    wc[*alias] = u2cp_alias[j].cp_char;
+            }
+        }
+    }
+    else {
+        BYTE *c = (BYTE *)u2cp;
+
+        for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) {
+            /* a single byte table cannot hold characters above 0xFF */
+            if(u2cp_alias[j].cp_char >= 256)
+                continue;
+
+            for(alias = u2cp_alias[j].alias; *alias; alias++) {
+                if(c[*alias] == cpi->DefaultChar[0])
+                    c[*alias] = (BYTE)u2cp_alias[j].cp_char;
+            }
+        }
+    }
+}
+
+/*
+ * Build the Unicode to code page table by inverting the code page to
+ * Unicode table, patch in the aliases and append the result to out.
+ *
+ * out:   file the table is written to.
+ * cpi:   code page information; MaxCharSize selects WCHAR (2) or CHAR
+ *        sized entries, DefaultChar fills unmapped entries.
+ * table: code page to Unicode table; entries equal to '?' are treated as
+ *        unmapped.
+ *
+ * Returns TRUE on success, FALSE on allocation or write failure.
+ */
+static BOOL write_unicode2cp_table(FILE *out, CPINFOEXA *cpi, WCHAR *table)
+{
+    UINT table_bytes = cpi->MaxCharSize * 65536;
+    void *u2cp = malloc(table_bytes);
+    BOOL written;
+    int code;
+
+    if(u2cp == NULL) {
+        printf("Not enough memory for Unicode to Codepage table\n");
+        return FALSE;
+    }
+
+    if(cpi->MaxCharSize != 2) {
+        CHAR *narrow = (CHAR *)u2cp;
+
+        /* every Unicode character starts out as the default character */
+        for(code = 0; code < 65536; code++)
+            narrow[code] = cpi->DefaultChar[0];
+
+        /* single byte code page: only the first 256 entries are inverted */
+        for(code = 0; code < 256; code++) {
+            if(table[code] == '?')
+                continue;
+            narrow[table[code]] = (CHAR)code;
+        }
+    }
+    else {
+        WCHAR *wide = (WCHAR *)u2cp;
+        WCHAR def = *(WCHAR *)cpi->DefaultChar;
+
+        for(code = 0; code < 65536; code++)
+            wide[code] = def;
+
+        for(code = 0; code < 65536; code++) {
+            if(table[code] == '?')
+                continue;
+            wide[table[code]] = (WCHAR)code;
+        }
+    }
+
+    patch_aliases(u2cp, cpi);
+
+    written = (fwrite(u2cp, 1, table_bytes, out) == table_bytes) ? TRUE : FALSE;
+
+    free(u2cp);
+
+    return written;
+}
+
+/*
+ * Write the DBCS lead byte offset table followed by one 256 entry sub
+ * table per lead byte.
+ *
+ * The offset table has 256 WORD entries indexed by lead byte; entries of
+ * bytes that are not lead bytes stay 0, lead bytes get 256, 512, ... in
+ * the order they appear in cpi->LeadByte.
+ *
+ * Returns TRUE on success, FALSE if a write failed.
+ */
+static BOOL write_lb_ranges(FILE *out, CPINFOEXA *cpi, WCHAR *table)
+{
+    WORD sub_table_offsets[256];
+    WCHAR sub_table[256];
+    WORD next_offset = 0;
+    int pair, lead;
+
+    memset(sub_table_offsets, 0, sizeof(sub_table_offsets));
+
+    /* first pass: hand out an offset to every lead byte */
+    for(pair = 0; pair < MAX_LEADBYTES; pair += 2) {
+        lead = cpi->LeadByte[pair];
+        while(lead != 0 && lead <= cpi->LeadByte[pair + 1]) {
+            next_offset += 256;
+            sub_table_offsets[lead] = next_offset;
+            lead++;
+        }
+    }
+
+    if(fwrite(sub_table_offsets, 1, sizeof(sub_table_offsets), out) != sizeof(sub_table_offsets))
+        return FALSE;
+
+    /* second pass: emit the 256 entries that follow each lead byte */
+    for(pair = 0; pair < MAX_LEADBYTES; pair += 2) {
+        lead = cpi->LeadByte[pair];
+        while(lead != 0 && lead <= cpi->LeadByte[pair + 1]) {
+            memcpy(sub_table, &table[MAKEWORD(0, lead)], sizeof(sub_table));
+
+            if(fwrite(sub_table, 1, sizeof(sub_table), out) != sizeof(sub_table))
+                return FALSE;
+
+            lead++;
+        }
+    }
+
+    return TRUE;
+}
+
+/*
+ * Write one code page .nls file.
+ *
+ * Written in order: NLS_FILE_HEADER, the offset of the Unicode to CP table
+ * (in words), the 256 entry primary CP to Unicode table, the OEM glyph
+ * table size and the optional OEM table, the number of DBCS lead byte
+ * ranges with their offset and sub tables, an unknown flag word and
+ * finally the Unicode to CP table.
+ *
+ * name:     path of the file to create.
+ * cpi:      code page information stored in the header.
+ * table:    code page to Unicode table.
+ * oemtable: OEM glyph to Unicode table, or NULL if there is none.
+ *
+ * Returns TRUE on success; FALSE if the file could not be created or if
+ * any write (including the flush done by fclose) failed.
+ */
+static BOOL create_nls_file(char *name, CPINFOEXA *cpi, WCHAR *table, WCHAR *oemtable)
+{
+    FILE *out;
+    NLS_FILE_HEADER nls;
+    WORD wValue, number_of_lb_ranges, number_of_lb_subtables, i;
+    BOOL ok = FALSE;
+
+    printf("Creating NLS table \"%s\"\n", name);
+
+    if(!(out = fopen(name, "wb"))) {
+        printf("Could not create file \"%s\"\n", name);
+        return FALSE;
+    }
+
+    memset(&nls, 0, sizeof(nls));
+
+    nls.wSize = sizeof(nls) / sizeof(WORD);
+    nls.CodePage = cpi->CodePage;
+    nls.MaxCharSize = cpi->MaxCharSize;
+    memcpy(nls.DefaultChar, cpi->DefaultChar, MAX_DEFAULTCHAR);
+    nls.UnicodeDefaultChar = cpi->UnicodeDefaultChar;
+    nls.unknown1 = '?';
+    nls.unknown2 = '?';
+    memcpy(nls.LeadByte, cpi->LeadByte, MAX_LEADBYTES);
+
+    if(fwrite(&nls, 1, sizeof(nls), out) != sizeof(nls))
+        goto done;
+
+    number_of_lb_ranges = 0;
+    number_of_lb_subtables = 0;
+
+    for(i = 0; i < MAX_LEADBYTES; i += 2) {
+        if(cpi->LeadByte[i] != 0 && cpi->LeadByte[i + 1] > cpi->LeadByte[i]) {
+            number_of_lb_ranges++;
+            number_of_lb_subtables += cpi->LeadByte[i + 1] - cpi->LeadByte[i] + 1;
+        }
+    }
+
+    /* Calculate offset to Unicode to CP table in words:
+     *  1. (256 * sizeof(WORD)) primary CP to Unicode table +
+     *  2. (WORD) optional OEM glyph table size in words +
+     *  3. OEM glyph table size in words * sizeof(WORD) +
+     *  4. (WORD) Number of DBCS LeadByte ranges +
+     *  5. if (Number of DBCS LeadByte ranges != 0) 256 * sizeof(WORD) offsets of lead byte sub tables
+     *  6. (Number of DBCS LeadByte sub tables * 256 * sizeof(WORD)) LeadByte sub tables +
+     *  7. (WORD) Unknown flag
+     */
+
+    wValue = (256 * sizeof(WORD) + /* 1 */
+              sizeof(WORD) + /* 2 */
+              ((oemtable != NULL) ? (256 * sizeof(WORD)) : 0) + /* 3 */
+              sizeof(WORD) + /* 4 */
+              ((number_of_lb_subtables != 0) ? 256 * sizeof(WORD) : 0) + /* 5 */
+              number_of_lb_subtables * 256 * sizeof(WORD) + /* 6 */
+              sizeof(WORD) /* 7 */
+              ) / sizeof(WORD);
+
+    /* offset of Unicode to CP table in words */
+    if(fwrite(&wValue, 1, sizeof(wValue), out) != sizeof(wValue))
+        goto done;
+
+    /* primary CP to Unicode table */
+    if(fwrite(table, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR))
+        goto done;
+
+    /* optional OEM glyph table size in words */
+    wValue = (oemtable != NULL) ? (256 * sizeof(WORD)) : 0;
+    if(fwrite(&wValue, 1, sizeof(wValue), out) != sizeof(wValue))
+        goto done;
+
+    /* optional OEM to Unicode table */
+    if(oemtable) {
+        if(fwrite(oemtable, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR))
+            goto done;
+    }
+
+    /* Number of DBCS LeadByte ranges */
+    if(fwrite(&number_of_lb_ranges, 1, sizeof(number_of_lb_ranges), out) != sizeof(number_of_lb_ranges))
+        goto done;
+
+    /* offsets of lead byte sub tables and lead byte sub tables */
+    if(number_of_lb_ranges > 0) {
+        if(!write_lb_ranges(out, cpi, table))
+            goto done;
+    }
+
+    /* Unknown flag */
+    wValue = 0;
+    if(fwrite(&wValue, 1, sizeof(wValue), out) != sizeof(wValue))
+        goto done;
+
+    if(!write_unicode2cp_table(out, cpi, table))
+        goto done;
+
+    ok = TRUE;
+
+done:
+    /* fclose flushes buffered data, so its result is part of the write */
+    if(fclose(out) != 0)
+        ok = FALSE;
+
+    if(!ok)
+        printf("Could not write to file \"%s\"\n", name);
+
+    return ok;
+}
+
+/* correct the codepage information such as default chars; called by Load_CP2Unicode_Table() right after the defaults are set */
+static void patch_codepage_info(CPINFOEXA *cpi)
+{
+    /* currently nothing: cpi is left untouched */
+}
+
+/*
+ * Load a code page to Unicode mapping file.
+ *
+ * cpi is reset to defaults ('?' as default characters, MaxCharSize 1) and
+ * then filled in from the file: MaxCharSize becomes 2 as soon as an entry
+ * above 0xFF is seen, and consecutive lines whose comment contains
+ * "DBCS LEAD BYTE" are collected into cpi->LeadByte as (first, last) pairs.
+ *
+ * The returned table has 65536 entries. Entries 0..255 default to the
+ * identity mapping, all others to cpi->UnicodeDefaultChar. Lead byte
+ * entries are set to 0.
+ *
+ * table_name: path of the mapping file.
+ * cp:         code page number stored in cpi->CodePage.
+ * cpi:        receives the code page information.
+ *
+ * Returns the malloc'ed table (the caller frees it), or NULL if memory
+ * could not be allocated or the file could not be opened.
+ */
+static WCHAR *Load_CP2Unicode_Table(char *table_name, UINT cp, CPINFOEXA *cpi)
+{
+    char buf[256];
+    char *p;
+    DWORD n, value;
+    FILE *file;
+    WCHAR *table;
+    int lb_ranges, lb_range_started, line;
+
+    printf("Loading translation table \"%s\"\n", table_name);
+
+    /* Init to default values */
+    memset(cpi, 0, sizeof(CPINFOEXA));
+    cpi->CodePage = cp;
+    *(WCHAR *)cpi->DefaultChar = '?';
+    cpi->MaxCharSize = 1;
+    cpi->UnicodeDefaultChar = '?';
+
+    patch_codepage_info(cpi);
+
+    table = (WCHAR *)malloc(sizeof(WCHAR) * 65536);
+    if(!table) {
+        printf("Not enough memory for Codepage to Unicode table\n");
+        return NULL;
+    }
+
+    for(n = 0; n < 256; n++)
+        table[n] = (WCHAR)n;
+
+    for(n = 256; n < 65536; n++)
+        table[n] = cpi->UnicodeDefaultChar;
+
+    file = fopen(table_name, "r");
+    if(file == NULL) {
+        free(table);
+        return NULL;
+    }
+
+    line = 0;
+    lb_ranges = 0;
+    lb_range_started = 0;
+
+    while(fgets(buf, sizeof(buf), file)) {
+        line++;
+        p = buf;
+        /* <ctype.h> functions need an unsigned char value */
+        while(isspace((unsigned char)*p)) p++;
+
+        if(!*p || p[0] == '#')
+            continue;
+
+        n = strtol(p, &p, 0);
+        if(n > 0xFFFF) {
+            printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, n, table_name);
+            continue;
+        }
+
+        if(n > 0xFF && cpi->MaxCharSize != 2) {
+            /*printf("Line %d: Entry 0x%04lX: Switching to DBCS\n", line, n);*/
+            cpi->MaxCharSize = 2;
+        }
+
+        while(isspace((unsigned char)*p)) p++;
+
+        if(!*p || p[0] == '#') {
+            /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
+        }
+        else {
+            value = strtol(p, &p, 0);
+            if(value > 0xFFFF) {
+                /* report the bad value and keep the default rather than
+                 * storing a silently truncated code point */
+                printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, n, table_name);
+            }
+            else {
+                table[n] = (WCHAR)value;
+            }
+        }
+
+        /* wait for comment */
+        while(*p && *p != '#') p++;
+
+        if(*p == '#' && strstr(p, "DBCS LEAD BYTE")) {
+            /*printf("Line %d, entry 0x%02lX DBCS LEAD BYTE\n", line, n);*/
+            if(n > 0xFF) {
+                printf("Line %d: Entry 0x%04lX: Error: DBCS lead byte overflowed\n", line, n);
+                continue;
+            }
+
+            table[n] = (WCHAR)0;
+
+            if(lb_range_started) {
+                /* still inside the current range: move its end */
+                cpi->LeadByte[(lb_ranges - 1) * 2 + 1] = (BYTE)n;
+            }
+            else {
+                /*printf("Line %d: Starting new DBCS lead byte range, entry 0x%02lX\n", line, n);*/
+                if(lb_ranges < MAX_LEADBYTES/2) {
+                    lb_ranges++;
+                    lb_range_started = 1;
+                    cpi->LeadByte[(lb_ranges - 1) * 2] = (BYTE)n;
+                }
+                else
+                    printf("Line %d: Error: could not start new lead byte range\n", line);
+            }
+        }
+        else {
+            /* any other entry ends the current lead byte range */
+            if(lb_range_started)
+                lb_range_started = 0;
+        }
+    }
+
+    fclose(file);
+
+    return table;
+}
+
+/*
+ * Load the OEM glyph table: a copy of def_table in which the entries
+ * listed in table_name are replaced.
+ *
+ * Each data line holds hexadecimal columns. The first column is the value
+ * that gets stored and the second column is the table index it is stored
+ * at.
+ *
+ * NOTE(review): for code page 864 a third column is parsed, but every path
+ * of that branch ends in 'continue', so the returned table is an unchanged
+ * copy of def_table - confirm that this is intended.
+ *
+ * table_name: path of the glyph mapping file.
+ * def_table:  65536 entry table used as the starting point.
+ * cp:         unused.
+ * cpi:        code page information; only CodePage is read.
+ *
+ * Returns the malloc'ed table (the caller frees it), or NULL if memory
+ * could not be allocated or the file could not be opened.
+ */
+static WCHAR *Load_OEM2Unicode_Table(char *table_name, WCHAR *def_table, UINT cp, CPINFOEXA *cpi)
+{
+    char buf[256];
+    char *p;
+    DWORD n, value;
+    FILE *file;
+    WCHAR *table;
+    int line = 0; /* was used uninitialised in the diagnostics below */
+
+    printf("Loading oem glyph table \"%s\"\n", table_name);
+
+    table = (WCHAR *)malloc(sizeof(WCHAR) * 65536);
+    if(!table) {
+        printf("Not enough memory for Codepage to Unicode table\n");
+        return NULL;
+    }
+
+    memcpy(table, def_table, 65536 * sizeof(WCHAR));
+
+    file = fopen(table_name, "r");
+    if(file == NULL) {
+        free(table);
+        return NULL;
+    }
+
+    while(fgets(buf, sizeof(buf), file)) {
+        line++;
+        p = buf;
+        /* <ctype.h> functions need an unsigned char value */
+        while(isspace((unsigned char)*p)) p++;
+
+        if(!*p || p[0] == '#')
+            continue;
+
+        value = strtol(p, &p, 16);
+        if(value > 0xFFFF) {
+            printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, value, table_name);
+            continue;
+        }
+
+        while(isspace((unsigned char)*p)) p++;
+
+        if(!*p || p[0] == '#') {
+            /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
+            continue;
+        }
+        else {
+            n = strtol(p, &p, 16);
+            if(n > 0xFFFF) {
+                printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, value, table_name);
+                continue;
+            }
+        }
+
+        if (cpi->CodePage == 864) {
+            while(isspace((unsigned char)*p)) p++;
+
+            if(!*p || p[0] == '#' || p[0] == '-') {
+                /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
+                continue;
+            }
+            else {
+                n = strtol(p, &p, 16);
+                if(n > 0xFFFF) {
+                    printf("Line %d: Entry 0x%06lX oem value: File \"%s\" corrupted\n", line, value, table_name);
+                }
+                continue;
+            }
+        }
+
+        table[n] = (WCHAR)value;
+    }
+
+    fclose(file);
+
+    return table;
+}
+
+/*
+ * Generate a c_<codepage>.nls file in NLSDIR for every code page listed in
+ * the pages table below.
+ *
+ * Code pages flagged as OEM additionally get the glyph table loaded from
+ * IBMGRAPH.TXT. A code page whose input cannot be loaded or whose output
+ * cannot be written is reported and skipped; the remaining code pages are
+ * still processed.
+ *
+ * Always returns 0.
+ */
+int write_nls_files()
+{
+    WCHAR *table;
+    WCHAR *oemtable;
+    char nls_filename[256];
+    CPINFOEXA cpi;
+    int i;
+    struct code_page {
+        UINT cp;
+        BOOL oem;
+        char *table_filename;
+        char *comment;
+    } pages[] = {
+        {37,  FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT", "IBM EBCDIC US Canada"},
+        {424, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP424.TXT", "IBM EBCDIC Hebrew"},
+        {437, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT", "OEM United States"},
+        {500, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT", "IBM EBCDIC International"},
+        /*{708, FALSE, "", "Arabic ASMO"},*/
+        /*{720, FALSE, "", "Arabic Transparent ASMO"},*/
+        {737, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT", "OEM Greek 437G"},
+        {775, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT", "OEM Baltic"},
+        {850, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT", "OEM Multilingual Latin 1"},
+        {852, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT", "OEM Slovak Latin 2"},
+        {855, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT", "OEM Cyrillic" },
+        {856, TRUE,  LIBDIR"MAPPINGS/VENDORS/MISC/CP856.TXT", "Hebrew PC"},
+        {857, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT", "OEM Turkish"},
+        {860, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT", "OEM Portuguese"},
+        {861, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT", "OEM Icelandic"},
+        {862, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT", "OEM Hebrew"},
+        {863, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP863.TXT", "OEM Canadian French"},
+        {864, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP864.TXT", "OEM Arabic"},
+        {865, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP865.TXT", "OEM Nordic"},
+        {866, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT", "OEM Russian"},
+        {869, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP869.TXT", "OEM Greek"},
+        /*{870, FALSE, "", "IBM EBCDIC Multilingual/ROECE (Latin 2)"},*/
+        {874, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP874.TXT", "ANSI/OEM Thai"},
+        {875, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT", "IBM EBCDIC Greek"},
+        {878, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"},
+        {932, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT", "ANSI/OEM Japanese Shift-JIS"},
+        {936, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT", "ANSI/OEM Simplified Chinese GBK"},
+        {949, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT", "ANSI/OEM Korean Unified Hangul"},
+        {950, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT", "ANSI/OEM Traditional Chinese Big5"},
+        {1006, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP1006.TXT", "IBM Arabic"},
+        {1026, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT", "IBM EBCDIC Latin 5 Turkish"},
+        {1250, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT", "ANSI Eastern Europe"},
+        {1251, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT", "ANSI Cyrillic"},
+        {1252, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT", "ANSI Latin 1"},
+        {1253, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT", "ANSI Greek"},
+        {1254, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT", "ANSI Turkish"},
+        {1255, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT", "ANSI Hebrew"},
+        {1256, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT", "ANSI Arabic"},
+        {1257, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT", "ANSI Baltic"},
+        {1258, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT", "ANSI/OEM Viet Nam"},
+        {10000, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ROMAN.TXT", "Mac Roman"},
+        {10006, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT", "Mac Greek"},
+        {10007, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/CYRILLIC.TXT", "Mac Cyrillic"},
+        {10029, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT", "Mac Latin 2"},
+        {10079, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT", "Mac Icelandic"},
+        {10081, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT", "Mac Turkish"},
+        /*{20000, FALSE, "", "CNS Taiwan"},*/
+        /*{20001, FALSE, "", "TCA Taiwan"},*/
+        /*{20002, FALSE, "", "Eten Taiwan"},*/
+        /*{20003, FALSE, "", "IBM5550 Taiwan"},*/
+        /*{20004, FALSE, "", "TeleText Taiwan"},*/
+        /*{20005, FALSE, "", "Wang Taiwan"},*/
+        /*{20105, FALSE, "", "IA5 IRV International Alphabet No.5"},*/
+        /*{20106, FALSE, "", "IA5 German"},*/
+        /*{20107, FALSE, "", "IA5 Swedish"},*/
+        /*{20108, FALSE, "", "IA5 Norwegian"},*/
+        /*{20127, FALSE, "", "US ASCII"},*/
+        /*{20261, FALSE, "", "T.61"},*/
+        /*{20269, FALSE, "", "ISO 6937 NonSpacing Accent"},*/
+        /*{20273, FALSE, "", "IBM EBCDIC Germany"},*/
+        /*{20277, FALSE, "", "IBM EBCDIC Denmark/Norway"},*/
+        /*{20278, FALSE, "", "IBM EBCDIC Finland/Sweden"},*/
+        /*{20280, FALSE, "", "IBM EBCDIC Italy"},*/
+        /*{20284, FALSE, "", "IBM EBCDIC Latin America/Spain"},*/
+        /*{20285, FALSE, "", "IBM EBCDIC United Kingdom"},*/
+        /*{20290, FALSE, "", "IBM EBCDIC Japanese Katakana Extended"},*/
+        /*{20297, FALSE, "", "IBM EBCDIC France"},*/
+        /*{20420, FALSE, "", "IBM EBCDIC Arabic"},*/
+        /*{20423, FALSE, "", "IBM EBCDIC Greek"},*/
+        /*{20424, FALSE, "", "IBM EBCDIC Hebrew"},*/
+        /*{20833, FALSE, "", "IBM EBCDIC Korean Extended"},*/
+        /*{20838, FALSE, "", "IBM EBCDIC Thai"},*/
+        /*{20871, FALSE, "", "IBM EBCDIC Icelandic"},*/
+        /*{20880, FALSE, "", "IBM EBCDIC Cyrillic (Russian)"},*/
+        {20866, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"},
+        /*{20905, FALSE, "", "IBM EBCDIC Turkish"},*/
+        /*{21025, FALSE, "", "IBM EBCDIC Cyrillic (Serbian, Bulgarian)"},*/
+        /*{21027, FALSE, "", "Ext Alpha Lowercase"},*/
+        {28591, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-1.TXT", "ISO 8859-1 Latin 1"},
+        {28592, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-2.TXT", "ISO 8859-2 Eastern Europe"},
+        {28593, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-3.TXT", "ISO 8859-3 Turkish"},
+        {28594, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-4.TXT", "ISO 8859-4 Baltic"},
+        {28595, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-5.TXT", "ISO 8859-5 Cyrillic"},
+        {28596, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-6.TXT", "ISO 8859-6 Arabic"},
+        {28597, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-7.TXT", "ISO 8859-7 Greek"},
+        {28598, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-8.TXT", "ISO 8859-8 Hebrew"},
+        {28599, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-9.TXT", "ISO 8859-9 Latin 5"}
+    };
+
+    for(i = 0; i < sizeof(pages)/sizeof(pages[0]); i++) {
+        table = Load_CP2Unicode_Table(pages[i].table_filename, pages[i].cp, &cpi);
+        if(!table) {
+            printf("Could not load \"%s\" (%s)\n", pages[i].table_filename, pages[i].comment);
+            continue;
+        }
+
+        oemtable = NULL;
+        if (pages[i].oem) {
+            oemtable = Load_OEM2Unicode_Table(LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", table, pages[i].cp, &cpi);
+            if(!oemtable) {
+                printf("Could not load \"%s\" (%s)\n", LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", "IBM OEM glyph table");
+                /* the code page table is no longer needed; do not leak it */
+                free(table);
+                continue;
+            }
+        }
+
+        sprintf(nls_filename, "%s/c_%03d.nls", NLSDIR, cpi.CodePage);
+        if(!create_nls_file(nls_filename, &cpi, table, oemtable)) {
+            printf("Could not write \"%s\" (%s)\n", nls_filename, pages[i].comment);
+        }
+
+        /* oemtable is NULL for non OEM code pages; free(NULL) is a no-op */
+        free(oemtable);
+        free(table);
+    }
+
+    return 0;
+}
+
+
+
+static WORD *to_upper_org = NULL, *to_lower_org = NULL; /* 65536 entry tables of (mapped code - code), allocated by CreateCaseDiff() */
+
+static WORD diffs[256]; /* only read by find_diff() */
+static int number_of_diffs; /* number of entries of diffs[] searched by find_diff() */
+
+static WORD number_of_subtables_with_diffs; /* used entries in subtables_with_diffs[] */
+/* pointers to subtables with 16 elements in each to the main table */
+static WORD *subtables_with_diffs[4096];
+
+static WORD number_of_subtables_with_offsets; /* used 16 element groups in subtables_with_offsets[] */
+/* subtables with 16 elements  */
+static WORD subtables_with_offsets[4096 * 16];
+
+/*
+ * Exercise a packed case table: every character of a fixed test string
+ * (terminator included) is looked up through the three table levels and
+ * the difference found is added to it. The converted string is local and
+ * is not used after the loop.
+ *
+ * table: packed table as produced by pack_table().
+ */
+static void test_packed_table(WCHAR *table)
+{
+    WCHAR test_str[] = L"This is an English text. Ïî-ðóññêè ÿ ïèñàòü óìåþ íåìíîæêî. 1234567890";
+    DWORD count, pos;
+
+    /* one more than the length so that the terminator is looked up too */
+    count = lstrlenW(test_str);
+    count = count + 1;
+
+    for(pos = 0; pos < count; pos++) {
+        WCHAR ch = test_str[pos];
+        /* level 1: high byte selects a subtable of offsets */
+        WCHAR offsets_at = table[HIBYTE(ch)];
+        /* level 2: high nibble of the low byte selects a subtable of diffs */
+        WCHAR diffs_at = table[offsets_at + (LOBYTE(ch) >> 4)];
+
+        /* level 3: low nibble selects the difference to add */
+        test_str[pos] += table[diffs_at + (LOBYTE(ch) & 0x0F)];
+    }
+}
+
+/* Advance past the next ';' on the line; returns NULL if the line ends first. */
+static char *skip_field(char *p)
+{
+    while(*p && *p != ';') p++;
+    return *p ? p + 1 : NULL;
+}
+
+/*
+ * Parse one case mapping field starting at p. If the field is not empty,
+ * the difference between the mapped code and code is stored in table[code].
+ * Returns the start of the next field, or NULL if the line ends first.
+ */
+static char *parse_case_diff(char *p, long code, WORD *table)
+{
+    long mapped;
+
+    while(isspace((unsigned char)*p)) p++;
+
+    if(*p && *p != ';') {
+        mapped = strtol(p, &p, 16);
+        if(mapped >= 0 && mapped <= 0xFFFF)
+            table[code] = (WORD)(mapped - code);
+    }
+
+    /* always step over the field separator so that the next field is
+     * parsed from its own start, whether or not this one was empty */
+    return skip_field(p);
+}
+
+/*
+ * Fill to_upper_org and to_lower_org from a UnicodeData.txt style file.
+ *
+ * Both tables get 65536 zero initialised entries. For every line, field 12
+ * (uppercase mapping) and field 13 (lowercase mapping) are stored as the
+ * difference to the character's own code. Characters above U+FFFF are
+ * skipped because the tables only have 65536 entries.
+ *
+ * table_name: path of the semicolon separated data file.
+ *
+ * Returns TRUE on success. On failure FALSE is returned, both tables are
+ * freed and the pointers are reset to NULL.
+ */
+static BOOL CreateCaseDiff(char *table_name)
+{
+    char buf[256];
+    char *p;
+    long code;
+    FILE *file;
+    int field;
+
+    to_upper_org = (WORD *)calloc(65536, sizeof(WORD));
+    if(!to_upper_org) {
+        printf("Not enough memory for to upper table\n");
+        return FALSE;
+    }
+
+    to_lower_org = (WORD *)calloc(65536, sizeof(WORD));
+    if(!to_lower_org) {
+        printf("Not enough memory for to lower table\n");
+        free(to_upper_org);
+        to_upper_org = NULL;
+        return FALSE;
+    }
+
+    file = fopen(table_name, "r");
+    if(file == NULL) {
+        printf("Could not open file \"%s\"\n", table_name);
+        free(to_upper_org);
+        free(to_lower_org);
+        to_upper_org = NULL;
+        to_lower_org = NULL;
+        return FALSE;
+    }
+
+    while(fgets(buf, sizeof(buf), file)) {
+        p = buf;
+        while(isspace((unsigned char)*p)) p++;
+
+        if(!*p)
+            continue;
+
+        /* 0. Code value */
+        code = strtol(p, &p, 16);
+
+        /* Skip the rest of field 0 and fields 1 to 11: character name,
+         * general category, canonical combining classes, bidirectional
+         * category, character decomposition mapping, decimal digit value,
+         * digit value, numeric value, mirrored, Unicode 1.0 name and the
+         * 10646 comment field. */
+        for(field = 0; field < 12 && p; field++)
+            p = skip_field(p);
+        if(!p)
+            continue;
+
+        /* a WORD cannot hold code points above U+FFFF; truncating them
+         * would overwrite the entries of unrelated characters */
+        if(code < 0 || code > 0xFFFF)
+            continue;
+
+        /* 12. Uppercase Mapping */
+        p = parse_case_diff(p, code, to_upper_org);
+        if(!p)
+            continue;
+
+        /* 13. Lowercase Mapping */
+        parse_case_diff(p, code, to_lower_org);
+
+        /* 14. Titlecase Mapping: not used */
+    }
+
+    fclose(file);
+
+    return TRUE;
+}
+
+/*
+ * Look up diff in the first number_of_diffs entries of diffs[].
+ * Returns the index of the first match, or -1 if there is none.
+ */
+static int find_diff(WORD diff)
+{
+    int pos = 0;
+
+    while(pos < number_of_diffs && diffs[pos] != diff)
+        pos++;
+
+    return (pos < number_of_diffs) ? pos : -1;
+}
+
+/*
+ * Return the index of a registered 16 element subtable with the same
+ * contents as subtable, registering subtable itself if none matches.
+ *
+ * Only the pointer is stored, so subtable must stay valid while the
+ * registry is in use. When all 4096 slots are taken a message is printed
+ * and 0 is returned. The table parameter is not used.
+ */
+static WORD find_subtable_with_diffs(WORD *table, WORD *subtable)
+{
+    WORD slot = 0;
+
+    while(slot < number_of_subtables_with_diffs) {
+        if(!memcmp(subtables_with_diffs[slot], subtable, 16 * sizeof(WORD)))
+            return slot;
+        slot++;
+    }
+
+    /* no match: slot now equals the number of registered subtables */
+    if(slot >= 4096) {
+        printf("Could not add new subtable with diffs, storage is full\n");
+        return 0;
+    }
+
+    subtables_with_diffs[slot] = subtable;
+    number_of_subtables_with_diffs = slot + 1;
+
+    return slot;
+}
+
+/*
+ * Return the index of a stored 16 element offset subtable with the same
+ * contents as subtable, appending a copy of subtable if none matches.
+ *
+ * When all 4096 slots are taken a message is printed and 0 is returned.
+ */
+static WORD find_subtable_with_offsets(WORD *subtable)
+{
+    WORD slot = 0;
+
+    while(slot < number_of_subtables_with_offsets) {
+        if(!memcmp(&subtables_with_offsets[slot * 16], subtable, 16 * sizeof(WORD)))
+            return slot;
+        slot++;
+    }
+
+    /* no match: slot now equals the number of stored subtables */
+    if(slot >= 4096) {
+        printf("Could not add new subtable with offsets, storage is full\n");
+        return 0;
+    }
+
+    memcpy(&subtables_with_offsets[slot * 16], subtable, 16 * sizeof(WORD));
+    number_of_subtables_with_offsets = slot + 1;
+
+    return slot;
+}
+
+/*
+ * Pack a 65536 entry table of case differences into a three level table.
+ *
+ * Layout of the result, in words:
+ *   [0 .. 0xFF]  main index, one entry per high byte, holding the position
+ *                of a 16 entry offset subtable;
+ *   then         the unique offset subtables, one entry per high nibble of
+ *                the low byte, holding the position of a diff subtable;
+ *   then         the unique 16 entry diff subtables.
+ * Identical subtables are stored only once.
+ *
+ * table:                table to pack; it must stay valid during the call.
+ * packed_size_in_words: receives the size of the packed table in words.
+ *
+ * Returns the calloc'ed packed table (the caller frees it), or NULL if the
+ * memory could not be allocated.
+ */
+static WORD *pack_table(WORD *table, WORD *packed_size_in_words)
+{
+    WORD high, low4, index;
+    WORD main_index[256];
+    WORD temp_subtable[16];
+    WORD *packed_table;
+    WORD *subtable_src, *subtable_dst;
+
+    memset(subtables_with_diffs, 0, sizeof(subtables_with_diffs));
+    number_of_subtables_with_diffs = 0;
+
+    memset(subtables_with_offsets, 0, sizeof(subtables_with_offsets));
+    number_of_subtables_with_offsets = 0;
+
+    /* collect the unique diff subtables and, per high byte, the unique
+     * subtables of indexes to them */
+    for(high = 0; high < 256; high++) {
+        for(low4 = 0; low4 < 256; low4 += 16) {
+            index = find_subtable_with_diffs(table, &table[MAKEWORD(low4, high)]);
+
+            temp_subtable[low4 >> 4] = index;
+        }
+
+        index = find_subtable_with_offsets(temp_subtable);
+        main_index[high] = index;
+    }
+
+    *packed_size_in_words = 0x100 + number_of_subtables_with_offsets * 16 + number_of_subtables_with_diffs * 16;
+    packed_table = calloc(*packed_size_in_words, sizeof(WORD));
+    if(!packed_table) {
+        printf("Not enough memory for packed table\n");
+        return NULL;
+    }
+
+    /* fill main index according to the subtables_with_offsets */
+    for(high = 0; high < 256; high++) {
+        packed_table[high] = 0x100 + main_index[high] * 16;
+    }
+
+    /* fill subtable index according to the subtables_with_diffs */
+    for(index = 0; index < number_of_subtables_with_offsets; index++) {
+        subtable_dst = packed_table + 0x100 + index * 16;
+        subtable_src = &subtables_with_offsets[index * 16];
+
+        /* turn diff subtable indexes into positions inside packed_table */
+        for(low4 = 0; low4 < 16; low4++) {
+            subtable_dst[low4] = 0x100 + number_of_subtables_with_offsets * 16 + subtable_src[low4] * 16;
+        }
+    }
+
+    /* append the diff subtables themselves */
+    for(index = 0; index < number_of_subtables_with_diffs; index++) {
+        subtable_dst = packed_table + 0x100 + number_of_subtables_with_offsets * 16 + index * 16;
+        memcpy(subtable_dst, subtables_with_diffs[index], 16 * sizeof(WORD));
+    }
+
+    test_packed_table(packed_table);
+
+    return packed_table;
+}
+
+/*
+ * Create NLSDIR/l_intl.nls from LIBDIR/UnicodeData.txt.
+ *
+ * The file holds a version word (1) followed by the packed upper case
+ * table and the packed lower case table. Each table is preceded by a word
+ * holding its size in words plus one.
+ *
+ * Returns 0 on success and -1 if the input could not be loaded, the output
+ * could not be created, memory ran out or a write failed. The case tables
+ * built by CreateCaseDiff() are released on every path.
+ */
+int write_casemap_file(void)
+{
+    WORD packed_size_in_words, offset_to_next_table_in_words;
+    WORD *packed_table, value;
+    WORD *org_tables[2];
+    FILE *file;
+    int ret = -1;
+    int t;
+
+    if(!CreateCaseDiff(LIBDIR"UnicodeData.txt"))
+        goto out_free;
+
+    file = fopen(NLSDIR"/l_intl.nls", "wb");
+    if(!file) {
+        printf("Could not create file \"%s\"\n", NLSDIR"/l_intl.nls");
+        goto out_free;
+    }
+
+    /* write version number */
+    value = 1;
+    if(fwrite(&value, 1, sizeof(WORD), file) != sizeof(WORD))
+        goto out_close;
+
+    /* the upper case table comes first, then the lower case table */
+    org_tables[0] = to_upper_org;
+    org_tables[1] = to_lower_org;
+
+    for(t = 0; t < 2; t++) {
+        size_t written;
+
+        packed_table = pack_table(org_tables[t], &packed_size_in_words);
+        if(!packed_table)
+            goto out_close;
+
+        offset_to_next_table_in_words = packed_size_in_words + 1;
+        if(fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file) != sizeof(WORD)) {
+            free(packed_table);
+            goto out_close;
+        }
+
+        /* write packed table */
+        written = fwrite(packed_table, sizeof(WORD), packed_size_in_words, file);
+        free(packed_table);
+        if(written != packed_size_in_words)
+            goto out_close;
+    }
+
+    ret = 0;
+
+out_close:
+    /* fclose flushes buffered data, so its result is part of the write */
+    if(fclose(file) != 0)
+        ret = -1;
+
+    if(ret != 0)
+        printf("Could not write to file \"%s\"\n", NLSDIR"/l_intl.nls");
+
+out_free:
+    free(to_upper_org);
+    free(to_lower_org);
+    to_upper_org = NULL;
+    to_lower_org = NULL;
+
+    return ret;
+}
+
+/*
+ * Generate the code page tables and the case mapping table.
+ * Both steps always run; the exit status is 1 if either of them reported
+ * a failure and 0 otherwise.
+ */
+int main()
+{
+       int status = 0;
+
+       if(write_nls_files() != 0)
+               status = 1;
+
+       if(write_casemap_file() != 0)
+               status = 1;
+
+       return status;
+}
diff --git a/reactos/tools/create_nls/unicode.org/.cvsignore b/reactos/tools/create_nls/unicode.org/.cvsignore
new file mode 100644 (file)
index 0000000..2211df6
--- /dev/null
@@ -0,0 +1 @@
+*.txt