[RTL]

[reactos.git] / reactos / sdk / lib / rtl / unicode.c
diff --git a/reactos/sdk/lib/rtl/unicode.c b/reactos/sdk/lib/rtl/unicode.c

index 79203a8..a6185b9 100644 (file)
--- a/reactos/sdk/lib/rtl/unicode.c
+++ b/reactos/sdk/lib/rtl/unicode.c
@@ -24,15 +24,19 @@ extern BOOLEAN NlsMbOemCodePageTag;
  extern PUSHORT NlsLeadByteInfo;
  extern USHORT NlsOemDefaultChar;
  extern USHORT NlsUnicodeDefaultChar;
+extern PUSHORT NlsOemLeadByteInfo;
+extern PWCHAR NlsOemToUnicodeTable;
+extern PCHAR NlsUnicodeToOemTable;
+extern PUSHORT NlsUnicodeToMbOemTable;
  
  
  /* FUNCTIONS *****************************************************************/
  
  NTSTATUS
  NTAPI
-RtlMultiAppendUnicodeStringBuffer(IN PVOID Unknown,
-                                  IN ULONG Unknown2,
-                                  IN PVOID Unknown3)
+RtlMultiAppendUnicodeStringBuffer(OUT PRTL_UNICODE_STRING_BUFFER StringBuffer,
+                                  IN ULONG NumberOfAddends,
+                                  IN PCUNICODE_STRING Addends)
  {
      UNIMPLEMENTED;
      return STATUS_NOT_IMPLEMENTED;
@@ -503,14 +507,48 @@ RtlpDidUnicodeToOemWork(IN PCUNICODE_STRING UnicodeString,
  }
  
  /*
-* @unimplemented
+* @implemented
  */
  BOOLEAN
  NTAPI
  RtlIsValidOemCharacter(IN PWCHAR Char)
  {
-    UNIMPLEMENTED;
-    return FALSE;
+    WCHAR UnicodeChar;
+    WCHAR OemChar;
+
+    /* If multi-byte code page present */
+    if (NlsMbOemCodePageTag)
+    {
+        USHORT Offset = 0;
+
+        OemChar = NlsUnicodeToMbOemTable[*Char];
+
+        /* If character has Lead Byte */
+        if (NlsOemLeadByteInfo[HIBYTE(OemChar)])
+            Offset = NlsOemLeadByteInfo[HIBYTE(OemChar)];
+
+        /* Receive Unicode character from the table */
+        UnicodeChar = RtlpUpcaseUnicodeChar(NlsOemToUnicodeTable[LOBYTE(OemChar) + Offset]);
+
+        /* Receive OEM character from the table */
+        OemChar = NlsUnicodeToMbOemTable[UnicodeChar];
+    }
+    else
+    {
+        /* Receive Unicode character from the table */
+        UnicodeChar = RtlpUpcaseUnicodeChar(NlsOemToUnicodeTable[(UCHAR)NlsUnicodeToOemTable[*Char]]);
+
+        /* Receive OEM character from the table */
+        OemChar = NlsUnicodeToOemTable[UnicodeChar];
+    }
+
+    /* Not valid character, failed */
+    if (OemChar == NlsOemDefaultChar)
+        return FALSE;
+
+    *Char = UnicodeChar;
+
+    return TRUE;
  }
  
  /*
@@ -704,11 +742,11 @@ NTSTATUS NTAPI RtlIntegerToChar(
      }
      else if (len == length)
      {
-        memcpy(str, pos, len);
+        RtlCopyMemory(str, pos, len);
      }
      else
      {
-        memcpy(str, pos, len + 1);
+        RtlCopyMemory(str, pos, len + 1);
      }
  
      return STATUS_SUCCESS;
@@ -905,8 +943,8 @@ RtlPrefixUnicodeString(
          {
              while (NumChars--)
              {
-                if (RtlUpcaseUnicodeChar(*pc1++) !=
-                    RtlUpcaseUnicodeChar(*pc2++))
+                if (RtlpUpcaseUnicodeChar(*pc1++) !=
+                    RtlpUpcaseUnicodeChar(*pc2++))
                      return FALSE;
              }
          }
@@ -1245,6 +1283,12 @@ RtlIsTextUnicode(CONST VOID* buf, INT len, INT* pf)
      const WCHAR *s = buf;
      int i;
      unsigned int flags = MAXULONG, out_flags = 0;
+    UCHAR last_lo_byte = 0;
+    UCHAR last_hi_byte = 0;
+    ULONG hi_byte_diff = 0;
+    ULONG lo_byte_diff = 0;
+    ULONG weight = 3;
+    ULONG lead_byte = 0;
  
      if (len < sizeof(WCHAR))
      {
@@ -1279,19 +1323,75 @@ RtlIsTextUnicode(CONST VOID* buf, INT len, INT* pf)
      if (*s == 0xFEFF) out_flags |= IS_TEXT_UNICODE_SIGNATURE;
      if (*s == 0xFFFE) out_flags |= IS_TEXT_UNICODE_REVERSE_SIGNATURE;
  
-    /* apply some statistical analysis */
-    if (flags & IS_TEXT_UNICODE_STATISTICS)
+    for (i = 0; i < len; i++)
      {
-        int stats = 0;
+        UCHAR lo_byte = LOBYTE(s[i]);
+        UCHAR hi_byte = HIBYTE(s[i]);
+
+        lo_byte_diff += max(lo_byte, last_lo_byte) - min(lo_byte, last_lo_byte);
+        hi_byte_diff += max(hi_byte, last_hi_byte) - min(hi_byte, last_hi_byte);
  
-        /* FIXME: checks only for ASCII characters in the unicode stream */
+        last_lo_byte = lo_byte;
+        last_hi_byte = hi_byte;
+
+        switch (s[i])
+        {
+            case 0xFFFE: /* Reverse BOM */
+            case UNICODE_NULL:
+            case 0x0A0D: /* ASCII CRLF (packed into one word) */
+            case 0xFFFF: /* Unicode 0xFFFF */
+                out_flags |= IS_TEXT_UNICODE_ILLEGAL_CHARS;
+                break;
+        }
+    }
+
+    if (NlsMbCodePageTag)
+    {
          for (i = 0; i < len; i++)
          {
-            if (s[i] <= 255) stats++;
+            if (NlsLeadByteInfo[s[i]])
+            {
+                ++lead_byte;
+                ++i;
+            }
+        }
+
+        if (lead_byte)
+        {
+            weight = (len / 2) - 1;
+
+            if (lead_byte < (weight / 3))
+                weight = 3;
+            else if (lead_byte < ((weight * 2) / 3))
+                weight = 2;
+            else
+                weight = 1;
+
+            if (flags & IS_TEXT_UNICODE_DBCS_LEADBYTE)
+                out_flags |= IS_TEXT_UNICODE_DBCS_LEADBYTE;
          }
+    }
  
-        if (stats > len / 2)
-            out_flags |= IS_TEXT_UNICODE_STATISTICS;
+    if (lo_byte_diff < 127 && !hi_byte_diff)
+    {
+        out_flags |= IS_TEXT_UNICODE_ASCII16;
+    }
+
+    if (hi_byte_diff && !lo_byte_diff)
+    {
+        out_flags |= IS_TEXT_UNICODE_REVERSE_ASCII16;
+    }
+
+    if ((weight * lo_byte_diff) < hi_byte_diff)
+    {
+        out_flags |= IS_TEXT_UNICODE_REVERSE_STATISTICS;
+    }
+
+    /* apply some statistical analysis */
+    if ((flags & IS_TEXT_UNICODE_STATISTICS) &&
+        ((weight * hi_byte_diff) < lo_byte_diff))
+    {
+        out_flags |= IS_TEXT_UNICODE_STATISTICS;
      }
  
      /* Check for unicode NULL chars */
@@ -1824,7 +1924,7 @@ RtlUpcaseUnicodeString(
  
      for (i = 0; i < j; i++)
      {
-        UniDest->Buffer[i] = RtlUpcaseUnicodeChar(UniSource->Buffer[i]);
+        UniDest->Buffer[i] = RtlpUpcaseUnicodeChar(UniSource->Buffer[i]);
      }
  
      UniDest->Length = UniSource->Length;
@@ -2096,7 +2196,7 @@ RtlCompareUnicodeString(
  
      if (CaseInsensitive)
      {
-        while (!ret && len--) ret = RtlUpcaseUnicodeChar(*p1++) - RtlUpcaseUnicodeChar(*p2++);
+        while (!ret && len--) ret = RtlpUpcaseUnicodeChar(*p1++) - RtlpUpcaseUnicodeChar(*p2++);
      }
      else
      {
@@ -2276,7 +2376,7 @@ RtlDowncaseUnicodeString(
          }
          else
          {
-            UniDest->Buffer[i] = RtlDowncaseUnicodeChar(UniSource->Buffer[i]);
+            UniDest->Buffer[i] = RtlpDowncaseUnicodeChar(UniSource->Buffer[i]);
          }
      }
  
@@ -2486,13 +2586,13 @@ RtlpIsCharInUnicodeString(
      USHORT i;
  
      if (CaseInSensitive)
-        Char = RtlUpcaseUnicodeChar(Char);
+        Char = RtlpUpcaseUnicodeChar(Char);
  
      for (i = 0; i < MatchString->Length / sizeof(WCHAR); i++)
      {
          WCHAR OtherChar = MatchString->Buffer[i];
          if (CaseInSensitive)
-            OtherChar = RtlUpcaseUnicodeChar(OtherChar);
+            OtherChar = RtlpUpcaseUnicodeChar(OtherChar);
  
          if (Char == OtherChar)
              return TRUE;