[YAROTOWS] Reintegrate the branch. For a brighter future.
[reactos.git] / reactos / subsystems / win32 / win32k / dib / dib16bpp.c
index ba36f3f..a1d72ae 100644 (file)
@@ -1,23 +1,14 @@
 /*
- *  ReactOS W32 Subsystem
- *  Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 ReactOS Team
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * PROJECT:         Win32 subsystem
+ * LICENSE:         See COPYING in the top level directory
+ * FILE:            subsystems/win32/win32k/dib/dib16bpp.c
+ * PURPOSE:         Device Independent Bitmap functions, 16bpp
+ * PROGRAMMERS:     Jason Filby
+ *                  Thomas Bluemel
+ *                  Gregor Anich
  */
 
-#include <w32k.h>
+#include <win32k.h>
 
 #define NDEBUG
 #include <debug.h>
 /*
  * Write one pixel to a 16bpp surface.
  *
  * The target row is located by adding y * lDelta to pvScan0 as a byte
  * offset; x then indexes WORD-sized pixels within that row. The colour c
  * is cast to WORD before being stored. No bounds checking is performed
  * on x or y in this function.
  */
 VOID
 DIB_16BPP_PutPixel(SURFOBJ *SurfObj, LONG x, LONG y, ULONG c)
 {
-    PBYTE byteaddr = (PBYTE)SurfObj->pvScan0 + y * SurfObj->lDelta;
-    PWORD addr = (PWORD)byteaddr + x;
+  PBYTE byteaddr = (PBYTE)SurfObj->pvScan0 + y * SurfObj->lDelta;
+  PWORD addr = (PWORD)byteaddr + x;
 
-    *addr = (WORD)c;
+  *addr = (WORD)c;
 }
 
 /*
  * Read one pixel from a 16bpp surface.
  *
  * The source row is located by adding y * lDelta to pvScan0 as a byte
  * offset; x then indexes WORD-sized pixels within that row. The stored
  * WORD is returned cast to ULONG. No bounds checking is performed on
  * x or y in this function.
  */
 ULONG
 DIB_16BPP_GetPixel(SURFOBJ *SurfObj, LONG x, LONG y)
 {
-    PBYTE byteaddr = (PBYTE)SurfObj->pvScan0 + y * SurfObj->lDelta;
-    PWORD addr = (PWORD)byteaddr + x;
-    return (ULONG)(*addr);
+  PBYTE byteaddr = (PBYTE)SurfObj->pvScan0 + y * SurfObj->lDelta;
+  PWORD addr = (PWORD)byteaddr + x;
+  return (ULONG)(*addr);
 }
 
 VOID
 DIB_16BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c)
 {
-    PDWORD addr = (PDWORD)((PWORD)((PBYTE)SurfObj->pvScan0 + y * SurfObj->lDelta) + x1);
+  PDWORD addr = (PDWORD)((PWORD)((PBYTE)SurfObj->pvScan0 + y * SurfObj->lDelta) + x1);
 
 #if defined(_M_IX86) && !defined(_MSC_VER)
-    /* This is about 10% faster than the generic C code below */
-    LONG Count = x2 - x1;
+  /* This is about 10% faster than the generic C code below */
+  LONG Count = x2 - x1;
 
-    __asm__ __volatile__ (
+  __asm__ __volatile__ (
     "  cld\n"
     "  mov  %0, %%eax\n"
     "  shl  $16, %%eax\n"
@@ -69,28 +60,28 @@ DIB_16BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c)
     "  stosw\n"
     "1:\n"
     : /* no output */
-    : "r"(c), "r"(Count), "m"(addr)
+  : "r"(c), "r"(Count), "m"(addr)
     : "%eax", "%ecx", "%edi");
 #else /* _M_IX86 */
-    LONG cx = x1;
-    DWORD cc;
-
-    if (0 != (cx & 0x01))
-    {
-        *((PWORD) addr) = c;
-        cx++;
-        addr = (PDWORD)((PWORD)(addr) + 1);
-    }
-    cc = ((c & 0xffff) << 16) | (c & 0xffff);
-    while(cx + 1 < x2)
-    {
-        *addr++ = cc;
-        cx += 2;
-    }
-    if (cx < x2)
-    {
-        *((PWORD) addr) = c;
-    }
+  LONG cx = x1;
+  DWORD cc;
+
+  if (0 != (cx & 0x01))
+  {
+    *((PWORD) addr) = c;
+    cx++;
+    addr = (PDWORD)((PWORD)(addr) + 1);
+  }
+  cc = ((c & 0xffff) << 16) | (c & 0xffff);
+  while(cx + 1 < x2)
+  {
+    *addr++ = cc;
+    cx += 2;
+  }
+  if (cx < x2)
+  {
+    *((PWORD) addr) = c;
+  }
 #endif /* _M_IX86 */
 }
 
@@ -99,7 +90,7 @@ VOID
 DIB_16BPP_VLine(SURFOBJ *SurfObj, LONG x, LONG y1, LONG y2, ULONG c)
 {
 #if defined(_M_IX86) && !defined(_MSC_VER)
-    asm volatile(
+  asm volatile(
     "   testl %2, %2"       "\n\t"
     "   jle   2f"           "\n\t"
     "   movl  %2, %%ecx"    "\n\t"
@@ -126,1073 +117,412 @@ DIB_16BPP_VLine(SURFOBJ *SurfObj, LONG x, LONG y1, LONG y2, ULONG c)
     "   jnz   1b"           "\n\t"
     "2:"                    "\n\t"
     : /* no output */
-    : "r"((PBYTE)SurfObj->pvScan0 + (y1 * SurfObj->lDelta) + (x * sizeof (WORD))),
-      "r"(SurfObj->lDelta), "r"(y2 - y1), "a"(c)
+  : "r"((PBYTE)SurfObj->pvScan0 + (y1 * SurfObj->lDelta) + (x * sizeof (WORD))),
+    "r"(SurfObj->lDelta), "r"(y2 - y1), "a"(c)
     : "cc", "memory", "%ecx");
 #else
-    PBYTE byteaddr = (PBYTE)(ULONG_PTR)SurfObj->pvScan0 + y1 * SurfObj->lDelta;
-    PWORD addr = (PWORD)byteaddr + x;
-    LONG lDelta = SurfObj->lDelta;
+  PBYTE byteaddr = (PBYTE)(ULONG_PTR)SurfObj->pvScan0 + y1 * SurfObj->lDelta;
+  PWORD addr = (PWORD)byteaddr + x;
+  LONG lDelta = SurfObj->lDelta;
 
-    byteaddr = (PBYTE)addr;
-    while(y1++ < y2)
-    {
-        *addr = (WORD)c;
+  byteaddr = (PBYTE)addr;
+  while(y1++ < y2)
+  {
+    *addr = (WORD)c;
 
-        byteaddr += lDelta;
-        addr = (PWORD)byteaddr;
-    }
+    byteaddr += lDelta;
+    addr = (PWORD)byteaddr;
+  }
 #endif
 }
 
 BOOLEAN
 DIB_16BPP_BitBltSrcCopy(PBLTINFO BltInfo)
 {
-    LONG     i, j, sx, sy, xColor, f1;
-    PBYTE    SourceBits, DestBits, SourceLine, DestLine;
-    PBYTE    SourceBits_4BPP, SourceLine_4BPP;
-    DestBits = (PBYTE)BltInfo->DestSurface->pvScan0 + (BltInfo->DestRect.top * BltInfo->DestSurface->lDelta) + 2 * BltInfo->DestRect.left;
-
-    switch(BltInfo->SourceSurface->iBitmapFormat)
-    {
-        case BMF_1BPP:
-            sx = BltInfo->SourcePoint.x;
-            sy = BltInfo->SourcePoint.y;
-            for (j=BltInfo->DestRect.top; j<BltInfo->DestRect.bottom; j++)
-            {
-                sx = BltInfo->SourcePoint.x;
-                for (i=BltInfo->DestRect.left; i<BltInfo->DestRect.right; i++)
-                {
-                    if(DIB_1BPP_GetPixel(BltInfo->SourceSurface, sx, sy) == 0)
-                    {
-                        DIB_16BPP_PutPixel(BltInfo->DestSurface, i, j,
-                                  XLATEOBJ_iXlate(BltInfo->XlateSourceToDest, 0));
-                    }
-                    else
-                    {
-                        DIB_16BPP_PutPixel(BltInfo->DestSurface, i, j,
-                                  XLATEOBJ_iXlate(BltInfo->XlateSourceToDest, 1));
-                    }
-                    sx++;
-                }
-                sy++;
-            }
-            break;
-
-        case BMF_4BPP:
-            SourceBits_4BPP = (PBYTE)BltInfo->SourceSurface->pvScan0 +
-                           (BltInfo->SourcePoint.y * BltInfo->SourceSurface->lDelta) +
-                           (BltInfo->SourcePoint.x >> 1);
-
-            for (j=BltInfo->DestRect.top; j<BltInfo->DestRect.bottom; j++)
-            {
-                SourceLine_4BPP = SourceBits_4BPP;
-                sx = BltInfo->SourcePoint.x;
-                f1 = sx & 1;
-
-                for (i=BltInfo->DestRect.left; i<BltInfo->DestRect.right; i++)
-                {
-                    xColor = XLATEOBJ_iXlate(BltInfo->XlateSourceToDest,
-                    (*SourceLine_4BPP & altnotmask[f1]) >> (4 * (1 - f1)));
-                    DIB_16BPP_PutPixel(BltInfo->DestSurface, i, j, xColor);
-                    if(f1 == 1)
-                    {
-                        SourceLine_4BPP++;
-                        f1 = 0;
-                    }
-                    else
-                    {
-                        f1 = 1;
-                    }
-                    sx++;
-                }
-                SourceBits_4BPP += BltInfo->SourceSurface->lDelta;
-            }
-            break;
-
-        case BMF_8BPP:
-            SourceLine = (PBYTE)BltInfo->SourceSurface->pvScan0 +
-                         (BltInfo->SourcePoint.y * BltInfo->SourceSurface->lDelta) +
-                         BltInfo->SourcePoint.x;
-            DestLine = DestBits;
-
-            for (j = BltInfo->DestRect.top; j < BltInfo->DestRect.bottom; j++)
-            {
-                SourceBits = SourceLine;
-                DestBits = DestLine;
-
-                for (i = BltInfo->DestRect.left; i < BltInfo->DestRect.right; i++)
-                {
-                    *((WORD *)DestBits) = (WORD)XLATEOBJ_iXlate(
-                                           BltInfo->XlateSourceToDest, *SourceBits);
-                    SourceBits += 1;
-                    DestBits += 2;
-                }
-
-                SourceLine += BltInfo->SourceSurface->lDelta;
-                DestLine += BltInfo->DestSurface->lDelta;
-            }
-            break;
-
-        case BMF_16BPP:
-            if (NULL == BltInfo->XlateSourceToDest || 0 !=
-               (BltInfo->XlateSourceToDest->flXlate & XO_TRIVIAL))
-            {
-                if (BltInfo->DestRect.top < BltInfo->SourcePoint.y)
-                {
-                    SourceBits = (PBYTE)BltInfo->SourceSurface->pvScan0 +
-                                 (BltInfo->SourcePoint.y *
-                                 BltInfo->SourceSurface->lDelta) + 2 *
-                                 BltInfo->SourcePoint.x;
-
-                    for (j = BltInfo->DestRect.top; j < BltInfo->DestRect.bottom; j++)
-                    {
-                        RtlMoveMemory(DestBits, SourceBits,
-                                      2 * (BltInfo->DestRect.right -
-                                      BltInfo->DestRect.left));
-
-                        SourceBits += BltInfo->SourceSurface->lDelta;
-                        DestBits += BltInfo->DestSurface->lDelta;
-                    }
-                }
-                else
-                {
-                    SourceBits = (PBYTE)BltInfo->SourceSurface->pvScan0 +
-                                 ((BltInfo->SourcePoint.y + BltInfo->DestRect.bottom -
-                                 BltInfo->DestRect.top - 1) *
-                                 BltInfo->SourceSurface->lDelta) + 2 *
-                                 BltInfo->SourcePoint.x;
-
-                    DestBits = (PBYTE)BltInfo->DestSurface->pvScan0 +
-                               ((BltInfo->DestRect.bottom - 1) *
-                               BltInfo->DestSurface->lDelta) + 2 *
-                               BltInfo->DestRect.left;
-
-                    for (j = BltInfo->DestRect.bottom - 1;
-                         BltInfo->DestRect.top <= j; j--)
-                    {
-                        RtlMoveMemory(DestBits, SourceBits, 2 *
-                                      (BltInfo->DestRect.right -
-                                      BltInfo->DestRect.left));
-
-                        SourceBits -= BltInfo->SourceSurface->lDelta;
-                        DestBits -= BltInfo->DestSurface->lDelta;
-                    }
-                }
-            }
-            else
-            {
-                if (BltInfo->DestRect.top < BltInfo->SourcePoint.y)
-                {
-                    SourceLine = (PBYTE)BltInfo->SourceSurface->pvScan0 +
-                                 (BltInfo->SourcePoint.y *
-                                 BltInfo->SourceSurface->lDelta) + 2 *
-                                 BltInfo->SourcePoint.x;
-
-                    DestLine = DestBits;
-                    for (j = BltInfo->DestRect.top; j < BltInfo->DestRect.bottom; j++)
-                    {
-                        SourceBits = SourceLine;
-                        DestBits = DestLine;
-                        for (i = BltInfo->DestRect.left; i <
-                                 BltInfo->DestRect.right; i++)
-                        {
-                            *((WORD *)DestBits) = (WORD)XLATEOBJ_iXlate(
-                                                  BltInfo->XlateSourceToDest,
-                                                  *((WORD *)SourceBits));
-                            SourceBits += 2;
-                            DestBits += 2;
-                        }
-                        SourceLine += BltInfo->SourceSurface->lDelta;
-                        DestLine += BltInfo->DestSurface->lDelta;
-                    }
-                }
-                else
-                {
-                    SourceLine = (PBYTE)BltInfo->SourceSurface->pvScan0 +
-                                 ((BltInfo->SourcePoint.y +
-                                 BltInfo->DestRect.bottom -
-                                 BltInfo->DestRect.top - 1) *
-                                 BltInfo->SourceSurface->lDelta) + 2 *
-                                 BltInfo->SourcePoint.x;
-
-                    DestLine = (PBYTE)BltInfo->DestSurface->pvScan0 +
-                               ((BltInfo->DestRect.bottom - 1) *
-                               BltInfo->DestSurface->lDelta) + 2 *
-                               BltInfo->DestRect.left;
-
-                    for (j = BltInfo->DestRect.bottom - 1;
-                             BltInfo->DestRect.top <= j; j--)
-                    {
-                        SourceBits = SourceLine;
-                        DestBits = DestLine;
-                        for (i = BltInfo->DestRect.left; i <
-                                 BltInfo->DestRect.right; i++)
-                        {
-                            *((WORD *)DestBits) = (WORD)XLATEOBJ_iXlate(
-                                                  BltInfo->XlateSourceToDest,
-                                                  *((WORD *)SourceBits));
-                            SourceBits += 2;
-                            DestBits += 2;
-                        }
-                        SourceLine -= BltInfo->SourceSurface->lDelta;
-                        DestLine -= BltInfo->DestSurface->lDelta;
-                    }
-                }
-            }
-            break;
-
-        case BMF_24BPP:
-            SourceLine = (PBYTE)BltInfo->SourceSurface->pvScan0 +
-                         (BltInfo->SourcePoint.y * BltInfo->SourceSurface->lDelta) +
-                          3 * BltInfo->SourcePoint.x;
-
-            DestLine = DestBits;
-
-            for (j = BltInfo->DestRect.top; j < BltInfo->DestRect.bottom; j++)
-            {
-                SourceBits = SourceLine;
-                DestBits = DestLine;
-
-                for (i = BltInfo->DestRect.left; i < BltInfo->DestRect.right; i++)
-                {
-                    xColor = (*(SourceBits + 2) << 0x10) +
-                             (*(SourceBits + 1) << 0x08) + (*(SourceBits));
-
-                    *((WORD *)DestBits) = (WORD)XLATEOBJ_iXlate(
-                                           BltInfo->XlateSourceToDest, xColor);
-
-                    SourceBits += 3;
-                    DestBits += 2;
-                }
-                SourceLine += BltInfo->SourceSurface->lDelta;
-                DestLine += BltInfo->DestSurface->lDelta;
-            }
-            break;
-
-        case BMF_32BPP:
-            SourceLine = (PBYTE)BltInfo->SourceSurface->pvScan0 +
-                         (BltInfo->SourcePoint.y * BltInfo->SourceSurface->lDelta) +
-                          4 * BltInfo->SourcePoint.x;
-
-            DestLine = DestBits;
-
-            for (j = BltInfo->DestRect.top; j < BltInfo->DestRect.bottom; j++)
-            {
-                SourceBits = SourceLine;
-                DestBits = DestLine;
-
-                for (i = BltInfo->DestRect.left; i < BltInfo->DestRect.right; i++)
-                {
-                    *((WORD *)DestBits) = (WORD)XLATEOBJ_iXlate(
-                                           BltInfo->XlateSourceToDest,
-                                           *((PDWORD) SourceBits));
-                    SourceBits += 4;
-                    DestBits += 2;
-                }
-
-                SourceLine += BltInfo->SourceSurface->lDelta;
-                DestLine += BltInfo->DestSurface->lDelta;
-            }
-            break;
-
-        default:
-            DPRINT1("DIB_16BPP_Bitblt: Unhandled Source BPP: %u\n",
-                     BitsPerFormat(BltInfo->SourceSurface->iBitmapFormat));
-            return FALSE;
-    }
-
-    return TRUE;
-}
-
-/* Optimize for bitBlt */
-BOOLEAN
-DIB_16BPP_ColorFill(SURFOBJ* DestSurface, RECTL* DestRect, ULONG color)
-{
-    ULONG DestY;
-
-#if defined(_M_IX86) && !defined(_MSC_VER)
-    /* This is about 10% faster than the generic C code below */
-    ULONG delta = DestSurface->lDelta;
-    ULONG width = (DestRect->right - DestRect->left) ;
-    PULONG pos =  (PULONG) ((PBYTE)DestSurface->pvScan0 + DestRect->top * delta + (DestRect->left<<1));
-    color = (color&0xffff);  /* If the color value is "abcd", put "abcdabcd" into color */
-    color += (color<<16);
-
-    for (DestY = DestRect->top; DestY< DestRect->bottom; DestY++)
+  LONG     i, j, sx, sy, xColor, f1;
+  PBYTE    SourceBits, DestBits, SourceLine, DestLine;
+  PBYTE    SourceBits_4BPP, SourceLine_4BPP;
+  DestBits = (PBYTE)BltInfo->DestSurface->pvScan0 + (BltInfo->DestRect.top * BltInfo->DestSurface->lDelta) + 2 * BltInfo->DestRect.left;
+
+  switch(BltInfo->SourceSurface->iBitmapFormat)
+  {
+  case BMF_1BPP:
+    sx = BltInfo->SourcePoint.x;
+    sy = BltInfo->SourcePoint.y;
+    for (j=BltInfo->DestRect.top; j<BltInfo->DestRect.bottom; j++)
     {
-        __asm__ __volatile__ (
-        "  cld\n"
-        "  mov  %1,%%ebx\n"
-        "  mov  %2,%%edi\n"
-        "  test $0x03, %%edi\n" /* Align to fullword boundary */
-        "  jz   .FL1\n"
-        "  stosw\n"
-        "  dec  %%ebx\n"
-        "  jz   .FL2\n"
-        ".FL1:\n"
-        "  mov  %%ebx,%%ecx\n"     /* Setup count of fullwords to fill */
-        "  shr  $1,%%ecx\n"
-        "  rep stosl\n"         /* The actual fill */
-        "  test $0x01, %%ebx\n"    /* One left to do at the right side? */
-        "  jz   .FL2\n"
-        "  stosw\n"
-        ".FL2:\n"
-        :
-        : "a" (color), "r" (width), "m" (pos)
-        : "%ecx", "%ebx", "%edi");
-        pos =(PULONG)((ULONG_PTR)pos + delta);
-    }
-#else /* _M_IX86 */
-
-    for (DestY = DestRect->top; DestY< DestRect->bottom; DestY++)
-    {
-        DIB_16BPP_HLine (DestSurface, DestRect->left, DestRect->right, DestY, color);
-    }
-#endif
-    return TRUE;
-}
-/*
-=======================================
- Stretching functions goes below
- Some parts of code are based on an
- article "Bresenhame image scaling"
- Dr. Dobb Journal, May 2002
-=======================================
-*/
-
-typedef unsigned short PIXEL;
-
-/* 16-bit HiColor (565 format) */
-__inline PIXEL average16(PIXEL a, PIXEL b)
-{
-// This one should be correct, but it's too long
-/*
-    unsigned char r1, g1, b1, r2, g2, b2, rr, gr, br;
-    unsigned short res;
-
-    r1 = (a & 0xF800) >> 11;
-    g1 = (a & 0x7E0) >> 5;
-    b1 = (a & 0x1F);
-
-    r2 = (b & 0xF800) >> 11;
-    g2 = (b & 0x7E0) >> 5;
-    b2 = (b & 0x1F);
-
-    rr = (r1+r2) / 2;
-    gr = (g1+g2) / 2;
-    br = (b1+b2) / 2;
-
-    res = (rr << 11) + (gr << 5) + br;
-
-    return res;
-*/
-  // This one is the short form of the correct one, but does not work for QEMU (expects 555 format)
-  //return (((a ^ b) & 0xf7deU) >> 1) + (a & b);
-
-  //hack until short version works properly
-  return a;
-}
-
-//NOTE: If you change something here, please do the same in other dibXXbpp.c files!
-void ScaleLineAvg16(PIXEL *Target, PIXEL *Source, int SrcWidth, int TgtWidth)
-{
-    int NumPixels = TgtWidth;
-    int IntPart = SrcWidth / TgtWidth;
-    int FractPart = SrcWidth % TgtWidth;
-    int Mid = TgtWidth >> 1;
-    int E = 0;
-    int skip;
-    PIXEL p;
-
-    skip = (TgtWidth < SrcWidth) ? 0 : (TgtWidth / (2*SrcWidth) + 1);
-    NumPixels -= skip;
-
-    while (NumPixels-- > 0)
-    {
-        p = *Source;
-        if (E >= Mid)
+      sx = BltInfo->SourcePoint.x;
+      for (i=BltInfo->DestRect.left; i<BltInfo->DestRect.right; i++)
+      {
+        if(DIB_1BPP_GetPixel(BltInfo->SourceSurface, sx, sy) == 0)
         {
-            p = average16(p, *(Source+1));
+          DIB_16BPP_PutPixel(BltInfo->DestSurface, i, j,
+            XLATEOBJ_iXlate(BltInfo->XlateSourceToDest, 0));
         }
-        *Target++ = p;
-        Source += IntPart;
-        E += FractPart;
-        if (E >= TgtWidth)
+        else
         {
-            E -= TgtWidth;
-            Source++;
+          DIB_16BPP_PutPixel(BltInfo->DestSurface, i, j,
+            XLATEOBJ_iXlate(BltInfo->XlateSourceToDest, 1));
         }
+        sx++;
+      }
+      sy++;
     }
-    while (skip-- > 0)
-    {
-        *Target++ = *Source;
-    }
-}
+    break;
 
-static BOOLEAN
-FinalCopy16(PIXEL *Target, PIXEL *Source, PSPAN ClipSpans, UINT ClipSpansCount, UINT *SpanIndex,
-            UINT DestY, RECTL *DestRect)
-{
-    LONG Left, Right;
+  case BMF_4BPP:
+    SourceBits_4BPP = (PBYTE)BltInfo->SourceSurface->pvScan0 +
+      (BltInfo->SourcePoint.y * BltInfo->SourceSurface->lDelta) +
+      (BltInfo->SourcePoint.x >> 1);
 
-    while ( ClipSpans[*SpanIndex].Y < DestY ||
-            (ClipSpans[*SpanIndex].Y == DestY &&
-            ClipSpans[*SpanIndex].X + ClipSpans[*SpanIndex].Width < DestRect->left))
+    for (j=BltInfo->DestRect.top; j<BltInfo->DestRect.bottom; j++)
     {
-        (*SpanIndex)++;
-        if (ClipSpansCount <= *SpanIndex)
+      SourceLine_4BPP = SourceBits_4BPP;
+      sx = BltInfo->SourcePoint.x;
+      f1 = sx & 1;
+
+      for (i=BltInfo->DestRect.left; i<BltInfo->DestRect.right; i++)
+      {
+        xColor = XLATEOBJ_iXlate(BltInfo->XlateSourceToDest,
+          (*SourceLine_4BPP & altnotmask[f1]) >> (4 * (1 - f1)));
+        DIB_16BPP_PutPixel(BltInfo->DestSurface, i, j, xColor);
+        if(f1 == 1)
         {
-            /* No more spans, everything else is clipped away, we're done */
-            return FALSE;
+          SourceLine_4BPP++;
+          f1 = 0;
         }
-    }
-    while (ClipSpans[*SpanIndex].Y == DestY)
-    {
-        if (ClipSpans[*SpanIndex].X < DestRect->right)
+        else
         {
-            Left = max(ClipSpans[*SpanIndex].X, DestRect->left);
-
-            Right = min(ClipSpans[*SpanIndex].X + ClipSpans[*SpanIndex].Width,
-                    DestRect->right);
-
-            memcpy(Target + Left - DestRect->left, Source + Left - DestRect->left,
-                 (Right - Left) * sizeof(PIXEL));
-        }
-
-        (*SpanIndex)++;
-
-        if (ClipSpansCount <= *SpanIndex)
-        {
-          /* No more spans, everything else is clipped away, we're done */
-          return FALSE;
+          f1 = 1;
         }
+        sx++;
+      }
+      SourceBits_4BPP += BltInfo->SourceSurface->lDelta;
     }
+    break;
 
-  return TRUE;
-}
+  case BMF_8BPP:
+    SourceLine = (PBYTE)BltInfo->SourceSurface->pvScan0 +
+      (BltInfo->SourcePoint.y * BltInfo->SourceSurface->lDelta) +
+      BltInfo->SourcePoint.x;
+    DestLine = DestBits;
 
-//NOTE: If you change something here, please do the same in other dibXXbpp.c files!
-BOOLEAN ScaleRectAvg16(SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
-                       RECTL* DestRect, RECTL *SourceRect,
-                       POINTL* MaskOrigin, POINTL BrushOrigin,
-                       CLIPOBJ *ClipRegion, XLATEOBJ *ColorTranslation,
-                       ULONG Mode)
-{
-    int NumPixels = DestRect->bottom - DestRect->top;
-
-    int IntPart = (((SourceRect->bottom - SourceRect->top) /
-                  (DestRect->bottom - DestRect->top)) * SourceSurf->lDelta) >> 1;
-
-    int FractPart = (SourceRect->bottom - SourceRect->top) %
-                    (DestRect->bottom - DestRect->top);
-
-    int Mid = (DestRect->bottom - DestRect->top) >> 1;
-    int E = 0;
-    int skip;
-    PIXEL *ScanLine, *ScanLineAhead;
-    PIXEL *PrevSource = NULL;
-    PIXEL *PrevSourceAhead = NULL;
-
-    PIXEL *Target = (PIXEL *) ((PBYTE)DestSurf->pvScan0 + (DestRect->top *
-                    DestSurf->lDelta) + 2 * DestRect->left);
-
-    PIXEL *Source = (PIXEL *) ((PBYTE)SourceSurf->pvScan0 + (SourceRect->top *
-                    SourceSurf->lDelta) + 2 * SourceRect->left);
-
-    PSPAN ClipSpans;
-    UINT ClipSpansCount;
-    UINT SpanIndex;
-    LONG DestY;
-
-    if (! ClipobjToSpans(&ClipSpans, &ClipSpansCount, ClipRegion, DestRect))
+    for (j = BltInfo->DestRect.top; j < BltInfo->DestRect.bottom; j++)
     {
-        return FALSE;
-    }
-    if (0 == ClipSpansCount)
-    {
-        /* No clip spans == empty clipping region, everything clipped away */
-        ASSERT(NULL == ClipSpans);
-        return TRUE;
-    }
-    skip = (DestRect->bottom - DestRect->top < SourceRect->bottom - SourceRect->top)
-            ? 0 : ((DestRect->bottom - DestRect->top) /
-                  (2 * (SourceRect->bottom - SourceRect->top)) + 1);
-
-    NumPixels -= skip;
-
-    ScanLine = (PIXEL*)ExAllocatePool(PagedPool, (DestRect->right - DestRect->left) *
-                sizeof(PIXEL));
+      SourceBits = SourceLine;
+      DestBits = DestLine;
+
+      for (i = BltInfo->DestRect.left; i < BltInfo->DestRect.right; i++)
+      {
+        *((WORD *)DestBits) = (WORD)XLATEOBJ_iXlate(
+          BltInfo->XlateSourceToDest, *SourceBits);
+        SourceBits += 1;
+        DestBits += 2;
+      }
 
-    ScanLineAhead = (PIXEL *)ExAllocatePool(PagedPool, (DestRect->right -
-                    DestRect->left) * sizeof(PIXEL));
-  
-    if (!ScanLine || !ScanLineAhead)
-    {
-      if (ScanLine) ExFreePool(ScanLine);
-      if (ScanLineAhead) ExFreePool(ScanLineAhead);
-      return FALSE;
+      SourceLine += BltInfo->SourceSurface->lDelta;
+      DestLine += BltInfo->DestSurface->lDelta;
     }
+    break;
 
-    DestY = DestRect->top;
-    SpanIndex = 0;
-    while (NumPixels-- > 0)
+  case BMF_16BPP:
+    if (NULL == BltInfo->XlateSourceToDest || 0 !=
+      (BltInfo->XlateSourceToDest->flXlate & XO_TRIVIAL))
     {
-        if (Source != PrevSource)
+      if (BltInfo->DestRect.top < BltInfo->SourcePoint.y)
+      {
+        SourceBits = (PBYTE)BltInfo->SourceSurface->pvScan0 +
+          (BltInfo->SourcePoint.y *
+          BltInfo->SourceSurface->lDelta) + 2 *
+          BltInfo->SourcePoint.x;
+
+        for (j = BltInfo->DestRect.top; j < BltInfo->DestRect.bottom; j++)
         {
-            if (Source == PrevSourceAhead)
-            {
-                /* the next scan line has already been scaled and stored in
-                 * ScanLineAhead; swap the buffers that ScanLine and ScanLineAhead
-                 * point to
-                 */
-                PIXEL *tmp = ScanLine;
-                ScanLine = ScanLineAhead;
-                ScanLineAhead = tmp;
-            }
-            else
-            {
-                ScaleLineAvg16(ScanLine, Source, SourceRect->right - SourceRect->left,
-                DestRect->right - DestRect->left);
-            }
-            PrevSource = Source;
-        }
-
-        if (E >= Mid && PrevSourceAhead != (PIXEL *)((BYTE *)Source +
-            SourceSurf->lDelta))
-        {
-            int x;
-
-            ScaleLineAvg16(ScanLineAhead, (PIXEL *)((BYTE *)Source +
-                           SourceSurf->lDelta), SourceRect->right -
-                           SourceRect->left, DestRect->right - DestRect->left);
+          RtlMoveMemory(DestBits, SourceBits,
+            2 * (BltInfo->DestRect.right -
+            BltInfo->DestRect.left));
 
-            for (x = 0; x < DestRect->right - DestRect->left; x++)
-            {
-                ScanLine[x] = average16(ScanLine[x], ScanLineAhead[x]);
-            }
-
-            PrevSourceAhead = (PIXEL *)((BYTE *)Source + SourceSurf->lDelta);
+          SourceBits += BltInfo->SourceSurface->lDelta;
+          DestBits += BltInfo->DestSurface->lDelta;
         }
-
-        if (! FinalCopy16(Target, ScanLine, ClipSpans, ClipSpansCount, &SpanIndex, DestY, DestRect))
+      }
+      else
+      {
+        SourceBits = (PBYTE)BltInfo->SourceSurface->pvScan0 +
+          ((BltInfo->SourcePoint.y + BltInfo->DestRect.bottom -
+          BltInfo->DestRect.top - 1) *
+          BltInfo->SourceSurface->lDelta) + 2 *
+          BltInfo->SourcePoint.x;
+
+        DestBits = (PBYTE)BltInfo->DestSurface->pvScan0 +
+          ((BltInfo->DestRect.bottom - 1) *
+          BltInfo->DestSurface->lDelta) + 2 *
+          BltInfo->DestRect.left;
+
+        for (j = BltInfo->DestRect.bottom - 1;
+          BltInfo->DestRect.top <= j; j--)
         {
-            /* No more spans, everything else is clipped away, we're done */
-            ExFreePool(ClipSpans);
-            ExFreePool(ScanLine);
-            ExFreePool(ScanLineAhead);
-            return TRUE;
-        }
+          RtlMoveMemory(DestBits, SourceBits, 2 *
+            (BltInfo->DestRect.right -
+            BltInfo->DestRect.left));
 
-        DestY++;
-        Target = (PIXEL *)((BYTE *)Target + DestSurf->lDelta);
-        Source += IntPart;
-        E += FractPart;
-
-        if (E >= DestRect->bottom - DestRect->top)
-        {
-            E -= DestRect->bottom - DestRect->top;
-            Source = (PIXEL *)((BYTE *)Source + SourceSurf->lDelta);
+          SourceBits -= BltInfo->SourceSurface->lDelta;
+          DestBits -= BltInfo->DestSurface->lDelta;
         }
-    } /* while */
-
-    if (skip > 0 && Source != PrevSource)
-    {
-        ScaleLineAvg16(ScanLine, Source, SourceRect->right - SourceRect->left,
-                       DestRect->right - DestRect->left);
+      }
     }
-
-    while (skip-- > 0)
+    else
     {
-        if (! FinalCopy16(Target, ScanLine, ClipSpans, ClipSpansCount, &SpanIndex,
-                          DestY, DestRect))
+      if (BltInfo->DestRect.top < BltInfo->SourcePoint.y)
+      {
+        SourceLine = (PBYTE)BltInfo->SourceSurface->pvScan0 +
+          (BltInfo->SourcePoint.y *
+          BltInfo->SourceSurface->lDelta) + 2 *
+          BltInfo->SourcePoint.x;
+
+        DestLine = DestBits;
+        for (j = BltInfo->DestRect.top; j < BltInfo->DestRect.bottom; j++)
         {
-            /* No more spans, everything else is clipped away, we're done */
-            ExFreePool(ClipSpans);
-            ExFreePool(ScanLine);
-            ExFreePool(ScanLineAhead);
-            return TRUE;
+          SourceBits = SourceLine;
+          DestBits = DestLine;
+          for (i = BltInfo->DestRect.left; i <
+            BltInfo->DestRect.right; i++)
+          {
+            *((WORD *)DestBits) = (WORD)XLATEOBJ_iXlate(
+              BltInfo->XlateSourceToDest,
+              *((WORD *)SourceBits));
+            SourceBits += 2;
+            DestBits += 2;
+          }
+          SourceLine += BltInfo->SourceSurface->lDelta;
+          DestLine += BltInfo->DestSurface->lDelta;
         }
-        DestY++;
-        Target = (PIXEL *)((BYTE *)Target + DestSurf->lDelta);
+      }
+      else
+      {
+        SourceLine = (PBYTE)BltInfo->SourceSurface->pvScan0 +
+          ((BltInfo->SourcePoint.y +
+          BltInfo->DestRect.bottom -
+          BltInfo->DestRect.top - 1) *
+          BltInfo->SourceSurface->lDelta) + 2 *
+          BltInfo->SourcePoint.x;
+
+        DestLine = (PBYTE)BltInfo->DestSurface->pvScan0 +
+          ((BltInfo->DestRect.bottom - 1) *
+          BltInfo->DestSurface->lDelta) + 2 *
+          BltInfo->DestRect.left;
+
+        for (j = BltInfo->DestRect.bottom - 1;
+          BltInfo->DestRect.top <= j; j--)
+        {
+          SourceBits = SourceLine;
+          DestBits = DestLine;
+          for (i = BltInfo->DestRect.left; i <
+            BltInfo->DestRect.right; i++)
+          {
+            *((WORD *)DestBits) = (WORD)XLATEOBJ_iXlate(
+              BltInfo->XlateSourceToDest,
+              *((WORD *)SourceBits));
+            SourceBits += 2;
+            DestBits += 2;
+          }
+          SourceLine -= BltInfo->SourceSurface->lDelta;
+          DestLine -= BltInfo->DestSurface->lDelta;
+        }
+      }
     }
+    break;
 
-    ExFreePool(ClipSpans);
-    ExFreePool(ScanLine);
-    ExFreePool(ScanLineAhead);
-
-    return TRUE;
-}
-
-
-//NOTE: If you change something here, please do the same in other dibXXbpp.c files!
-BOOLEAN DIB_16BPP_StretchBlt(SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
-                             RECTL* DestRect, RECTL *SourceRect,
-                             POINTL* MaskOrigin, POINTL BrushOrigin,
-                             CLIPOBJ *ClipRegion, XLATEOBJ *ColorTranslation,
-                             ULONG Mode)
-{
-   LONG SrcSizeY;
-   LONG SrcSizeX;
-   LONG DesSizeY;
-   LONG DesSizeX;
-   LONG sx = 0;
-   LONG sy = 0;
-   LONG DesX;
-   LONG DesY;
-   PULONG DestBits;
-   LONG DifflDelta;
-
-   LONG SrcZoomXHight;
-   LONG SrcZoomXLow;
-   LONG SrcZoomYHight;
-   LONG SrcZoomYLow;
-
-   LONG sy_dec = 0;
-   LONG sy_max;
-
-   LONG sx_dec = 0;
-   LONG sx_max;
-
-  DPRINT("DIB_16BPP_StretchBlt: Source BPP: %u, srcRect: (%d,%d)-(%d,%d), dstRect: (%d,%d)-(%d,%d)\n",
-     BitsPerFormat(SourceSurf->iBitmapFormat), SourceRect->left, SourceRect->top, SourceRect->right, SourceRect->bottom,
-     DestRect->left, DestRect->top, DestRect->right, DestRect->bottom);
-
-    /* Calc the Zoom height of Source */
-    SrcSizeY = SourceRect->bottom - SourceRect->top;
-
-    /* Calc the Zoom Width of Source */
-    SrcSizeX = SourceRect->right - SourceRect->left;
-
-    /* Calc the Zoom height of Destinations */
-    DesSizeY = DestRect->bottom - DestRect->top;
-
-    /* Calc the Zoom width of Destinations */
-    DesSizeX = DestRect->right - DestRect->left;
+  case BMF_24BPP:
+    SourceLine = (PBYTE)BltInfo->SourceSurface->pvScan0 +
+      (BltInfo->SourcePoint.y * BltInfo->SourceSurface->lDelta) +
+      3 * BltInfo->SourcePoint.x;
 
-    /* Calc the zoom factor of source height */
-    SrcZoomYHight = SrcSizeY / DesSizeY;
-    SrcZoomYLow = SrcSizeY - (SrcZoomYHight * DesSizeY);
+    DestLine = DestBits;
 
-    /* Calc the zoom factor of source width */
-    SrcZoomXHight = SrcSizeX / DesSizeX;
-    SrcZoomXLow = SrcSizeX - (SrcZoomXHight * DesSizeX);
-
-    sx_max = DesSizeX;
-    sy_max = DesSizeY;
-    sy = SourceRect->top;
-
-    DestBits = (PULONG)((PBYTE)DestSurf->pvScan0 + (DestRect->left << 1) +
-                               DestRect->top * DestSurf->lDelta);
-
-    DifflDelta = DestSurf->lDelta -  (DesSizeX << 1);
-
-    switch(SourceSurf->iBitmapFormat)
+    for (j = BltInfo->DestRect.top; j < BltInfo->DestRect.bottom; j++)
     {
+      SourceBits = SourceLine;
+      DestBits = DestLine;
 
-      case BMF_1BPP:
-        /* FIXME :  MaskOrigin, BrushOrigin, ClipRegion, Mode ? */
-        /* This is a reference implementation, it hasn't been optimized for speed */
-
-       for (DesY=0; DesY<DesSizeY; DesY++)
-       {
-            sx = SourceRect->left;
-            sx_dec = 0;
-
-            for (DesX=0; DesX<DesSizeX; DesX++)
-            {
-                *DestBits = XLATEOBJ_iXlate(ColorTranslation,
-                                            DIB_1BPP_GetPixel(SourceSurf, sx, sy));
-
-                DestBits = (PULONG)((ULONG_PTR)DestBits + 2);
-
-                sx += SrcZoomXHight;
-                sx_dec += SrcZoomXLow;
-                if (sx_dec >= sx_max)
-                {
-                    sx++;
-                    sx_dec -= sx_max;
-                }
-            }
-
-            DestBits = (PULONG)((ULONG_PTR)DestBits + DifflDelta);
-
-            sy += SrcZoomYHight;
-            sy_dec += SrcZoomYLow;
-            if (sy_dec >= sy_max)
-            {
-                sy++;
-                sy_dec -= sy_max;
-            }
-       }
-       break;
-
-      case BMF_4BPP:
-        /* FIXME :  MaskOrigin, BrushOrigin, ClipRegion, Mode ? */
-        /* This is a reference implementation, it hasn't been optimized for speed */
-
-        for (DesY=0; DesY<DesSizeY; DesY++)
-        {
-            sx = SourceRect->left;
-            sx_dec = 0;
-
-            for (DesX=0; DesX<DesSizeX; DesX++)
-            {
-                  *DestBits = XLATEOBJ_iXlate(ColorTranslation,
-                                          DIB_4BPP_GetPixel(SourceSurf, sx, sy));
-
-                  DestBits = (PULONG)((ULONG_PTR)DestBits + 2);
+      for (i = BltInfo->DestRect.left; i < BltInfo->DestRect.right; i++)
+      {
+        xColor = (*(SourceBits + 2) << 0x10) +
+          (*(SourceBits + 1) << 0x08) + (*(SourceBits));
 
-                  sx += SrcZoomXHight;
-                  sx_dec += SrcZoomXLow;
-                  if (sx_dec >= sx_max)
-                  {
-                        sx++;
-                        sx_dec -= sx_max;
-                  }
-            }
+        *((WORD *)DestBits) = (WORD)XLATEOBJ_iXlate(
+          BltInfo->XlateSourceToDest, xColor);
 
-            DestBits = (PULONG)((ULONG_PTR)DestBits + DifflDelta);
-
-            sy += SrcZoomYHight;
-            sy_dec += SrcZoomYLow;
-            if (sy_dec >= sy_max)
-            {
-                sy++;
-                sy_dec -= sy_max;
-            }
-       }
-       break;
-
-      case BMF_8BPP:
-        /* FIXME :  MaskOrigin, BrushOrigin, ClipRegion, Mode ? */
-        /* This is a reference implementation, it hasn't been optimized for speed */
-
-        for (DesY=0; DesY<DesSizeY; DesY++)
-        {
-            sx = SourceRect->left;
-            sx_dec = 0;
-
-            for (DesX=0; DesX<DesSizeX; DesX++)
-            {
-                  *DestBits = XLATEOBJ_iXlate(ColorTranslation,
-                                          DIB_8BPP_GetPixel(SourceSurf, sx, sy));
-
-                   DestBits = (PULONG)((ULONG_PTR)DestBits + 2);
-
-                   sx += SrcZoomXHight;
-                   sx_dec += SrcZoomXLow;
-                   if (sx_dec >= sx_max)
-                   {
-                        sx++;
-                        sx_dec -= sx_max;
-                   }
-            }
-
-            DestBits = (PULONG)((ULONG_PTR)DestBits + DifflDelta);
-
-            sy += SrcZoomYHight;
-            sy_dec += SrcZoomYLow;
-            if (sy_dec >= sy_max)
-            {
-                sy++;
-                sy_dec -= sy_max;
-            }
-       }
-       break;
-
-
-      case BMF_24BPP:
-        /* FIXME :  MaskOrigin, BrushOrigin, ClipRegion, Mode ? */
-        /* This is a reference implementation, it hasn't been optimized for speed */
-
-        DestBits = (PULONG)((PBYTE)DestSurf->pvScan0 + (DestRect->left << 1) +
-                   DestRect->top * DestSurf->lDelta);
-
-        DifflDelta = DestSurf->lDelta -  (DesSizeX << 1);
-
-        for (DesY=0; DesY<DesSizeY; DesY++)
-        {
-            sx = SourceRect->left;
-            sx_dec = 0;
-
-            for (DesX=0; DesX<DesSizeX; DesX++)
-            {
-                *DestBits = XLATEOBJ_iXlate(ColorTranslation,
-                                        DIB_24BPP_GetPixel(SourceSurf, sx, sy));
-
-                DestBits = (PULONG)((ULONG_PTR)DestBits + 2);
-
-                sx += SrcZoomXHight;
-                sx_dec += SrcZoomXLow;
-                if (sx_dec >= sx_max)
-                {
-                    sx++;
-                    sx_dec -= sx_max;
-                }
-            }
-
-            DestBits = (PULONG)((ULONG_PTR)DestBits + DifflDelta);
-
-            sy += SrcZoomYHight;
-            sy_dec += SrcZoomYLow;
-            if (sy_dec >= sy_max)
-            {
-                sy++;
-                sy_dec -= sy_max;
-            }
-       }
-       break;
-
-      case BMF_32BPP:
-        /* FIXME :  MaskOrigin, BrushOrigin, ClipRegion, Mode ? */
-        /* This is a reference implementation, it hasn't been optimized for speed */
-
-        for (DesY=0; DesY<DesSizeY; DesY++)
-        {
-            sx = SourceRect->left;
-            sx_dec = 0;
+        SourceBits += 3;
+        DestBits += 2;
+      }
+      SourceLine += BltInfo->SourceSurface->lDelta;
+      DestLine += BltInfo->DestSurface->lDelta;
+    }
+    break;
 
-            for (DesX=0; DesX<DesSizeX; DesX++)
-            {
-                *DestBits = XLATEOBJ_iXlate(ColorTranslation,
-                                        DIB_32BPP_GetPixel(SourceSurf, sx, sy));
+  case BMF_32BPP:
+    SourceLine = (PBYTE)BltInfo->SourceSurface->pvScan0 +
+      (BltInfo->SourcePoint.y * BltInfo->SourceSurface->lDelta) +
+      4 * BltInfo->SourcePoint.x;
 
-                DestBits = (PULONG)((ULONG_PTR)DestBits + 2);
+    DestLine = DestBits;
 
-                sx += SrcZoomXHight;
-                sx_dec += SrcZoomXLow;
-                if (sx_dec >= sx_max)
-                {
-                    sx++;
-                    sx_dec -= sx_max;
-                }
-            }
-            DestBits = (PULONG)((ULONG_PTR)DestBits + DifflDelta);
+    for (j = BltInfo->DestRect.top; j < BltInfo->DestRect.bottom; j++)
+    {
+      SourceBits = SourceLine;
+      DestBits = DestLine;
+
+      for (i = BltInfo->DestRect.left; i < BltInfo->DestRect.right; i++)
+      {
+        *((WORD *)DestBits) = (WORD)XLATEOBJ_iXlate(
+          BltInfo->XlateSourceToDest,
+          *((PDWORD) SourceBits));
+        SourceBits += 4;
+        DestBits += 2;
+      }
 
-            sy += SrcZoomYHight;
-            sy_dec += SrcZoomYLow;
-            if (sy_dec >= sy_max)
-            {
-                sy++;
-                sy_dec -= sy_max;
-            }
-        }
-        break;
+      SourceLine += BltInfo->SourceSurface->lDelta;
+      DestLine += BltInfo->DestSurface->lDelta;
+    }
+    break;
 
-      case BMF_16BPP:
-        return ScaleRectAvg16(DestSurf, SourceSurf, DestRect, SourceRect, MaskOrigin, BrushOrigin,
-                              ClipRegion, ColorTranslation, Mode);
-      break;
+  default:
+    DPRINT1("DIB_16BPP_Bitblt: Unhandled Source BPP: %u\n",
+      BitsPerFormat(BltInfo->SourceSurface->iBitmapFormat));
+    return FALSE;
+  }
 
-      default:
-         DPRINT1("DIB_16BPP_StretchBlt: Unhandled Source BPP: %u\n", BitsPerFormat(SourceSurf->iBitmapFormat));
-      return FALSE;
-    }
+  return TRUE;
+}
 
+/*
+ * DIB_16BPP_ColorFill
+ *
+ * Fills the rectangle DestRect of the 16bpp surface DestSurface with a
+ * single colour, one scanline at a time (optimized helper for BitBlt).
+ *
+ * DestSurface  surface whose pvScan0/lDelta describe the pixel memory
+ * DestRect     rectangle to fill; right and bottom are exclusive
+ * color        pixel value; the x86 path stores only its low 16 bits,
+ *              the generic path hands it to DIB_16BPP_HLine unchanged
+ *
+ * Always returns TRUE.
+ */
+BOOLEAN
+DIB_16BPP_ColorFill(SURFOBJ* DestSurface, RECTL* DestRect, ULONG color)
+{
+  ULONG DestY;
+
+#if defined(_M_IX86) && !defined(_MSC_VER)
+  /* This is about 10% faster than the generic C code below */
+  ULONG delta = DestSurface->lDelta;
+  /* Pixels per row; an empty or inverted rectangle is clamped to 0 so the
+     unsigned count handed to the assembly can never wrap to a huge value */
+  ULONG width = (DestRect->right > DestRect->left) ?
+    (ULONG)(DestRect->right - DestRect->left) : 0;
+  PULONG pos =  (PULONG) ((PBYTE)DestSurface->pvScan0 + DestRect->top * delta + (DestRect->left<<1));
+  color = (color&0xffff);  /* If the color value is "abcd", put "abcdabcd" into color */
+  color += (color<<16);
+
+  for (DestY = DestRect->top; DestY< DestRect->bottom; DestY++)
+  {
+    /* Numeric local labels (1:, 2:) are used instead of named ones so the
+       statement still assembles if the compiler emits it more than once.
+       "memory" and "cc" are listed because stos writes through EDI and
+       test/dec/shr change the flags. */
+    __asm__ __volatile__ (
+      "  cld\n"
+      "  mov  %1,%%ebx\n"
+      "  mov  %2,%%edi\n"
+      "  test %%ebx,%%ebx\n"     /* Nothing to do for an empty row */
+      "  jz   2f\n"
+      "  test $0x03, %%edi\n" /* Align to fullword boundary */
+      "  jz   1f\n"
+      "  stosw\n"
+      "  dec  %%ebx\n"
+      "  jz   2f\n"
+      "1:\n"
+      "  mov  %%ebx,%%ecx\n"     /* Setup count of fullwords to fill */
+      "  shr  $1,%%ecx\n"
+      "  rep stosl\n"         /* The actual fill */
+      "  test $0x01, %%ebx\n"    /* One left to do at the right side? */
+      "  jz   2f\n"
+      "  stosw\n"
+      "2:\n"
+      :
+      : "a" (color), "r" (width), "m" (pos)
+      : "%ecx", "%ebx", "%edi", "memory", "cc");
+    pos =(PULONG)((ULONG_PTR)pos + delta);   /* advance to the next scanline */
+  }
+#else /* _M_IX86 */
+
+  /* Generic path: one horizontal line per row of the rectangle */
+  for (DestY = DestRect->top; DestY< DestRect->bottom; DestY++)
+  {
+    DIB_16BPP_HLine (DestSurface, DestRect->left, DestRect->right, DestY, color);
+  }
+#endif
   return TRUE;
 }
 
 /*
  * DIB_16BPP_TransparentBlt
  *
  * Copies pixels from SourceSurf into the DestRect area of the 16bpp
  * surface DestSurf, leaving the destination untouched wherever the raw
  * source pixel equals iTransColor.
  *
  * For every destination pixel the source coordinate is obtained by
  * integer scaling of the offset inside DestRect by SrcWidth/DstWidth and
  * SrcHeight/DstHeight, relative to SourceRect's top-left corner. Source
  * coordinates outside SourceSurf->sizlBitmap are skipped.
  *
  * Destination pixels are processed two at a time through one ULONG
  * (low 16 bits = pixel X, high 16 bits = pixel X+1); a trailing odd pixel
  * is written with a single 16-bit store.
  * NOTE(review): the low/high pairing presumes little-endian pixel order,
  * and the ULONG accesses presume DestBits is suitably aligned even when
  * DestRect->left is odd - confirm for the supported targets.
  *
  * The transparency test uses the value returned by DIB_GetSourceIndex;
  * XLATEOBJ_iXlate(ColorTranslation, ...) is applied only to pixels that
  * are actually written.
  *
  * Always returns TRUE.
  */
 BOOLEAN
 DIB_16BPP_TransparentBlt(SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
-                         RECTL*  DestRect,  POINTL  *SourcePoint,
+                         RECTL*  DestRect,  RECTL *SourceRect,
                          XLATEOBJ *ColorTranslation, ULONG iTransColor)
 {
-    ULONG RoundedRight, X, Y, SourceX, SourceY, Source, wd, Dest;
-    ULONG *DestBits;
-
-    RoundedRight = DestRect->right - ((DestRect->right - DestRect->left) & 0x1);
-    SourceY = SourcePoint->y;
-    DestBits = (ULONG*)((PBYTE)DestSurf->pvScan0 +
-                      (DestRect->left << 1) +
-                      DestRect->top * DestSurf->lDelta);
-    wd = DestSurf->lDelta - ((DestRect->right - DestRect->left) << 1);
-
-    for(Y = DestRect->top; Y < DestRect->bottom; Y++)
+  ULONG RoundedRight, X, Y, SourceX = 0, SourceY = 0, Source, wd, Dest;
+  ULONG *DestBits;
+
+  LONG DstHeight;
+  LONG DstWidth;
+  LONG SrcHeight;
+  LONG SrcWidth;
+
+  DstHeight = DestRect->bottom - DestRect->top;
+  DstWidth = DestRect->right - DestRect->left;
+  SrcHeight = SourceRect->bottom - SourceRect->top;
+  SrcWidth = SourceRect->right - SourceRect->left;
+
+  RoundedRight = DestRect->right - ((DestRect->right - DestRect->left) & 0x1); /* end of the part done in pixel pairs */
+  DestBits = (ULONG*)((PBYTE)DestSurf->pvScan0 +
+    (DestRect->left << 1) +
+    DestRect->top * DestSurf->lDelta);
+  wd = DestSurf->lDelta - ((DestRect->right - DestRect->left) << 1); /* bytes from the end of a row's pixels to the next row */
+
+  for(Y = DestRect->top; Y < DestRect->bottom; Y++)
+  {
+    SourceY = SourceRect->top+(Y - DestRect->top) * SrcHeight / DstHeight; /* scale the row offset into the source */
+    for(X = DestRect->left; X < RoundedRight; X += 2, DestBits++, SourceX += 2) /* NOTE(review): SourceX += 2 is overwritten by the assignments in the body */
     {
-        SourceX = SourcePoint->x;
-        for(X = DestRect->left; X < RoundedRight; X += 2, DestBits++, SourceX += 2)
+      Dest = *DestBits; /* current contents of both destination pixels */
+
+      SourceX = SourceRect->left+(X - DestRect->left) * SrcWidth / DstWidth; /* source column for pixel X */
+      if (SourceX >= 0 && SourceY >= 0 && /* NOTE(review): SourceX/SourceY are ULONG, so the >= 0 tests are always true */
+        SourceSurf->sizlBitmap.cx > SourceX && SourceSurf->sizlBitmap.cy > SourceY)
+      {
+        Source = DIB_GetSourceIndex(SourceSurf, SourceX, SourceY);
+        if(Source != iTransColor)
         {
-            Dest = *DestBits;
-            Source = DIB_GetSourceIndex(SourceSurf, SourceX, SourceY);
-
-            if(Source != iTransColor)
-            {
-                Dest &= 0xFFFF0000;
-                Dest |= (XLATEOBJ_iXlate(ColorTranslation, Source) & 0xFFFF);
-            }
-
-            Source = DIB_GetSourceIndex(SourceSurf, SourceX + 1, SourceY);
-            if(Source != iTransColor)
-            {
-                Dest &= 0xFFFF;
-                Dest |= (XLATEOBJ_iXlate(ColorTranslation, Source) << 16);
-            }
-
-            *DestBits = Dest;
-            }
-
-            if(X < DestRect->right)
-            {
-                Source = DIB_GetSourceIndex(SourceSurf, SourceX, SourceY);
-                if(Source != iTransColor)
-                {
-                    *((USHORT*)DestBits) = (USHORT)XLATEOBJ_iXlate(ColorTranslation,
-                                                                   Source);
-                }
-
-                DestBits = (PULONG)((ULONG_PTR)DestBits + 2);
-            }
-
-            SourceY++;
-            DestBits = (ULONG*)((ULONG_PTR)DestBits + wd);
+          Dest &= 0xFFFF0000; /* replace the low 16 bits (pixel X) */
+          Dest |= (XLATEOBJ_iXlate(ColorTranslation, Source) & 0xFFFF);
         }
+      }
 
-    return TRUE;
-}
-
-typedef union
-{
-    ULONG ul;
-    struct
-    {
-        UCHAR red;
-        UCHAR green;
-        UCHAR blue;
-        UCHAR alpha;
-    } col;
-} NICEPIXEL32;
-
-typedef union
-{
-    USHORT us;
-    struct
-    {
-        USHORT  red:5,
-                green:6,
-                blue:5;
-   } col;
-} NICEPIXEL16;
-
-static __inline UCHAR
-Clamp5(ULONG val)
-{
-    return (val > 31) ? 31 : val;
-}
-
-static __inline UCHAR
-Clamp6(ULONG val)
-{
-    return (val > 63) ? 63 : val;
-}
-
-BOOLEAN
-DIB_16BPP_AlphaBlend(SURFOBJ* Dest, SURFOBJ* Source, RECTL* DestRect,
-                     RECTL* SourceRect, CLIPOBJ* ClipRegion,
-                     XLATEOBJ* ColorTranslation, BLENDOBJ* BlendObj)
-{
-    INT Rows, Cols, SrcX, SrcY;
-    register PUSHORT Dst;
-    ULONG DstDelta;
-    BLENDFUNCTION BlendFunc;
-    register NICEPIXEL16 DstPixel;
-    register NICEPIXEL32 SrcPixel;
-    UCHAR Alpha, SrcBpp;
-
-    DPRINT("DIB_16BPP_AlphaBlend: srcRect: (%d,%d)-(%d,%d), dstRect: (%d,%d)-(%d,%d)\n",
-           SourceRect->left, SourceRect->top, SourceRect->right, SourceRect->bottom,
-           DestRect->left, DestRect->top, DestRect->right, DestRect->bottom);
-
-    ASSERT(DestRect->bottom - DestRect->top == SourceRect->bottom - SourceRect->top &&
-           DestRect->right - DestRect->left == SourceRect->right - SourceRect->left);
+      SourceX = SourceRect->left+(X+1 - DestRect->left) * SrcWidth / DstWidth; /* source column for pixel X+1 */
+      if (SourceX >= 0 && SourceY >= 0 &&
+        SourceSurf->sizlBitmap.cx > SourceX && SourceSurf->sizlBitmap.cy > SourceY)
+      {
+        Source = DIB_GetSourceIndex(SourceSurf, SourceX, SourceY);
+        if(Source != iTransColor)
+        {
+          Dest &= 0xFFFF; /* replace the high 16 bits (pixel X+1) */
+          Dest |= (XLATEOBJ_iXlate(ColorTranslation, Source) << 16);
+        }
+      }
 
-    BlendFunc = BlendObj->BlendFunction;
-    if (BlendFunc.BlendOp != AC_SRC_OVER)
-    {
-        DPRINT1("BlendOp != AC_SRC_OVER\n");
-        return FALSE;
-    }
-    if (BlendFunc.BlendFlags != 0)
-    {
-        DPRINT1("BlendFlags != 0\n");
-        return FALSE;
-    }
-    if ((BlendFunc.AlphaFormat & ~AC_SRC_ALPHA) != 0)
-    {
-        DPRINT1("Unsupported AlphaFormat (0x%x)\n", BlendFunc.AlphaFormat);
-        return FALSE;
-    }
-    if ((BlendFunc.AlphaFormat & AC_SRC_ALPHA) != 0 &&
-        BitsPerFormat(Source->iBitmapFormat) != 32)
-    {
-        DPRINT1("Source bitmap must be 32bpp when AC_SRC_ALPHA is set\n");
-        return FALSE;
+      *DestBits = Dest; /* write both pixels back with one 32-bit store */
     }
 
-    Dst = (PUSHORT)((ULONG_PTR)Dest->pvScan0 + (DestRect->top * Dest->lDelta) +
-          (DestRect->left << 1));
-    DstDelta = Dest->lDelta - ((DestRect->right - DestRect->left) << 1);
-    SrcBpp = BitsPerFormat(Source->iBitmapFormat);
-
-    Rows = DestRect->bottom - DestRect->top;
-    SrcY = SourceRect->top;
-    while (--Rows >= 0)
+    if(X < DestRect->right) /* odd width: one pixel is left after the pairs */
     {
-        Cols = DestRect->right - DestRect->left;
-        SrcX = SourceRect->left;
-        while (--Cols >= 0)
+      SourceX = SourceRect->left+(X - DestRect->left) * SrcWidth / DstWidth;
+      if (SourceX >= 0 && SourceY >= 0 &&
+        SourceSurf->sizlBitmap.cx > SourceX && SourceSurf->sizlBitmap.cy > SourceY)
+      {
+        Source = DIB_GetSourceIndex(SourceSurf, SourceX, SourceY);
+        if(Source != iTransColor)
         {
-            if (SrcBpp <= 16)
-            {
-                DstPixel.us = DIB_GetSource(Source, SrcX++, SrcY, ColorTranslation);
-                SrcPixel.col.red = (DstPixel.col.red << 3) | (DstPixel.col.red >> 2);
-
-                SrcPixel.col.green = (DstPixel.col.green << 2) |
-                                     (DstPixel.col.green >> 4);
-
-                SrcPixel.col.blue = (DstPixel.col.blue << 3) | (DstPixel.col.blue >> 2);
-            }
-            else
-            {
-                SrcPixel.ul = DIB_GetSourceIndex(Source, SrcX++, SrcY);
-            }
-            SrcPixel.col.red = SrcPixel.col.red *
-                               BlendFunc.SourceConstantAlpha / 255;
-
-            SrcPixel.col.green = SrcPixel.col.green *
-                                 BlendFunc.SourceConstantAlpha / 255;
-
-            SrcPixel.col.blue = SrcPixel.col.blue *
-                                BlendFunc.SourceConstantAlpha / 255;
-
-            SrcPixel.col.alpha = (SrcBpp == 32) ?
-                                 (SrcPixel.col.alpha *
-                                 BlendFunc.SourceConstantAlpha / 255) :
-                                 BlendFunc.SourceConstantAlpha;
-
-            Alpha = ((BlendFunc.AlphaFormat & AC_SRC_ALPHA) != 0) ?
-                    SrcPixel.col.alpha : BlendFunc.SourceConstantAlpha;
-
-         DstPixel.us = *Dst;
-         DstPixel.col.red = Clamp5(DstPixel.col.red * (255 - Alpha) / 255 +
-                                   (SrcPixel.col.red >> 3));
-
-         DstPixel.col.green = Clamp6(DstPixel.col.green * (255 - Alpha) / 255 +
-                                     (SrcPixel.col.green >> 2));
-
-         DstPixel.col.blue = Clamp5(DstPixel.col.blue * (255 - Alpha) / 255 +
-                                    (SrcPixel.col.blue >> 3));
-
-         *Dst++ = DstPixel.us;
+          *((USHORT*)DestBits) = (USHORT)XLATEOBJ_iXlate(ColorTranslation, /* 16-bit store: only this pixel is touched */
+            Source);
+        }
       }
 
-      Dst = (PUSHORT)((ULONG_PTR)Dst + DstDelta);
-      SrcY++;
+      DestBits = (PULONG)((ULONG_PTR)DestBits + 2); /* step past the single trailing pixel */
     }
 
-    return TRUE;
+    DestBits = (ULONG*)((ULONG_PTR)DestBits + wd); /* move to the first pixel of the next row */
+  }
+
+  return TRUE;
 }
 
 /* EOF */