Fixed StretchBlt; it now works for all dibXX formats. Do not say it does not take offset of...
[reactos.git] / reactos / subsys / win32k / dib / dib24bpp.c
index 36911ad..30493e1 100644 (file)
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
-/* $Id: dib24bpp.c,v 1.19 2004/04/06 17:54:32 weiden Exp $ */
-#undef WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#include <stdlib.h>
-#include <win32k/bitmaps.h>
-#include <win32k/brush.h>
-#include <win32k/debug.h>
+/* $Id$ */
+
+#include <w32k.h>
+
+#define NDEBUG
 #include <debug.h>
-#include <include/object.h>
-#include <ddk/winddi.h>
-#include "../eng/objects.h"
-#include "dib.h"
 
 VOID
-DIB_24BPP_PutPixel(PSURFOBJ SurfObj, LONG x, LONG y, ULONG c)
+DIB_24BPP_PutPixel(SURFOBJ *SurfObj, LONG x, LONG y, ULONG c) /* Write the low 24 bits of c to pixel (x, y) of SurfObj. */
 {
-  PBYTE addr = SurfObj->pvScan0 + (y * SurfObj->lDelta) + (x << 1) + x;
+  PBYTE addr = (PBYTE)SurfObj->pvScan0 + (y * SurfObj->lDelta) + (x << 1) + x; /* row y, then 3 bytes per pixel: (x << 1) + x == 3 * x */
   *(PUSHORT)(addr) = c & 0xFFFF; /* bits 0-15 of c go to bytes 0 and 1 */
   *(addr + 2) = (c >> 16) & 0xFF; /* bits 16-23 of c go to byte 2 */
 }
 
 ULONG
-DIB_24BPP_GetPixel(PSURFOBJ SurfObj, LONG x, LONG y)
+DIB_24BPP_GetPixel(SURFOBJ *SurfObj, LONG x, LONG y) /* Return the 24-bit value stored at pixel (x, y) of SurfObj. */
 {
-  PBYTE addr = SurfObj->pvScan0 + y * SurfObj->lDelta + (x << 1) + x;
+  PBYTE addr = (PBYTE)SurfObj->pvScan0 + y * SurfObj->lDelta + (x << 1) + x; /* row y, then 3 bytes per pixel: (x << 1) + x == 3 * x */
   return *(PUSHORT)(addr) + (*(addr + 2) << 16); /* bytes 0-1 supply bits 0-15, byte 2 supplies bits 16-23 */
 }
 
 VOID
-DIB_24BPP_HLine(PSURFOBJ SurfObj, LONG x1, LONG x2, LONG y, ULONG c)
+DIB_24BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c) /* Fill pixels x1 .. x2 - 1 of row y with the low 24 bits of c. */
 {
-  PBYTE addr = SurfObj->pvScan0 + y * SurfObj->lDelta + (x1 << 1) + x1;
-  LONG cx = x1;
-
-  c &= 0xFFFFFF;
-  while(cx < x2) {
-    *(PUSHORT)(addr) = c & 0xFFFF;
-    addr += 2;
-    *(addr) = c >> 16;
-    addr += 1;
-    ++cx;
-  }
+  PBYTE addr = (PBYTE)SurfObj->pvScan0 + y * SurfObj->lDelta + (x1 << 1) + x1;
+  ULONG Count = x2 - x1; /* pixels to fill; unsigned, so x2 < x1 wraps to a very large count */
+#ifndef _M_IX86
+  ULONG MultiCount;
+  ULONG Fill[3];
+#endif
+
+  if (Count < 8)
+    {
+      /* Short run: store each pixel as a 16-bit word plus one byte */
+      while (Count--)
+        {
+          *(PUSHORT)(addr) = c;
+          addr += 2;
+          *(addr) = c >> 16;
+          addr += 1;
+        }
+    }
+  else
+    {
+      /* Store single pixels until addr is 4-byte aligned. Each pixel advances
+       * addr by 3, so this takes at most 3 pixels; Count is at least 8 here,
+       * so at least one full 4-pixel group is left for the block fill below. */
+      while (0 != ((ULONG_PTR) addr & 0x3))
+        {
+          *(PUSHORT)(addr) = c;
+          addr += 2;
+          *(addr) = c >> 16;
+          addr += 1;
+          Count--;
+        }
+      /* If the color we need to fill with is 0ABC, then the final memory
+       * pattern (note little-endianness) is:
+       *
+       * |C.B.A|C.B.A|C.B.A|C.B.A|   <- pixel borders
+       * |C.B.A.C|B.A.C.B|A.C.B.A|   <- ULONG borders
+       *
+       * So, taking endianness into account again, four pixels (12 bytes) are
+       * written as these three ULONGs: CABC BCAB ABCA */
+#ifdef _M_IX86
+       /* This is about 30% faster than the generic C code below. It leaves
+        * the advanced destination pointer in addr and does not change Count. */
+       __asm__ __volatile__ (
+"      movl %1, %%ecx\n"
+"      andl $0xffffff, %%ecx\n"         /* 0ABC */
+"      movl %%ecx, %%ebx\n"             /* Construct BCAB in ebx */
+"      shrl $8, %%ebx\n"
+"      movl %%ecx, %%eax\n"
+"      shll $16, %%eax\n"
+"      orl  %%eax, %%ebx\n"
+"      movl %%ecx, %%edx\n"             /* Construct ABCA in edx */
+"      shll $8, %%edx\n"
+"      movl %%ecx, %%eax\n"
+"      shrl $16, %%eax\n"
+"      orl  %%eax, %%edx\n"
+"      movl %%ecx, %%eax\n"             /* Construct CABC in eax */
+"      shll $24, %%eax\n"
+"      orl  %%ecx, %%eax\n"
+"      movl %2, %%ecx\n"                /* Load count */
+"      shr  $2, %%ecx\n"
+"      movl %3, %%edi\n"                /* Load dest */
+"0:\n"
+"      movl %%eax, (%%edi)\n"           /* Store 4 pixels, 12 bytes */
+"      movl %%ebx, 4(%%edi)\n"
+"      movl %%edx, 8(%%edi)\n"
+"      addl $12, %%edi\n"
+"      dec  %%ecx\n"
+"      jnz  0b\n"
+"      movl %%edi, %0\n"
+  : "=m"(addr)
+  : "m"(c), "m"(Count), "m"(addr)
+  : "%eax", "%ebx", "%ecx", "%edx", "%edi");
+#else
+      c = c & 0xffffff;                /* 0ABC */
+      Fill[0] = c | (c << 24);         /* CABC */
+      Fill[1] = (c >> 8) | (c << 16);  /* BCAB */
+      Fill[2] = (c << 8) | (c >> 16);  /* ABCA */
+      MultiCount = Count / 4; /* number of 4-pixel groups */
+      do
+        {
+          *(PULONG)addr = Fill[0];
+          addr += 4;
+          *(PULONG)addr = Fill[1];
+          addr += 4;
+          *(PULONG)addr = Fill[2];
+          addr += 4;
+        }
+      while (0 != --MultiCount);
+#endif
+      Count = Count & 0x03; /* pixels left over after the 4-pixel groups */
+      while (0 != Count--)
+        {
+          *(PUSHORT)(addr) = c;
+          addr += 2;
+          *(addr) = c >> 16;
+          addr += 1;
+        }
+    }
 }
 
 VOID
-DIB_24BPP_VLine(PSURFOBJ SurfObj, LONG x, LONG y1, LONG y2, ULONG c)
+DIB_24BPP_VLine(SURFOBJ *SurfObj, LONG x, LONG y1, LONG y2, ULONG c)
 {
-  PBYTE addr = SurfObj->pvScan0 + y1 * SurfObj->lDelta + (x << 1) + x;
+  PBYTE addr = (PBYTE)SurfObj->pvScan0 + y1 * SurfObj->lDelta + (x << 1) + x;
   LONG lDelta = SurfObj->lDelta;
 
   c &= 0xFFFFFF;
@@ -76,34 +154,31 @@ DIB_24BPP_VLine(PSURFOBJ SurfObj, LONG x, LONG y1, LONG y2, ULONG c)
 }
 
 BOOLEAN
-DIB_24BPP_BitBltSrcCopy(  SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
-                         SURFGDI *DestGDI,  SURFGDI *SourceGDI,
-                         PRECTL  DestRect,  POINTL  *SourcePoint,
-                         XLATEOBJ *ColorTranslation)
+DIB_24BPP_BitBltSrcCopy(PBLTINFO BltInfo)
 {
   LONG     i, j, sx, sy, xColor, f1;
   PBYTE    SourceBits, DestBits, SourceLine, DestLine;
   PBYTE    SourceBits_4BPP, SourceLine_4BPP;
   PWORD    SourceBits_16BPP, SourceLine_16BPP;
 
-  DestBits = DestSurf->pvScan0 + (DestRect->top * DestSurf->lDelta) + DestRect->left * 3;
+  DestBits = (PBYTE)BltInfo->DestSurface->pvScan0 + (BltInfo->DestRect.top * BltInfo->DestSurface->lDelta) + BltInfo->DestRect.left * 3;
 
-  switch(SourceGDI->BitsPerPixel)
+  switch(BltInfo->SourceSurface->iBitmapFormat)
   {
-    case 1:
-      sx = SourcePoint->x;
-      sy = SourcePoint->y;
+    case BMF_1BPP:
+      sx = BltInfo->SourcePoint.x;
+      sy = BltInfo->SourcePoint.y;
 
-      for (j=DestRect->top; j<DestRect->bottom; j++)
+      for (j=BltInfo->DestRect.top; j<BltInfo->DestRect.bottom; j++)
       {
-        sx = SourcePoint->x;
-        for (i=DestRect->left; i<DestRect->right; i++)
+        sx = BltInfo->SourcePoint.x;
+        for (i=BltInfo->DestRect.left; i<BltInfo->DestRect.right; i++)
         {
-          if(DIB_1BPP_GetPixel(SourceSurf, sx, sy) == 0)
+          if(DIB_1BPP_GetPixel(BltInfo->SourceSurface, sx, sy) == 0)
           {
-            DIB_24BPP_PutPixel(DestSurf, i, j, XLATEOBJ_iXlate(ColorTranslation, 0));
+            DIB_24BPP_PutPixel(BltInfo->DestSurface, i, j, XLATEOBJ_iXlate(BltInfo->XlateSourceToDest, 0));
           } else {
-            DIB_24BPP_PutPixel(DestSurf, i, j, XLATEOBJ_iXlate(ColorTranslation, 1));
+            DIB_24BPP_PutPixel(BltInfo->DestSurface, i, j, XLATEOBJ_iXlate(BltInfo->XlateSourceToDest, 1));
           }
           sx++;
         }
@@ -111,19 +186,19 @@ DIB_24BPP_BitBltSrcCopy(  SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
       }
       break;
 
-    case 4:
-      SourceBits_4BPP = SourceSurf->pvScan0 + (SourcePoint->y * SourceSurf->lDelta) + (SourcePoint->x >> 1);
+    case BMF_4BPP:
+      SourceBits_4BPP = (PBYTE)BltInfo->SourceSurface->pvScan0 + (BltInfo->SourcePoint.y * BltInfo->SourceSurface->lDelta) + (BltInfo->SourcePoint.x >> 1);
 
-      for (j=DestRect->top; j<DestRect->bottom; j++)
+      for (j=BltInfo->DestRect.top; j<BltInfo->DestRect.bottom; j++)
       {
         SourceLine_4BPP = SourceBits_4BPP;
         DestLine = DestBits;
-        sx = SourcePoint->x;
+        sx = BltInfo->SourcePoint.x;
         f1 = sx & 1;
 
-        for (i=DestRect->left; i<DestRect->right; i++)
+        for (i=BltInfo->DestRect.left; i<BltInfo->DestRect.right; i++)
         {
-          xColor = XLATEOBJ_iXlate(ColorTranslation,
+          xColor = XLATEOBJ_iXlate(BltInfo->XlateSourceToDest,
               (*SourceLine_4BPP & altnotmask[f1]) >> (4 * (1 - f1)));
           *DestLine++ = xColor & 0xff;
           *(PWORD)DestLine = xColor >> 8;
@@ -132,114 +207,114 @@ DIB_24BPP_BitBltSrcCopy(  SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
           sx++;
         }
 
-        SourceBits_4BPP += SourceSurf->lDelta;
-        DestBits += DestSurf->lDelta;
+        SourceBits_4BPP += BltInfo->SourceSurface->lDelta;
+        DestBits += BltInfo->DestSurface->lDelta;
       }
       break;
 
-    case 8:
-      SourceLine = SourceSurf->pvScan0 + (SourcePoint->y * SourceSurf->lDelta) + SourcePoint->x;
+    case BMF_8BPP:
+      SourceLine = (PBYTE)BltInfo->SourceSurface->pvScan0 + (BltInfo->SourcePoint.y * BltInfo->SourceSurface->lDelta) + BltInfo->SourcePoint.x;
       DestLine = DestBits;
 
-      for (j = DestRect->top; j < DestRect->bottom; j++)
+      for (j = BltInfo->DestRect.top; j < BltInfo->DestRect.bottom; j++)
       {
         SourceBits = SourceLine;
         DestBits = DestLine;
 
-        for (i = DestRect->left; i < DestRect->right; i++)
+        for (i = BltInfo->DestRect.left; i < BltInfo->DestRect.right; i++)
         {
-          xColor = XLATEOBJ_iXlate(ColorTranslation, *SourceBits);
+          xColor = XLATEOBJ_iXlate(BltInfo->XlateSourceToDest, *SourceBits);
           *DestBits = xColor & 0xff;
           *(PWORD)(DestBits + 1) = xColor >> 8;
           SourceBits += 1;
          DestBits += 3;
         }
 
-        SourceLine += SourceSurf->lDelta;
-        DestLine += DestSurf->lDelta;
+        SourceLine += BltInfo->SourceSurface->lDelta;
+        DestLine += BltInfo->DestSurface->lDelta;
       }
       break;
 
-    case 16:
-      SourceBits_16BPP = SourceSurf->pvScan0 + (SourcePoint->y * SourceSurf->lDelta) + 2 * SourcePoint->x;
+    case BMF_16BPP:
+      SourceBits_16BPP = (PWORD)((PBYTE)BltInfo->SourceSurface->pvScan0 + (BltInfo->SourcePoint.y * BltInfo->SourceSurface->lDelta) + 2 * BltInfo->SourcePoint.x);
 
-      for (j=DestRect->top; j<DestRect->bottom; j++)
+      for (j=BltInfo->DestRect.top; j<BltInfo->DestRect.bottom; j++)
       {
         SourceLine_16BPP = SourceBits_16BPP;
         DestLine = DestBits;
 
-        for (i=DestRect->left; i<DestRect->right; i++)
+        for (i=BltInfo->DestRect.left; i<BltInfo->DestRect.right; i++)
         {
-          xColor = XLATEOBJ_iXlate(ColorTranslation, *SourceLine_16BPP);
+          xColor = XLATEOBJ_iXlate(BltInfo->XlateSourceToDest, *SourceLine_16BPP);
           *DestLine++ = xColor & 0xff;
           *(PWORD)DestLine = xColor >> 8;
           DestLine += 2;
           SourceLine_16BPP++;
         }
 
-        SourceBits_16BPP = (PWORD)((PBYTE)SourceBits_16BPP + SourceSurf->lDelta);
-        DestBits += DestSurf->lDelta;
+        SourceBits_16BPP = (PWORD)((PBYTE)SourceBits_16BPP + BltInfo->SourceSurface->lDelta);
+        DestBits += BltInfo->DestSurface->lDelta;
       }
       break;
 
-    case 24:
-      if (NULL == ColorTranslation || 0 != (ColorTranslation->flXlate & XO_TRIVIAL))
+    case BMF_24BPP:
+      if (NULL == BltInfo->XlateSourceToDest || 0 != (BltInfo->XlateSourceToDest->flXlate & XO_TRIVIAL))
       {
-       if (DestRect->top < SourcePoint->y)
+       if (BltInfo->DestRect.top < BltInfo->SourcePoint.y)
          {
-           SourceBits = SourceSurf->pvScan0 + (SourcePoint->y * SourceSurf->lDelta) + 3 * SourcePoint->x;
-           for (j = DestRect->top; j < DestRect->bottom; j++)
+           SourceBits = (PBYTE)BltInfo->SourceSurface->pvScan0 + (BltInfo->SourcePoint.y * BltInfo->SourceSurface->lDelta) + 3 * BltInfo->SourcePoint.x;
+           for (j = BltInfo->DestRect.top; j < BltInfo->DestRect.bottom; j++)
              {
-               RtlMoveMemory(DestBits, SourceBits, 3 * (DestRect->right - DestRect->left));
-               SourceBits += SourceSurf->lDelta;
-               DestBits += DestSurf->lDelta;
+               RtlMoveMemory(DestBits, SourceBits, 3 * (BltInfo->DestRect.right - BltInfo->DestRect.left));
+               SourceBits += BltInfo->SourceSurface->lDelta;
+               DestBits += BltInfo->DestSurface->lDelta;
              }
          }
        else
          {
-           SourceBits = SourceSurf->pvScan0 + ((SourcePoint->y + DestRect->bottom - DestRect->top - 1) * SourceSurf->lDelta) + 3 * SourcePoint->x;
-           DestBits = DestSurf->pvScan0 + ((DestRect->bottom - 1) * DestSurf->lDelta) + 3 * DestRect->left;
-           for (j = DestRect->bottom - 1; DestRect->top <= j; j--)
+           SourceBits = (PBYTE)BltInfo->SourceSurface->pvScan0 + ((BltInfo->SourcePoint.y + BltInfo->DestRect.bottom - BltInfo->DestRect.top - 1) * BltInfo->SourceSurface->lDelta) + 3 * BltInfo->SourcePoint.x;
+           DestBits = (PBYTE)BltInfo->DestSurface->pvScan0 + ((BltInfo->DestRect.bottom - 1) * BltInfo->DestSurface->lDelta) + 3 * BltInfo->DestRect.left;
+           for (j = BltInfo->DestRect.bottom - 1; BltInfo->DestRect.top <= j; j--)
              {
-               RtlMoveMemory(DestBits, SourceBits, 3 * (DestRect->right - DestRect->left));
-               SourceBits -= SourceSurf->lDelta;
-               DestBits -= DestSurf->lDelta;
+               RtlMoveMemory(DestBits, SourceBits, 3 * (BltInfo->DestRect.right - BltInfo->DestRect.left));
+               SourceBits -= BltInfo->SourceSurface->lDelta;
+               DestBits -= BltInfo->DestSurface->lDelta;
              }
          }
       }
       else
       {
        /* FIXME */
-       DPRINT1("DIB_24BPP_Bitblt: Unhandled ColorTranslation for 16 -> 16 copy");
+       DPRINT1("DIB_24BPP_Bitblt: Unhandled BltInfo->XlateSourceToDest for 16 -> 16 copy\n");
         return FALSE;
       }
       break;
 
-    case 32:
-      SourceLine = SourceSurf->pvScan0 + (SourcePoint->y * SourceSurf->lDelta) + 4 * SourcePoint->x;
+    case BMF_32BPP:
+      SourceLine = (PBYTE)BltInfo->SourceSurface->pvScan0 + (BltInfo->SourcePoint.y * BltInfo->SourceSurface->lDelta) + 4 * BltInfo->SourcePoint.x;
       DestLine = DestBits;
 
-      for (j = DestRect->top; j < DestRect->bottom; j++)
+      for (j = BltInfo->DestRect.top; j < BltInfo->DestRect.bottom; j++)
       {
         SourceBits = SourceLine;
         DestBits = DestLine;
 
-        for (i = DestRect->left; i < DestRect->right; i++)
+        for (i = BltInfo->DestRect.left; i < BltInfo->DestRect.right; i++)
         {
-          xColor = XLATEOBJ_iXlate(ColorTranslation, *((PDWORD) SourceBits));
+          xColor = XLATEOBJ_iXlate(BltInfo->XlateSourceToDest, *((PDWORD) SourceBits));
           *DestBits = xColor & 0xff;
           *(PWORD)(DestBits + 1) = xColor >> 8;
           SourceBits += 4;
          DestBits += 3;
         }
 
-        SourceLine += SourceSurf->lDelta;
-        DestLine += DestSurf->lDelta;
+        SourceLine += BltInfo->SourceSurface->lDelta;
+        DestLine += BltInfo->DestSurface->lDelta;
       }
       break;
 
     default:
-      DbgPrint("DIB_24BPP_Bitblt: Unhandled Source BPP: %u\n", SourceGDI->BitsPerPixel);
+      DbgPrint("DIB_24BPP_Bitblt: Unhandled Source BPP: %u\n", BitsPerFormat(BltInfo->SourceSurface->iBitmapFormat));
       return FALSE;
   }
 
@@ -247,127 +322,444 @@ DIB_24BPP_BitBltSrcCopy(  SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
 }
 
 BOOLEAN
-DIB_24BPP_BitBlt(SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
-                SURFGDI *DestGDI,  SURFGDI *SourceGDI,
-                PRECTL  DestRect,  POINTL  *SourcePoint,
-                PBRUSHOBJ Brush, PPOINTL BrushOrigin,
-                XLATEOBJ *ColorTranslation, ULONG Rop4)
+DIB_24BPP_BitBlt(PBLTINFO BltInfo) /* Apply BltInfo->Rop4 to every pixel of BltInfo->DestRect on the 24bpp destination surface; always returns TRUE. */
 {
-   ULONG X, Y;
+   ULONG DestX, DestY;
    ULONG SourceX, SourceY;
-   ULONG Dest, Source, Pattern;
-   PBYTE DestBits;
+   ULONG PatternY = 0; /* current row inside the pattern surface, when there is one */
+   ULONG Dest, Source = 0, Pattern = 0; /* Source and Pattern stay 0 when they are never fetched */
    BOOL UsesSource;
    BOOL UsesPattern;
-   /* Pattern brushes */
-   PGDIBRUSHOBJ GdiBrush;
-   HBITMAP PatternSurface = NULL;
-   PSURFOBJ PatternObj;
-   ULONG PatternWidth, PatternHeight;
+   PBYTE DestBits;
 
-   if (Rop4 == SRCCOPY)
-   {
-      return DIB_24BPP_BitBltSrcCopy(
-         DestSurf,
-         SourceSurf,
-         DestGDI,
-         SourceGDI,
-         DestRect,
-         SourcePoint,
-         ColorTranslation);
-   }
+   UsesSource = ROP4_USES_SOURCE(BltInfo->Rop4); /* NOTE(review): macro defined elsewhere; presumably true when the ROP reads the source */
+   UsesPattern = ROP4_USES_PATTERN(BltInfo->Rop4); /* NOTE(review): macro defined elsewhere; presumably true when the ROP reads the pattern */
+
+   SourceY = BltInfo->SourcePoint.y;
+   DestBits = (PBYTE)( /* first destination pixel: 3 bytes per pixel plus row offset */
+      (PBYTE)BltInfo->DestSurface->pvScan0 +
+      (BltInfo->DestRect.left << 1) + BltInfo->DestRect.left +
+      BltInfo->DestRect.top * BltInfo->DestSurface->lDelta);
 
-   UsesSource = ((Rop4 & 0xCC0000) >> 2) != (Rop4 & 0x330000);
-   UsesPattern = ((Rop4 & 0xF00000) >> 4) != (Rop4 & 0x0F0000);  
-      
    if (UsesPattern)
    {
-      if (Brush == NULL)
+      if (BltInfo->PatternSurface)
       {
-         UsesPattern = FALSE;
-      } else
-      if (Brush->iSolidColor == 0xFFFFFFFF)
+         PatternY = (BltInfo->DestRect.top + BltInfo->BrushOrigin.y) % /* pattern row for the first destination row, wrapped to the pattern height */
+                    BltInfo->PatternSurface->sizlBitmap.cy;
+      }
+      else
       {
-         PBITMAPOBJ PatternBitmap;
-
-         GdiBrush = CONTAINING_RECORD(
-            Brush,
-            GDIBRUSHOBJ,
-            BrushObject);
-
-         PatternBitmap = BITMAPOBJ_LockBitmap(GdiBrush->hbmPattern);
-         PatternSurface = BitmapToSurf(PatternBitmap, NULL);
-         BITMAPOBJ_UnlockBitmap(GdiBrush->hbmPattern);
-
-         PatternObj = (PSURFOBJ)AccessUserObject((ULONG)PatternSurface);
-         PatternWidth = PatternObj->sizlBitmap.cx;
-         PatternHeight = PatternObj->sizlBitmap.cy;
+         Pattern = BltInfo->Brush->iSolidColor; /* no pattern surface: the same brush colour is used for every pixel */
       }
    }
 
-   SourceY = SourcePoint->y;
-   DestBits = (PBYTE)(
-      DestSurf->pvScan0 +
-      (DestRect->left << 1) + DestRect->left +
-      DestRect->top * DestSurf->lDelta);
-
-   for (Y = DestRect->top; Y < DestRect->bottom; Y++)
+   for (DestY = BltInfo->DestRect.top; DestY < BltInfo->DestRect.bottom; DestY++)
    {
-      SourceX = SourcePoint->x;
-      for (X = DestRect->left; X < DestRect->right; X++, DestBits += 3, SourceX++)
+      SourceX = BltInfo->SourcePoint.x;
+
+      for (DestX = BltInfo->DestRect.left; DestX < BltInfo->DestRect.right; DestX++, DestBits += 3, SourceX++)
       {
          Dest = *((PUSHORT)DestBits) + (*(DestBits + 2) << 16);
+
          if (UsesSource)
          {
-            Source = DIB_GetSource(SourceSurf, SourceGDI, SourceX, SourceY, ColorTranslation);
+            Source = DIB_GetSource(BltInfo->SourceSurface, SourceX, SourceY, BltInfo->XlateSourceToDest);
          }
 
-         if (UsesPattern)
+         if (BltInfo->PatternSurface) /* tested on its own, independent of UsesPattern */
         {
-            if (Brush->iSolidColor == 0xFFFFFFFF)
-            {
-               Pattern = DIB_1BPP_GetPixel(PatternObj, X % PatternWidth, Y % PatternHeight) ? GdiBrush->crFore : GdiBrush->crBack;
-            }
-            else
-            {
-               Pattern = Brush->iSolidColor;
-            }
+            Pattern = DIB_GetSource(BltInfo->PatternSurface, (DestX + BltInfo->BrushOrigin.x) % BltInfo->PatternSurface->sizlBitmap.cx, PatternY, BltInfo->XlatePatternToDest);
          }
 
-         Dest = DIB_DoRop(Rop4, Dest, Source, Pattern) & 0xFFFFFF;
+         Dest = DIB_DoRop(BltInfo->Rop4, Dest, Source, Pattern) & 0xFFFFFF;
          *(PUSHORT)(DestBits) = Dest & 0xFFFF;
          *(DestBits + 2) = Dest >> 16;
       }
 
       SourceY++;
-      DestBits -= (DestRect->right - DestRect->left) * 3;
-      DestBits += DestSurf->lDelta;
+      if (BltInfo->PatternSurface)
+      {
+         PatternY++;
+         PatternY %= BltInfo->PatternSurface->sizlBitmap.cy; /* wrap to the top of the pattern */
+      }
+      DestBits -= (BltInfo->DestRect.right - BltInfo->DestRect.left) * 3; /* back to the left edge of this row */
+      DestBits += BltInfo->DestSurface->lDelta; /* then down one scanline */
    }
 
-   if (PatternSurface != NULL)
-      EngDeleteSurface(PatternSurface);
-  
    return TRUE;
 }
 
+/*
+ * BitBlt optimization: fills DestRect of the 24bpp surface DestSurface with
+ * the low 24 bits of color, one scanline at a time.  Always returns TRUE.
+ *
+ * On x86 each row is filled inline: rows shorter than 8 pixels are written
+ * pixel by pixel, longer rows are aligned to 4 bytes and then written in
+ * groups of 4 pixels (12 bytes) from registers.  Other targets call
+ * DIB_24BPP_HLine once per row.
+ */
+BOOLEAN
+DIB_24BPP_ColorFill(SURFOBJ* DestSurface, RECTL* DestRect, ULONG color)
+{
+  ULONG DestY;
+
+#ifdef _M_IX86
+  PBYTE xaddr = (PBYTE)DestSurface->pvScan0 + DestRect->top * DestSurface->lDelta + (DestRect->left << 1) + DestRect->left;
+  PBYTE addr;
+  ULONG Count;
+  ULONG xCount=DestRect->right - DestRect->left;
+
+  for (DestY = DestRect->top; DestY< DestRect->bottom; DestY++)
+  {
+    Count = xCount;
+    addr = xaddr;
+    /* Remember where the next row starts before addr is advanced below */
+    xaddr = (PBYTE)((ULONG_PTR)addr + DestSurface->lDelta);
+
+    if (Count < 8)
+    {
+      /* For small fills, don't bother doing anything fancy */
+      while (Count--)
+        {
+          *(PUSHORT)(addr) = color;
+          addr += 2;
+          *(addr) = color >> 16;
+          addr += 1;
+        }
+    }
+  else
+    {
+      /* Align to 4-byte address.  Each pixel advances addr by 3, so this
+       * takes at most 3 pixels and leaves at least one full 4-pixel group. */
+      while (0 != ((ULONG_PTR) addr & 0x3))
+        {
+          *(PUSHORT)(addr) = color;
+          addr += 2;
+          *(addr) = color >> 16;
+          addr += 1;
+          Count--;
+        }
+      /* If the color we need to fill with is 0ABC, then the final mem pattern
+       * (note little-endianness) would be:
+       *
+       * |C.B.A|C.B.A|C.B.A|C.B.A|   <- pixel borders
+       * |C.B.A.C|B.A.C.B|A.C.B.A|   <- ULONG borders
+       *
+       * So, taking endianness into account again, we need to fill with these
+       * ULONGs: CABC BCAB ABCA */
+
+       /* The loop label is a local numeric label ("0:" / "0b"), as in
+        * DIB_24BPP_HLine: a named label would become a real assembler symbol
+        * and clash if the compiler ever emitted this asm more than once.
+        * The "memory" clobber tells the compiler that the asm writes pixel
+        * data through %edi, which is not covered by the listed operands. */
+       __asm__ __volatile__ (
+"      movl %1, %%ecx\n"
+"      andl $0xffffff, %%ecx\n"         /* 0ABC */
+"      movl %%ecx, %%ebx\n"             /* Construct BCAB in ebx */
+"      shrl $8, %%ebx\n"
+"      movl %%ecx, %%eax\n"
+"      shll $16, %%eax\n"
+"      orl  %%eax, %%ebx\n"
+"      movl %%ecx, %%edx\n"             /* Construct ABCA in edx */
+"      shll $8, %%edx\n"
+"      movl %%ecx, %%eax\n"
+"      shrl $16, %%eax\n"
+"      orl  %%eax, %%edx\n"
+"      movl %%ecx, %%eax\n"             /* Construct CABC in eax */
+"      shll $24, %%eax\n"
+"      orl  %%ecx, %%eax\n"
+"      movl %2, %%ecx\n"                /* Load count */
+"      shr  $2, %%ecx\n"
+"      movl %3, %%edi\n"                /* Load dest */
+"0:\n"
+"      movl %%eax, (%%edi)\n"           /* Store 4 pixels, 12 bytes */
+"      movl %%ebx, 4(%%edi)\n"
+"      movl %%edx, 8(%%edi)\n"
+"      addl $12, %%edi\n"
+"      dec  %%ecx\n"
+"      jnz  0b\n"
+"      movl %%edi, %0\n"
+  : "=m"(addr)
+  : "m"(color), "m"(Count), "m"(addr)
+  : "%eax", "%ebx", "%ecx", "%edx", "%edi", "memory");
+      /* Pixels left over after the 4-pixel groups */
+      Count = Count & 0x03;
+      while (0 != Count--)
+        {
+          *(PUSHORT)(addr) = color;
+          addr += 2;
+          *(addr) = color >> 16;
+          addr += 1;
+        }
+    }
+  }
+#else
+
+  for (DestY = DestRect->top; DestY< DestRect->bottom; DestY++)
+    {
+      DIB_24BPP_HLine(DestSurface, DestRect->left, DestRect->right, DestY, color);
+    }
+#endif
+  return TRUE;
+}
+
+/* NOTE: If you change something here, please do the same in other dibXXbpp.c files!
+ *
+ * Stretches SourceRect of SourceSurf onto DestRect of the 24bpp DestSurf.
+ * Every destination pixel (DesX, DesY) takes the source pixel at
+ *   sx = (DesX - DestRect->left) * SrcSizeX / DesSizeX + SourceRect->left
+ *   sy = (DesY - DestRect->top)  * SrcSizeY / DesSizeY + SourceRect->top
+ * (integer division), run through XLATEOBJ_iXlate(ColorTranslation, ...).
+ * MaskOrigin, BrushOrigin, ClipRegion and Mode are not used by this code.
+ * Returns FALSE when SourceSurf->iBitmapFormat is not one of the handled
+ * BMF_1BPP .. BMF_32BPP cases, TRUE otherwise.
+ */
 BOOLEAN DIB_24BPP_StretchBlt(SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
-                            SURFGDI *DestGDI, SURFGDI *SourceGDI,
                             RECTL* DestRect, RECTL *SourceRect,
-                            POINTL* MaskOrigin, POINTL* BrushOrigin,
-                                       XLATEOBJ *ColorTranslation, ULONG Mode)
+                            POINTL* MaskOrigin, POINTL BrushOrigin,
+                            CLIPOBJ *ClipRegion, XLATEOBJ *ColorTranslation,
+                            ULONG Mode)
 {
-  DbgPrint("DIB_24BPP_StretchBlt: Source BPP: %u\n", SourceGDI->BitsPerPixel);
-  return FALSE;
+   LONG SrcSizeY;
+   LONG SrcSizeX;
+   LONG DesSizeY;
+   LONG DesSizeX;      
+   LONG sx;
+   LONG sy;
+   LONG DesX;
+   LONG DesY;
+   LONG color;
+  
+   SrcSizeY = SourceRect->bottom - SourceRect->top;
+   SrcSizeX = SourceRect->right - SourceRect->left;
+  
+   DesSizeY = DestRect->bottom - DestRect->top;
+   DesSizeX = DestRect->right - DestRect->left;
+
+   switch(SourceSurf->iBitmapFormat)
+   {
+      case BMF_1BPP:
+         /* FIXME :  MaskOrigin, BrushOrigin, ClipRegion, Mode ? */
+      /* This is a reference implementation, it hasn't been optimized for speed */
+                      
+       for (DesY=DestRect->top; DesY<DestRect->bottom; DesY++)
+       {                        
+           sy = (((DesY - DestRect->top) * SrcSizeY) / DesSizeY) + SourceRect->top;
+                     
+            for (DesX=DestRect->left; DesX<DestRect->right; DesX++)
+            {                  
+                  sx = (((DesX - DestRect->left) * SrcSizeX) / DesSizeX) + SourceRect->left;
+                               
+                  if(DIB_1BPP_GetPixel(SourceSurf, sx, sy) == 0)
+                                 {
+                                       DIB_24BPP_PutPixel(DestSurf, DesX, DesY, XLATEOBJ_iXlate(ColorTranslation, 0));
+                  } 
+                                 else 
+                                 {
+                    DIB_24BPP_PutPixel(DestSurf, DesX, DesY, XLATEOBJ_iXlate(ColorTranslation, 1));
+                  }
+            }
+       }               
+
+         break;
+
+      case BMF_4BPP:           
+      /* FIXME :  MaskOrigin, BrushOrigin, ClipRegion, Mode ? */
+      /* This is a reference implementation, it hasn't been optimized for speed */
+                      
+       for (DesY=DestRect->top; DesY<DestRect->bottom; DesY++)
+       {                        
+           sy = (((DesY - DestRect->top) * SrcSizeY) / DesSizeY) + SourceRect->top;
+                     
+            for (DesX=DestRect->left; DesX<DestRect->right; DesX++)
+            {                  
+                 sx = (((DesX - DestRect->left) * SrcSizeX) / DesSizeX) + SourceRect->left;            
+                 color = DIB_4BPP_GetPixel(SourceSurf, sx, sy);
+                 DIB_24BPP_PutPixel(DestSurf, DesX, DesY, XLATEOBJ_iXlate(ColorTranslation, color));
+            }
+       }                  
+      break;
+
+      case BMF_8BPP:           
+      /* FIXME :  MaskOrigin, BrushOrigin, ClipRegion, Mode ? */
+      /* This is a reference implementation, it hasn't been optimized for speed */
+                      
+       for (DesY=DestRect->top; DesY<DestRect->bottom; DesY++)
+       {                        
+           sy = (((DesY - DestRect->top) * SrcSizeY) / DesSizeY) + SourceRect->top;
+                     
+            for (DesX=DestRect->left; DesX<DestRect->right; DesX++)
+            {                  
+                 sx = (((DesX - DestRect->left) * SrcSizeX) / DesSizeX) + SourceRect->left;            
+                 color = DIB_8BPP_GetPixel(SourceSurf, sx, sy);
+                 DIB_24BPP_PutPixel(DestSurf, DesX, DesY, XLATEOBJ_iXlate(ColorTranslation, color));
+            }
+       }                  
+      break;
+
+      case BMF_16BPP:          
+      /* FIXME :  MaskOrigin, BrushOrigin, ClipRegion, Mode ? */
+      /* This is a reference implementation, it hasn't been optimized for speed */
+                      
+       for (DesY=DestRect->top; DesY<DestRect->bottom; DesY++)
+       {                        
+           sy = (((DesY - DestRect->top) * SrcSizeY) / DesSizeY) + SourceRect->top;
+                     
+            for (DesX=DestRect->left; DesX<DestRect->right; DesX++)
+            {                  
+                 sx = (((DesX - DestRect->left) * SrcSizeX) / DesSizeX) + SourceRect->left;            
+                 color = DIB_16BPP_GetPixel(SourceSurf, sx, sy);
+                 DIB_24BPP_PutPixel(DestSurf, DesX, DesY, XLATEOBJ_iXlate(ColorTranslation, color));
+            }
+       }                  
+      break;
+
+      case BMF_24BPP:          
+      /* FIXME :  MaskOrigin, BrushOrigin, ClipRegion, Mode ? */
+      /* This is a reference implementation, it hasn't been optimized for speed */
+                      
+       for (DesY=DestRect->top; DesY<DestRect->bottom; DesY++)
+       {                        
+           sy = (((DesY - DestRect->top) * SrcSizeY) / DesSizeY) + SourceRect->top;
+                     
+            for (DesX=DestRect->left; DesX<DestRect->right; DesX++)
+            {                  
+                 sx = (((DesX - DestRect->left) * SrcSizeX) / DesSizeX) + SourceRect->left;            
+                 color = DIB_24BPP_GetPixel(SourceSurf, sx, sy);
+                 DIB_24BPP_PutPixel(DestSurf, DesX, DesY, XLATEOBJ_iXlate(ColorTranslation, color));
+            }
+       }                  
+      break;
+
+      case BMF_32BPP:          
+      /* FIXME :  MaskOrigin, BrushOrigin, ClipRegion, Mode ? */
+      /* This is a reference implementation, it hasn't been optimized for speed */
+                      
+       for (DesY=DestRect->top; DesY<DestRect->bottom; DesY++)
+       {                        
+           sy = (((DesY - DestRect->top) * SrcSizeY) / DesSizeY) + SourceRect->top;
+                     
+            for (DesX=DestRect->left; DesX<DestRect->right; DesX++)
+            {                  
+                 sx = (((DesX - DestRect->left) * SrcSizeX) / DesSizeX) + SourceRect->left;            
+                 color = DIB_32BPP_GetPixel(SourceSurf, sx, sy);
+                 DIB_24BPP_PutPixel(DestSurf, DesX, DesY, XLATEOBJ_iXlate(ColorTranslation, color));
+            }
+       }                  
+      break;
+
+      default:
+      //DPRINT1("DIB_24BPP_StretchBlt: Unhandled Source BPP: %u\n", BitsPerFormat(SourceSurf->iBitmapFormat));
+      return FALSE;
+    }
+  
+  return TRUE;
 }
 
-BOOLEAN 
+/*
+ * Copies the DestRect-sized area of SourceSurf starting at SourcePoint onto
+ * the 24bpp DestSurf, skipping every source pixel whose untranslated value
+ * (as returned by DIB_GetSourceIndex) equals iTransColor.  Copied pixels are
+ * run through XLATEOBJ_iXlate(ColorTranslation, ...) and stored as 3 bytes.
+ * Always returns TRUE.
+ */
+BOOLEAN
 DIB_24BPP_TransparentBlt(SURFOBJ *DestSurf, SURFOBJ *SourceSurf,
-                         PSURFGDI DestGDI,  PSURFGDI SourceGDI,
                          RECTL*  DestRect,  POINTL  *SourcePoint,
                          XLATEOBJ *ColorTranslation, ULONG iTransColor)
 {
-  return FALSE;
+  ULONG X, Y, SourceX, SourceY, Source, wd, Dest;
+  BYTE *DestBits;
+
+  SourceY = SourcePoint->y;
+  /* A 24bpp pixel occupies 3 bytes, so the column offset is left * 3 */
+  DestBits = (BYTE*)((PBYTE)DestSurf->pvScan0 +
+                      (DestRect->left * 3) +
+                      DestRect->top * DestSurf->lDelta);
+  /* Bytes from the end of one destination row to the start of the next;
+   * the inner loop advances DestBits by 3 bytes per pixel */
+  wd = DestSurf->lDelta - ((DestRect->right - DestRect->left) * 3);
+
+  for(Y = DestRect->top; Y < DestRect->bottom; Y++)
+  {
+    SourceX = SourcePoint->x;
+    for(X = DestRect->left; X < DestRect->right; X++, DestBits += 3, SourceX++)
+    {
+      Source = DIB_GetSourceIndex(SourceSurf, SourceX, SourceY);
+      if(Source != iTransColor)
+      {
+        Dest = XLATEOBJ_iXlate(ColorTranslation, Source) & 0xFFFFFF;
+         *(PUSHORT)(DestBits) = Dest & 0xFFFF;
+         *(DestBits + 2) = Dest >> 16;
+      }
+    }
+
+    SourceY++;
+    DestBits = (BYTE*)((ULONG_PTR)DestBits + wd);
+  }
+
+  return TRUE;
+}
+
+typedef union { /* One 32-bit pixel, accessible as a whole ULONG or as four 8-bit channels */
+   ULONG ul;
+   struct {
+      UCHAR red; /* first (lowest-addressed) byte of ul */
+      UCHAR green;
+      UCHAR blue;
+      UCHAR alpha; /* last (highest-addressed) byte of ul */
+   } col;
+} NICEPIXEL32;
+
+STATIC inline UCHAR
+Clamp8(ULONG val) /* Saturate val to the 0..255 range of a UCHAR. */
+{
+   return (val > 255) ? 255 : val; /* anything above 255 becomes 255, smaller values pass through */
+}
+
+/*
+ * Alpha-blends SourceRect of Source onto DestRect of the 24bpp surface Dest.
+ * The two rectangles must have the same size (asserted).
+ *
+ * Only BlendOp == AC_SRC_OVER with BlendFlags == 0 is handled, and
+ * AC_SRC_ALPHA is accepted only for a 32bpp source; any other combination
+ * logs a message and returns FALSE.  Returns TRUE after blending.
+ * ClipRegion is not used by this code.
+ *
+ * Each source pixel is fetched with DIB_GetSource, scaled by
+ * SourceConstantAlpha, and added to the destination scaled by (255 - Alpha),
+ * saturating each channel at 255.
+ */
+BOOLEAN
+DIB_24BPP_AlphaBlend(SURFOBJ* Dest, SURFOBJ* Source, RECTL* DestRect,
+                     RECTL* SourceRect, CLIPOBJ* ClipRegion,
+                     XLATEOBJ* ColorTranslation, BLENDOBJ* BlendObj)
+{
+   INT Rows, Cols, SrcX, SrcY;
+   register PUCHAR Dst;
+   ULONG DstDelta;
+   BLENDFUNCTION BlendFunc;
+   register NICEPIXEL32 SrcPixel;
+   UCHAR Alpha, SrcBpp;
+
+   DPRINT("DIB_24BPP_AlphaBlend: srcRect: (%d,%d)-(%d,%d), dstRect: (%d,%d)-(%d,%d)\n",
+          SourceRect->left, SourceRect->top, SourceRect->right, SourceRect->bottom,
+          DestRect->left, DestRect->top, DestRect->right, DestRect->bottom);
+
+   ASSERT(DestRect->bottom - DestRect->top == SourceRect->bottom - SourceRect->top &&
+          DestRect->right - DestRect->left == SourceRect->right - SourceRect->left);
+
+   BlendFunc = BlendObj->BlendFunction;
+   if (BlendFunc.BlendOp != AC_SRC_OVER)
+   {
+      DPRINT1("BlendOp != AC_SRC_OVER\n");
+      return FALSE;
+   }
+   if (BlendFunc.BlendFlags != 0)
+   {
+      DPRINT1("BlendFlags != 0\n");
+      return FALSE;
+   }
+   if ((BlendFunc.AlphaFormat & ~AC_SRC_ALPHA) != 0)
+   {
+      DPRINT1("Unsupported AlphaFormat (0x%x)\n", BlendFunc.AlphaFormat);
+      return FALSE;
+   }
+   if ((BlendFunc.AlphaFormat & AC_SRC_ALPHA) != 0 &&
+       BitsPerFormat(Source->iBitmapFormat) != 32)
+   {
+      DPRINT1("Source bitmap must be 32bpp when AC_SRC_ALPHA is set\n");
+      return FALSE;
+   }
+
+   Dst = (PUCHAR)((ULONG_PTR)Dest->pvScan0 + (DestRect->top * Dest->lDelta) +
+                             (DestRect->left * 3));
+   /* Bytes from the end of one destination row to the start of the next */
+   DstDelta = Dest->lDelta - ((DestRect->right - DestRect->left) * 3);
+   SrcBpp = BitsPerFormat(Source->iBitmapFormat);
+
+   Rows = DestRect->bottom - DestRect->top;
+   SrcY = SourceRect->top;
+   while (--Rows >= 0)
+   {
+      Cols = DestRect->right - DestRect->left;
+      SrcX = SourceRect->left;
+      while (--Cols >= 0)
+      {
+         SrcPixel.ul = DIB_GetSource(Source, SrcX++, SrcY, ColorTranslation);
+         SrcPixel.col.red = SrcPixel.col.red * BlendFunc.SourceConstantAlpha / 255;
+         SrcPixel.col.green = SrcPixel.col.green * BlendFunc.SourceConstantAlpha / 255;
+         SrcPixel.col.blue = SrcPixel.col.blue * BlendFunc.SourceConstantAlpha / 255;
+         SrcPixel.col.alpha = (SrcBpp == 32) ? (SrcPixel.col.alpha * BlendFunc.SourceConstantAlpha / 255) : BlendFunc.SourceConstantAlpha;
+
+         Alpha = ((BlendFunc.AlphaFormat & AC_SRC_ALPHA) != 0) ?
+                 SrcPixel.col.alpha : BlendFunc.SourceConstantAlpha;
+
+         /* A 24bpp destination pixel is three separate bytes.  Dst is a byte
+          * pointer, so *Dst alone would read and write only the first of
+          * them; blend all three, pairing destination byte 0/1/2 with the
+          * red/green/blue members (the first three bytes) of SrcPixel. */
+         Dst[0] = Clamp8(Dst[0] * (255 - Alpha) / 255 + SrcPixel.col.red);
+         Dst[1] = Clamp8(Dst[1] * (255 - Alpha) / 255 + SrcPixel.col.green);
+         Dst[2] = Clamp8(Dst[2] * (255 - Alpha) / 255 + SrcPixel.col.blue);
+         Dst = (PUCHAR)((ULONG_PTR)Dst + 3);
+      }
+      Dst = (PUCHAR)((ULONG_PTR)Dst + DstDelta);
+      SrcY++;
+   }
+
+   return TRUE;
 }
 
 /* EOF */