24451140f7c7cb64afd990de51bb5315989f6d7a
[reactos.git] / subsystems / win32 / win32k / dib / i386 / dib24bpp_hline.s
/*
 * PROJECT:     Win32 subsystem
 * LICENSE:     See COPYING in the top level directory
 * FILE:        subsystems/win32/win32k/dib/i386/dib24bpp_hline.s
 * PURPOSE:     ASM optimised 24bpp HLine
 * PROGRAMMERS: Magnus Olsen
 */
8
.globl _DIB_24BPP_HLine
.intel_syntax noprefix

/*
 * _DIB_24BPP_HLine - fill a horizontal run of 24bpp pixels with one colour.
 *
 * Syntax: GNU as, Intel operand order, no register prefix.
 * ABI:    i386 cdecl (all arguments on the stack, caller pops them).
 *
 * Stack arguments, first to last, as this routine uses them:
 *   SurfObj  pointer to the surface descriptor.  Only two dwords are read:
 *            +32 = address of scanline 0, +36 = byte stride between
 *            scanlines (pvScan0 / lDelta in the DDK SURFOBJ layout).
 *   x1       first pixel column written (inclusive).
 *   x2       end column (exclusive); x2 - x1 pixels are written.
 *   y        scanline index.
 *   c        colour; bits 0..23 are stored, lowest byte first.
 *
 * Returns:  nothing.
 * Clobbers: eax, ecx, edx, flags.  ebx, esi and edi are saved/restored.
 * Stack:    three pushes, no locals.
 *
 * x1 and x2 are compared as signed values: a span with x2 <= x1 writes
 * nothing.  Without that check an inverted span would turn into a huge
 * unsigned pixel count and run far past the end of the surface.
 *
 * Strategy: spans of up to 7 pixels are written one pixel at a time.
 * Longer spans are written pixel by pixel until the destination is
 * 4-byte aligned, then four pixels (12 bytes) at a time as three dword
 * stores, then pixel by pixel for the 0..3 pixels left over.
 */

/* Argument offsets from esp once edi, esi and ebx have been pushed. */
.equ HLINE24_ARG_SURFOBJ, 16
.equ HLINE24_ARG_X1,      20
.equ HLINE24_ARG_X2,      24
.equ HLINE24_ARG_Y,       28
.equ HLINE24_ARG_COLOR,   32

/* Offsets read inside the surface descriptor. */
.equ HLINE24_SURF_SCAN0,  32
.equ HLINE24_SURF_STRIDE, 36

.def _DIB_24BPP_HLine;
.scl 2;
.type 32;
.endef
_DIB_24BPP_HLine:
        push    edi
        push    esi
        push    ebx

        mov     ecx, [esp + HLINE24_ARG_X1]
        mov     esi, [esp + HLINE24_ARG_X2]
        cmp     esi, ecx
        jle     .Lhline24_done              /* empty or inverted span */
        sub     esi, ecx                    /* esi = pixels left to write */

        /* edi = scan0 + y * stride + x1 * 3 */
        mov     ebx, [esp + HLINE24_ARG_SURFOBJ]
        mov     edi, [ebx + HLINE24_SURF_STRIDE]
        imul    edi, dword ptr [esp + HLINE24_ARG_Y]
        add     edi, [ebx + HLINE24_SURF_SCAN0]
        lea     edi, [edi + ecx*2]
        add     edi, ecx

        /* Per-pixel store operands: ax = colour bits 0..15, dl = bits 16..23 */
        mov     eax, [esp + HLINE24_ARG_COLOR]
        mov     edx, eax
        shr     edx, 16

        cmp     esi, 7
        jbe     .Lhline24_pixels            /* short span: no dword path */

        /*
         * Advance one pixel at a time until edi is 4-byte aligned.  Each
         * step adds 3 to the address, so this takes at most 3 pixels and
         * at least 5 of the original 8 or more remain afterwards.
         */
        test    edi, 3
        jz      .Lhline24_aligned
.Lhline24_align:
        mov     [edi], ax
        mov     [edi + 2], dl
        add     edi, 3
        dec     esi
        test    edi, 3
        jnz     .Lhline24_align

.Lhline24_aligned:
        /*
         * Four pixels occupy exactly three dwords.  With the 24-bit colour
         * written as 0ABC (A = bits 16..23, C = bits 0..7) the byte stream
         * C B A C B A C B A C B A is, as little-endian dwords:
         *   eax = CABC, ebx = BCAB, edx = ABCA
         */
        mov     ecx, eax
        and     ecx, 0x00FFFFFF             /* ecx = 0ABC */

        mov     eax, ecx
        shl     eax, 24
        or      eax, ecx                    /* eax = CABC */

        mov     ebx, ecx
        shr     ebx, 8
        mov     edx, ecx
        shl     edx, 16
        or      ebx, edx                    /* ebx = BCAB */

        mov     edx, ecx
        shl     edx, 8
        shr     ecx, 16
        or      edx, ecx                    /* edx = ABCA */

        mov     ecx, esi
        shr     ecx, 2                      /* groups of four pixels, >= 1 */
.Lhline24_quads:
        mov     [edi], eax
        mov     [edi + 4], ebx
        mov     [edi + 8], edx
        add     edi, 12
        dec     ecx
        jnz     .Lhline24_quads

        and     esi, 3                      /* 0..3 pixels still to write */
        jz      .Lhline24_done
        /*
         * Fall through: the low word of CABC is still colour bits 0..15
         * and the low byte of ABCA is still colour bits 16..23, so ax and
         * dl are valid operands for the pixel loop.
         */

.Lhline24_pixels:
        /* Write esi (> 0) pixels: a word store plus a byte store each. */
        mov     [edi], ax
        mov     [edi + 2], dl
        add     edi, 3
        dec     esi
        jnz     .Lhline24_pixels

.Lhline24_done:
        pop     ebx
        pop     esi
        pop     edi
        ret