/*
 * Page header captured with this listing (not part of the source proper):
 *   [WIN32SS] Improve the FILE header section. Brought to you by Adam Stachowicz. CORE...
 *   [reactos.git] / reactos / win32ss / gdi / dib / i386 / dib24bpp_hline.s
 */
/*
 * PROJECT:     Win32 subsystem
 * LICENSE:     See COPYING in the top level directory
 * FILE:        win32ss/gdi/dib/i386/dib24bpp_hline.s
 * PURPOSE:     ASM optimised 24bpp HLine
 * PROGRAMMERS: Magnus Olsen
 */

#include <asm.inc>

.code

PUBLIC _DIB_24BPP_HLine

/*
 * _DIB_24BPP_HLine - fill a horizontal run of 24bpp pixels with one colour.
 *
 * Syntax / ABI: Intel operand order, 32-bit x86. All arguments are read from
 * the stack and every exit is a plain `ret`, so the caller removes the
 * arguments (cdecl style).
 *
 * Stack arguments. Offsets are valid after the prologue, which lowers esp by
 * 3 pushes + 24 bytes = 36, leaving the return address at [esp+36]:
 *   [esp+40]  pointer to the surface descriptor; only two dwords are read:
 *               [+32] base address of the pixel data
 *               [+36] byte stride between rows
 *             (presumably SURFOBJ.pvScan0 / SURFOBJ.lDelta - confirm against
 *             the SURFOBJ definition)
 *   [esp+44]  x1, first pixel column to fill
 *   [esp+48]  x2, one past the last column; x2 - x1 pixels are written
 *   [esp+52]  y, row index
 *   [esp+56]  colour; the low 24 bits are stored, lowest byte first
 *
 * Locals:
 *   [esp]     pixel counter (each loop documents its exact meaning)
 *   [esp+4]   current destination address
 *   The other 16 bytes of the 24 byte frame are never touched.
 *
 * Registers: ebx, esi and edi are saved and restored; eax, ecx, edx and the
 * flags are clobbered. No return value is produced.
 *
 * Strategy:
 *   x2 <= x1      return without writing (x1/x2 compared as signed values).
 *   1..7 pixels   store each pixel as one word plus one byte.
 *   8 or more     store single pixels until the destination is dword
 *                 aligned, then three dwords (= four pixels) per iteration,
 *                 then the 0..3 pixels that are left, one at a time.
 */
_DIB_24BPP_HLine:
    push edi
    push esi
    push ebx
    sub esp, 24
    mov ebx, [esp+40]               /* ebx = surface descriptor */
    mov edi, [esp+52]               /* edi = y */
    mov ecx, [esp+44]               /* ecx = x1 */
    mov eax, [ebx+36]               /* eax = row stride */
    mov esi, [ebx+32]               /* esi = pixel data base */
    mov edx, [esp+48]               /* edx = x2 */
    imul eax, edi                   /* eax = y * stride */
    sub edx, ecx                    /* edx = count = x2 - x1 */
    /* Empty or inverted span: leave before anything is stored. Without this
     * check a negative count is taken as a huge unsigned value by the `ja`
     * below and the bulk loop writes far past the end of the row. The flags
     * tested here are the ones produced by the `sub` just above. */
    jle hline_exit
    mov [esp], edx                  /* [esp] = count (bulk path reads this) */
    add eax, esi
    lea eax, [eax+ecx*2]
    add eax, ecx                    /* eax = base + y*stride + x1*3 */
    cmp edx, 7
    mov esi, edx                    /* esi = count, kept for the bulk path */
    mov [esp+4], eax                /* [esp+4] = destination */
    ja Align4byte                   /* 8 or more pixels: use dword stores */
    lea eax, [edx-1]
    mov [esp], eax                  /* [esp] = count - 1; count is 1..7 here */
    /* fall through into small_fill */

/* For small fills, don't bother doing anything fancy.
 * On entry and at every iteration: [esp] = pixels still to write after the
 * current one, [esp+4] = address of the current pixel. */
small_fill:
    movzx ecx, word ptr [esp+58]    /* cl = colour byte 2 */
    mov edx, [esp+4]                /* edx = current pixel address */
    mov esi, [esp+56]               /* si = colour bytes 0 and 1 */
    lea eax, [edx+2]
    mov [esp+4], eax
    mov [edx+2], cl                 /* store byte 2 */
    mov eax, [esp]
    inc dword ptr [esp+4]           /* destination has now advanced by 3 */
    mov [edx], si                   /* store bytes 0 and 1 */
    dec eax
    mov [esp], eax
    inc eax                         /* ZF set when no pixel was left to write */
    jnz small_fill

hline_exit:
    add esp, 24
    pop ebx
    pop esi
    pop edi
    ret

/* Bulk path. On entry: eax = [esp+4] = destination, esi = [esp] = count,
 * with count >= 8. */
Align4byte:
    /* Align to 4-byte address */
    test al, 3
    mov ecx, eax                    /* ecx = current pixel address */
    jz loop1                        /* already aligned: esi still holds count */
    /* The next two instructions change neither registers nor flags; they
     * look like padding left over from compiler output. */
    lea esi, [esi+0]
    lea edi, [edi+0]

/* Store single pixels until the destination is dword aligned. Each pixel
 * advances the address by 3, so at most three pixels are written here and,
 * because count >= 8 on entry, at least five remain for loop1/loop2.
 * Original author's note: this was measured as about 30% faster than the
 * generic C implementation (which is not part of this file). */
loopasmversion:
    movzx edx, word ptr [esp+58]    /* dl = colour byte 2 */
    lea edi, [ecx+2]
    mov eax, [esp+56]               /* ax = colour bytes 0 and 1 */
    mov [esp+4], edi
    mov [ecx+2], dl                 /* store byte 2 */
    mov ebx, [esp+4]
    mov [ecx], ax                   /* store bytes 0 and 1 */
    mov edx, [esp]
    inc ebx                         /* ebx = address of the next pixel */
    mov [esp+4], ebx
    dec edx                         /* one pixel fewer to go */
    test bl, 3                      /* flags survive the two movs below */
    mov [esp], edx
    mov ecx, ebx
    jnz loopasmversion
    mov esi, edx                    /* esi = pixels left after aligning */

/* Build the three dwords that make up four consecutive pixels. With
 * c = colour & 0x00FFFFFF and bytes b0 b1 b2 (lowest first), memory must read
 * b0 b1 b2 b0 | b1 b2 b0 b1 | b2 b0 b1 b2. */
loop1:
    mov ecx, [esp+56]
    and ecx, 16777215               /* c = colour & 0x00FFFFFF */
    mov ebx, ecx
    shr ebx, 8
    mov eax, ecx
    shl eax, 16
    or ebx, eax                     /* ebx = (c >> 8) | (c << 16): 2nd dword */
    mov edx, ecx
    shl edx, 8
    mov eax, ecx
    shr eax, 16
    or edx, eax                     /* edx = (c << 8) | (c >> 16): 3rd dword */
    mov eax, ecx
    shl eax, 24
    or eax, ecx                     /* eax = c | (c << 24): 1st dword */
    mov ecx, [esp]
    shr ecx, 2                      /* ecx = groups of four pixels, >= 1 */
    mov edi, [esp+4]                /* edi = dword aligned destination */
/* Runs at least once: the counter is only tested after the stores. */
loop2:
    mov [edi], eax
    mov [edi+4], ebx
    mov [edi+8], edx
    add edi, 12                     /* 12 bytes = 4 pixels */
    dec ecx
    jnz loop2
    mov [esp+4], edi
    and esi, 3                      /* Count = Count & 0x03 */
    lea eax, [esi-1]
    mov [esp], eax                  /* [esp] = leftover - 1 */
    inc eax                         /* ZF set when nothing is left over */
    jnz leftoverfromthemainloop
    add esp, 24
    pop ebx
    pop esi
    pop edi
    ret

/* Write the 1..3 pixels that did not fill a whole group of four.
 * [esp] = pixels still to write after the current one,
 * [esp+4] = address of the current pixel. */
leftoverfromthemainloop:
    mov ecx, [esp+4]
    mov ebx, [esp+56]
    lea esi, [ecx+2]
    mov [ecx], bx                   /* store colour bytes 0 and 1 */
    shr ebx, 16
    mov [esp+4], esi
    mov [ecx+2], bl                 /* store colour byte 2 */
    mov eax, [esp]
    inc dword ptr [esp+4]           /* destination has now advanced by 3 */
    dec eax
    mov [esp], eax
    inc eax                         /* ZF set when no pixel was left to write */
    jnz leftoverfromthemainloop
    add esp, 24
    pop ebx
    pop esi
    pop edi
    ret

END