-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathimage_helpers.cpp
More file actions
215 lines (205 loc) · 4.41 KB
/
image_helpers.cpp
File metadata and controls
215 lines (205 loc) · 4.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
#include <windows.h>
#include "header.h"
// Convert a run of 8bpp palette indices to 32bpp by table lookup.
//
//   src - 8bpp source pixels; 4 bytes are consumed per loop iteration
//   pal - lookup table indexed by source byte; each entry is read as one DWORD
//   dst - destination; 4 DWORDs are written per loop iteration
//   cnt - number of 4-pixel groups to convert (NOT a pixel count); 0 is a no-op
//
// Naked __cdecl: arguments are read straight off the stack and the caller pops
// them. edi/esi/ebp/ebx are saved and restored; eax, ecx, edx and the flags are
// clobbered. The cnt stack slot doubles as the loop counter, so it is zero on
// return.
__declspec(naked) void __cdecl
color_convert( BYTE* src, RGBQUAD* pal, DWORD* dst, DWORD cnt )
{
    UNREFERENCED_PARAMETER( src );
    UNREFERENCED_PARAMETER( pal );
    UNREFERENCED_PARAMETER( dst );
    UNREFERENCED_PARAMETER( cnt );
    __asm
    {
        push edi
        push esi
        push ebp
        push ebx
        // four saved registers + return address => first argument is at 0x14[esp]
        mov esi,0x14[esp] // src
        mov ebp,0x18[esp] // pal
        mov edi,0x1C[esp] // dst
        // Empty run: bail out before touching src/dst. Without this check the
        // counter would wrap from 0 to 0xFFFFFFFF and the loop would run far
        // past both buffers.
        cmp DWORD PTR 0x20[esp], 0 // cnt
        je lbl_done
    lbl_loop:
        // read src image (four palette indices)
        movzx eax, byte ptr [esi]
        movzx ebx, byte ptr 1[esi]
        movzx ecx, byte ptr 2[esi]
        movzx edx, byte ptr 3[esi]
        // read colors from palette
        mov eax, [ebp+eax*4]
        mov ebx, [ebp+ebx*4]
        mov ecx, [ebp+ecx*4]
        mov edx, [ebp+edx*4]
        // write colors to dst
        mov [edi], eax
        mov 4[edi], ebx
        mov 8[edi], ecx
        mov 12[edi], edx
        // upkeep: count down in place on the stack so ebp can stay on the
        // palette; lea advances the pointers without disturbing ZF from the sub
        sub DWORD PTR 0x20[esp], 1 // cnt
        lea esi, [esi+4]
        lea edi, [edi+16]
        jnz lbl_loop
    lbl_done:
        pop ebx
        pop ebp
        pop esi
        pop edi
        ret
    }
}
// Composite, clear and color convert a 640x480 8bpp frame in a single pass.
//
// for each 64 byte chunk:
// composite dib_bits onto client_bits using 0xFE as the transparent color-key
// then fill dib_bits with 0xFE,
// then color convert client_bits (through bmi.palette) into d3d_bits
//
// pitch    - destination pitch in bytes; 640*4 is subtracted from it to get the
//            padding that is skipped at the end of every destination scanline
// d3d_bits - destination; one DWORD is written per source byte
//
// dib_bits, client_bits and bmi are not declared in this function
// (presumably they come from header.h -- confirm). They are accessed with
// movdqa, which requires 16 byte aligned addresses. Only the destination
// pointer gets per-scanline padding; the dib_bits and client_bits cursors just
// advance 64 bytes per chunk, i.e. both are walked as 640*480 contiguous bytes.
//
// Naked __stdcall: arguments are read straight off the stack and removed by
// "ret 8". edi/esi/ebp/ebx are saved and restored; eax, ecx, edx, xmm0-xmm7 and
// the flags are clobbered. Both argument stack slots are overwritten.
__declspec(naked) void __stdcall multiblt( DWORD pitch, DWORD* d3d_bits )
{
UNREFERENCED_PARAMETER( pitch );
UNREFERENCED_PARAMETER( d3d_bits );
__asm {
// build the color-key constant in xmm7, interleaved with the register saves
pcmpeqw xmm7, xmm7 // all ones
push edi
push esi
psllw xmm7, 1 // every word = 0xFFFE (-2)
push ebp
push ebx
packsswb xmm7, xmm7 // signed saturate -2 to bytes: 0xFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFE
// after four pushes the pitch argument sits at 20[esp]; turn it into padding
sub DWORD PTR 20[esp],640*4
// loop counters live on the stack (see the layout comment below)
push 480
push 640/64
sub esp,4
// note: dib_bits is loaded by value while client_bits is taken by address
// (presumably a pointer variable vs. an array -- confirm against the declarations)
mov ebp, dib_bits
mov edi, 36[esp] // d3d_bits
lea esi, [client_bits]
// the d3d_bits argument slot is no longer needed; reuse it for the dib_bits cursor
mov 36[esp],ebp
/*
register / stack usage from here on:
esi == client_bits_cp
edi == d3d_bits_cp
ebp == dib_bits_cp or palette
eax, ebx, ecx, edx == four pixels in flight during color conversion
xmm7 == sixteen 0xFE bytes (color-key)
[esp+36] == preserve dib_bits_cp while ebp is set to palette
[esp+32] == d3d_bits->pitch_in_bytes - width_in_bytes
[esp+28] == return address
[esp+12] .. [esp+24] == saved ebx, ebp, esi, edi
[esp+8] == height count down
[esp+4] == width count down
[esp] == 64/4 count down
*/
align 8
L1:
// read src (64 bytes of dib_bits)
movdqa xmm0, [ebp]
movdqa xmm1, 16[ebp]
movdqa xmm2, 32[ebp]
movdqa xmm3, 48[ebp]
// erase src (refill the chunk with the color-key)
movdqa [ebp], xmm7
movdqa 16[ebp], xmm7
movdqa 32[ebp], xmm7
movdqa 48[ebp], xmm7
// mask = ( src == 0xFE ) ? 0xFF : 0x00
// dst = ( !mask & src ) | ( mask & dst )
// The same five step sequence is applied to each of the four 16 byte lanes
// (xmm0..xmm3); the lanes are interleaved, with xmm4/xmm6 alternating as the
// dst register and xmm5 as the saved copy of src.
movdqa xmm4, [esi] // read 0[dst]
movdqa xmm6, 16[esi] // read 16[dst]
movdqa xmm5, xmm0 // preserve [src]
pcmpeqb xmm0, xmm7 // generate mask for [src]
pand xmm4, xmm0 // mask & [dst]
pandn xmm0, xmm5 // !mask & [src]
movdqa xmm5, xmm1 // lane 1: preserve src
pcmpeqb xmm1, xmm7 // lane 1: mask
por xmm0, xmm4 // new [dst]
movdqa xmm4, 32[esi] // read 32[dst]
pand xmm6, xmm1 // lane 1: mask & dst
pandn xmm1, xmm5 // lane 1: !mask & src
movdqa xmm5, xmm2 // lane 2: preserve src
pcmpeqb xmm2, xmm7 // lane 2: mask
por xmm1, xmm6 // lane 1: new dst
movdqa xmm6, 48[esi] // read 48[dst]
pand xmm4, xmm2 // lane 2: mask & dst
pandn xmm2, xmm5 // lane 2: !mask & src
movdqa xmm5, xmm3 // lane 3: preserve src
pcmpeqb xmm3, xmm7 // lane 3: mask
por xmm2, xmm4 // lane 2: new dst
pand xmm6, xmm3 // lane 3: mask & dst
pandn xmm3, xmm5 // lane 3: !mask & src
add DWORD PTR 36[esp], 64 // move pointer ahead 64 bytes (saved dib_bits cursor)
por xmm3, xmm6 // lane 3: new dst
// ebp switches role: palette base for the conversion loop below
lea ebp, [bmi.palette]
// fastest?? to write back to cache line then movzx to r32?
// store the composited chunk to client_bits; L2 re-reads it bytewise
movdqa [esi], xmm0
movdqa 16[esi], xmm1
movdqa 32[esi], xmm2
movdqa 48[esi], xmm3
// 16 iterations of 4 pixels == one 64 byte chunk
mov DWORD PTR [esp],64/4
align 8
L2:
// read src image (four palette indices from the composited chunk)
movzx eax, byte ptr [esi]
movzx ebx, byte ptr 1[esi]
movzx ecx, byte ptr 2[esi]
movzx edx, byte ptr 3[esi]
add esi, 4
// read colors from palette
mov eax, [ebp+eax*4]
mov ebx, [ebp+ebx*4]
mov ecx, [ebp+ecx*4]
mov edx, [ebp+edx*4]
sub DWORD PTR [esp], 1 // loop cnt; ZF from here is consumed by "jnz L2"
// write colors to dst
mov [edi], eax
mov 4[edi], ebx
mov 8[edi], ecx
mov 12[edi], edx
// advance dst with lea so the flags from the sub above survive (mov does not touch them either)
lea edi, [edi+16]
jnz L2 // do 64 bytes
// ebp switches back to the dib_bits cursor, already advanced by 64
mov ebp, 36[esp]
sub DWORD PTR 4[esp], 1
jnz L1 // do 640 bytes
add edi, 32[esp] // padding at end of scanline
sub DWORD PTR 8[esp], 1
// re-arm the width count down; mov leaves ZF from the sub intact for the jnz
mov DWORD PTR 4[esp], 640/64
jnz L1 // do 480 scanline
// drop the three counter slots, restore callee-saved registers
add esp, 12
pop ebx
pop ebp
pop esi
pop edi
// __stdcall: callee removes the two DWORD arguments
ret 8
}
}
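/* composite for 386 (idea only, not implemented)
// add 1, cmp < 0xFF, then gen mask with sbb?
cmp a, b ; operands were left open in the original note; CF is set when a < b (unsigned)
sbb edx, edx ; = (b > a) ? 0xFFFFFFFF : 0
and edx, eax ;
add ebx, edx ;
*/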
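/*
// not tested -- disabled, kept for reference only
// apparently meant as memcchr( p, 0xFE, 640 ): does any byte of a 640 byte scanline differ from 0xFE?
// NOTE(review), to resolve before enabling:
//  - the instructions below are not wrapped in an __asm block
//  - xmm4, xmm5 and xmm6 are or-accumulated but never cleared before the loop
//  - pcmpeqd marks dwords that EQUAL 0xFEFEFEFE, whereas the or-accumulation looks
//    like it wants "non-zero == differs" (pxor); the two are not interchangeable here
//  - pmovmskb yields a 16 bit mask in eax but the declared return type is bool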
__declspec(naked) bool __cdecl is_scanline_dirty( BYTE* p )
{
pcmpeqw xmm7, xmm7
mov edx,4[esp]
psllw xmm7, 1
mov ecx, 640/64
packsswb xmm7, xmm7 // 0xFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFE
L1:
movdqa xmm0, [edx]
movdqa xmm1, 16[edx]
movdqa xmm2, 32[edx]
movdqa xmm3, 48[edx]
add edx,64
sub ecx,1
pcmpeqd xmm0, xmm7 // pcmpeqd should be just as fast as pxor ?
pcmpeqd xmm1, xmm7
pcmpeqd xmm2, xmm7
pcmpeqd xmm3, xmm7
por xmm0,xmm1
por xmm4,xmm2
por xmm5,xmm3
por xmm6,xmm0
jnz L1
por xmm4,xmm5
por xmm4,xmm6
pmovmskb eax,xmm4
ret
}
*/