source: trunk/src/ddraw/new/asmutil.asm@ 10367

Last change on this file since 10367 was 4322, checked in by mike, 25 years ago

Fixed many many bugs in the 8/16bit transblt functions.

File size: 40.7 KB
Line 
1; $Id: asmutil.asm,v 1.2 2000-09-25 18:53:45 mike Exp $
2
3;
4; asmutil.asm Color key bit blitting for DirectDraw
5;
6; Copyright 1998 Sander van Leeuwen
7; 1999 Markus Montkowski
8;
9; Project Odin Software License can be found in LICENSE.TXT
10;
11
; ----------------------------------------------------------------------
; Module name, target CPU and segment layout (32-bit flat model).
; ----------------------------------------------------------------------
        NAME    asmutil
        .586p                           ; Pentium instruction set
        .MMX                            ; enable the MMX mnemonics

; Declare every segment once with its full attributes ...
CODE32  SEGMENT DWORD USE32 PUBLIC 'CODE'
CODE32  ENDS
DATA32  SEGMENT DWORD USE32 PUBLIC 'DATA'
DATA32  ENDS
CONST32 SEGMENT DWORD USE32 PUBLIC 'CONST'
CONST32 ENDS
BSS32   SEGMENT DWORD USE32 PUBLIC 'BSS'
BSS32   ENDS

; ... group the data segments and assume flat addressing everywhere.
DGROUP  GROUP   CONST32, BSS32, DATA32
        ASSUME  CS:FLAT, DS:FLAT, SS:FLAT, ES:FLAT

; This module places nothing in the data segments; they are only
; re-opened and closed again.
DATA32  SEGMENT
DATA32  ENDS
BSS32   SEGMENT
BSS32   ENDS
CONST32 SEGMENT
CONST32 ENDS
32
33
34CODE32 SEGMENT
35
;-----------------------------------------------------------------------
; void BlitColorKey8(char *dest, char *src, ULONG key, ULONG linesize)
;
; Color-keyed copy of one line of 8-bit pixels.  Equivalent C:
;
;     for (i = 0; i < linesize; i++)
;         if (src[i] != (char)key)
;             dest[i] = src[i];
;
; In:    [ebp+8]   dest      destination line
;        [ebp+12]  src       source line
;        [ebp+16]  key       color key, only the low byte is used
;        [ebp+20]  linesize  number of pixels (= bytes) in the line
; Out:   nothing
; Regs:  every general register is saved and restored; flags change.
;
; Register roles
;        esi = source cursor          edi = destination cursor
;        ecx = dwords / bytes left    edx = key replicated in all 4 bytes
;        ebx = current source dword   eax = destination dword being merged
;
; Whole dwords are merged four pixels at a time; the 0..3 pixels that
; remain are handled byte by byte so that nothing outside
; [src, src+linesize) and [dest, dest+linesize) is ever accessed.
;-----------------------------------------------------------------------
        PUBLIC  _BlitColorKey8

_BlitColorKey8 PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

        mov     edi, dword ptr [ebp+8]      ; dest
        mov     esi, dword ptr [ebp+12]     ; src
        mov     edx, dword ptr [ebp+16]     ; colorkey (in dl)
        mov     ecx, dword ptr [ebp+20]     ; linesize

        mov     dh, dl                      ; replicate the key byte ...
        mov     eax, edx
        shl     edx, 16
        and     eax, 0000FFFFh
        or      edx, eax                    ; ... edx = key in each byte

        shr     ecx, 2                      ; linesize in dwords
        jz      blitremain                  ; fewer than 4 bytes

blitloop:
        mov     ebx, dword ptr [esi]
        cmp     ebx, edx                    ; all 4 pixels transparent?
        je      TTTT                        ; yes: leave dest untouched
        mov     eax, dword ptr [edi]

        cmp     bl, dl                      ; pixel 0
        je      XXOT
        mov     al, bl
XXOT:
        cmp     bh, dh                      ; pixel 1
        je      XXTT
        mov     ah, bh
XXTT:                                       ; bring pixels 2/3 into al/ah
        ror     eax, 16
        ror     ebx, 16
        cmp     bl, dl                      ; pixel 2
        je      skipbyte3
        mov     al, bl
skipbyte3:
        cmp     bh, dh                      ; pixel 3
        je      skipbyte4
        mov     ah, bh
skipbyte4:
        ror     eax, 16                     ; restore byte order
        mov     dword ptr [edi], eax
TTTT:
        add     esi, 4                      ; advance AFTER every dword so the
        add     edi, 4                      ; tail code sees the right address
        dec     ecx
        jnz     blitloop

blitremain:
        mov     ecx, dword ptr [ebp+20]
        and     ecx, 3                      ; 0..3 pixels left
        jz      endofblit
blitremainloop:
        mov     al, byte ptr [esi]
        cmp     al, dl                      ; transparent?
        je      blitremainskip
        mov     byte ptr [edi], al
blitremainskip:
        inc     esi
        inc     edi
        dec     ecx
        jnz     blitremainloop

endofblit:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey8 ENDP
173
;-----------------------------------------------------------------------
; void BlitColorKey16(char *dest, char *src, ULONG key, ULONG linesize)
;
; Color-keyed copy of one line of 16-bit pixels.  Equivalent C, with
; dest/src viewed as arrays of 16-bit pixels:
;
;     for (i = 0; i < linesize; i++)
;         if (src[i] != (USHORT)key)
;             dest[i] = src[i];
;
; In:    [ebp+8]   dest      destination line
;        [ebp+12]  src       source line
;        [ebp+16]  key       color key, only the low 16 bits are used
;        [ebp+20]  linesize  number of 16-bit pixels in the line
; Out:   nothing
; Regs:  every general register is saved and restored; flags change.
;
; Register roles
;        esi = source cursor          edi = destination cursor
;        ecx = pixel pairs left       edx = key replicated in both words
;        eax = current source dword   ebx = destination dword being merged
;
; Pixel pairs are merged one dword at a time; an odd last pixel is
; handled with 16-bit accesses only.  A linesize of 0 or 1 skips the
; pair loop entirely.
;-----------------------------------------------------------------------
        PUBLIC  _BlitColorKey16

_BlitColorKey16 PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

        mov     edi, dword ptr [ebp+8]      ; dest
        mov     esi, dword ptr [ebp+12]     ; src
        mov     ecx, dword ptr [ebp+20]     ; linesize
        mov     edx, dword ptr [ebp+16]     ; colorkey

        mov     eax, edx
        shl     edx, 16
        and     eax, 0000FFFFh
        or      edx, eax                    ; edx = key in both words
        shr     ecx, 1                      ; linesize in dwords (pixel pairs)
        jz      OnePixel                    ; 0 or 1 pixel: no pair to process

blitloop16:
        mov     eax, dword ptr [esi]
        cmp     eax, edx                    ; are both pixels transparent?
        je      LoopUp                      ; yes: leave dest untouched
        mov     ebx, dword ptr [edi]
        cmp     ax, dx                      ; is the lower pixel transparent?
        je      DrawOT
        mov     bx, ax                      ; no: copy the lower pixel
DrawOT:
        ror     eax, 16                     ; bring the higher pixel of source
        ror     ebx, 16                     ; AND dest into the low word
        cmp     ax, dx                      ; is the higher pixel transparent?
        je      CopyBack
        mov     bx, ax                      ; no: copy the higher pixel
CopyBack:
        ror     ebx, 16                     ; restore word order
        mov     dword ptr [edi], ebx        ; write back the merged pair
LoopUp:
        add     esi, 4
        add     edi, 4
        dec     ecx
        jnz     blitloop16

OnePixel:
        test    byte ptr [ebp+20], 1        ; odd linesize?
        jz      endofblit16
        mov     ax, word ptr [esi]
        cmp     ax, dx
        je      endofblit16                 ; last pixel is transparent
        mov     word ptr [edi], ax          ; copy just this one pixel

endofblit16:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey16 ENDP
251
252
;-----------------------------------------------------------------------
; void BlitColorKey8MMX(char *dest, char *src, ULONG key, ULONG linesize)
;
; MMX version of BlitColorKey8: copies one line of 8-bit pixels from src
; to dest, leaving dest unchanged wherever the source pixel equals the
; low byte of key.
;
; In:    [ebp+8]   dest      destination line
;        [ebp+12]  src       source line
;        [ebp+16]  key       color key, only the low byte is used
;        [ebp+20]  linesize  number of pixels (= bytes) in the line
; Out:   nothing
; Regs:  every general register is saved and restored; flags change.
;        mm0-mm2 and mm4-mm6 are used as scratch.  EMMS is executed
;        before returning so that x87 code may follow directly.
;
; Method: eight pixels per iteration,
;        mask = (src == key) per byte
;        dest = (dest AND mask) OR (src AND NOT mask)
; The remaining 0..7 pixels are dispatched through JmpTable.  The tails
; only use byte/word/dword accesses that stay inside the line, so no
; access can run past the end of the buffers.
;-----------------------------------------------------------------------
        PUBLIC  _BlitColorKey8MMX

_BlitColorKey8MMX PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

        mov     edx, [ebp+16]               ; colorkey (in dl)
        mov     edi, [ebp+8]                ; dest
        mov     esi, [ebp+12]               ; src
        mov     ecx, dword ptr [ebp+20]     ; linesize
        mov     dh, dl
        mov     eax, edx
        shl     edx, 16
        mov     dx, ax                      ; edx = key in all four bytes
        movd    mm4, edx
        movd    mm5, edx
        psllq   mm4, 32
        por     mm4, mm5                    ; mm4 = key in all eight bytes
        shr     ecx, 3                      ; number of whole qwords
        jz      BltRemain8

bltLoopMMX8:
        movq    mm0, [esi]                  ; get source qword
        movq    mm1, [edi]                  ; get dest qword
        movq    mm2, mm0                    ; copy source
        pcmpeqb mm0, mm4                    ; create mask (FF where transparent)
        pand    mm1, mm0                    ; keep dest where transparent
        pandn   mm0, mm2                    ; NOT mask AND source
        por     mm1, mm0                    ; or them
        movq    qword ptr [edi], mm1        ; write back result
        add     esi, 8
        add     edi, 8
        dec     ecx
        jnz     bltLoopMMX8
BltRemain8:
        mov     eax, dword ptr [ebp+20]
        and     eax, 7                      ; 0..7 pixels left
        jmp     ds:JmpTable[eax*4]

align 4

JmpTable:
        dd      offset cs:bltEndMMX8
        dd      offset cs:blt1MMX8
        dd      offset cs:blt2MMX8
        dd      offset cs:blt3MMX8
        dd      offset cs:blt4MMX8
        dd      offset cs:blt5MMX8
        dd      offset cs:blt6MMX8
        dd      offset cs:blt7MMX8
align 2
;
; Maybe it would be faster for 7-5 to load a qword into mm0/mm1,
; but we might cross a page, so assembling the value from smaller
; loads is the safer choice.
;
; In the 7/6/3 tails eax still holds (linesize AND 7), so its upper
; word is zero.  The upper bits of ebx are whatever the caller left
; there; they only reach byte lanes that are never stored.
;
blt7MMX8:
        movd    mm0, dword ptr [esi]        ; pixels 0-3
        mov     ax, word ptr [esi+4]        ; pixels 4-5
        mov     bx, word ptr [edi+4]
        movd    mm1, dword ptr [edi]
        psllq   mm0, 32                     ; pixels 0-3 to the upper dword
        shl     eax, 8
        shl     ebx, 8
        mov     al, byte ptr [esi+6]        ; pixel 6 in the lowest byte
        mov     bl, byte ptr [edi+6]
        movd    mm5, eax
        por     mm0, mm5
        psllq   mm1, 32
        movd    mm6, ebx
        por     mm1, mm6
        movq    mm2, mm0                    ; copy source
        pcmpeqb mm0, mm4                    ; create mask
        pand    mm1, mm0                    ; mask dest
        pandn   mm0, mm2                    ; mask source
        por     mm1, mm0                    ; or them
        movd    eax, mm1                    ; pixels 6,4,5 (low three bytes)
        psrlq   mm1, 32
        mov     byte ptr [edi+6], al
        movd    dword ptr [edi], mm1        ; pixels 0-3
        shr     eax, 8
        mov     word ptr [edi+4], ax        ; pixels 4-5
        jmp     bltEndMMX8

blt6MMX8:
        movd    mm0, dword ptr [esi]        ; pixels 0-3
        mov     ax, word ptr [esi+4]        ; pixels 4-5
        mov     bx, word ptr [edi+4]
        movd    mm1, dword ptr [edi]
        psllq   mm0, 32
        psllq   mm1, 32
        movd    mm5, eax
        por     mm0, mm5
        movd    mm6, ebx
        por     mm1, mm6
        movq    mm2, mm0                    ; copy source
        pcmpeqb mm0, mm4                    ; create mask
        pand    mm1, mm0                    ; mask dest
        pandn   mm0, mm2                    ; mask source
        por     mm1, mm0                    ; or them
        movd    eax, mm1
        psrlq   mm1, 32
        mov     word ptr [edi+4], ax        ; pixels 4-5
        movd    dword ptr [edi], mm1        ; pixels 0-3
        jmp     bltEndMMX8

blt5MMX8:
        movd    mm0, dword ptr [esi]        ; pixels 0-3 ...
        movd    mm1, dword ptr [edi]
        movq    mm2, mm0                    ; copy source
        pcmpeqb mm0, mm4                    ; create mask
        pand    mm1, mm0                    ; mask dest
        add     esi, 4
        pandn   mm0, mm2                    ; mask source
        por     mm1, mm0                    ; or them
        movd    dword ptr [edi], mm1
        add     edi, 4
        jmp     blt1MMX8                    ; ... then the single pixel 4

blt4MMX8:
        movd    mm0, dword ptr [esi]
        movd    mm1, dword ptr [edi]
        movq    mm2, mm0                    ; copy source
        pcmpeqb mm0, mm4                    ; create mask
        pand    mm1, mm0                    ; mask dest
        pandn   mm0, mm2                    ; mask source
        por     mm1, mm0                    ; or them
        movd    dword ptr [edi], mm1        ; write back result
        jmp     bltEndMMX8
;
; loading a dword into mm0/mm1 might be faster for 3-2...
;
blt3MMX8:
        mov     ax, word ptr [esi]          ; pixels 0-1
        mov     bx, word ptr [edi]
        shl     eax, 8                      ; 3 pixels left to blit
        shl     ebx, 8                      ; so shift the buffers
        mov     al, byte ptr [esi+2]        ; pixel 2 in the lowest byte
        mov     bl, byte ptr [edi+2]
        movd    mm0, eax
        movd    mm1, ebx
        movq    mm2, mm0
        pcmpeqb mm0, mm4                    ; create mask
        pand    mm1, mm0                    ; mask dest
        pandn   mm0, mm2                    ; mask source
        por     mm1, mm0                    ; or them
        movd    eax, mm1                    ; write back result
        mov     byte ptr [edi+2], al
        shr     eax, 8
        mov     word ptr [edi], ax
        jmp     bltEndMMX8

blt2MMX8:
        mov     al, byte ptr [esi]
        cmp     al, dl
        je      blt1aMMX8
        mov     byte ptr [edi], al
blt1aMMX8:
        add     esi, 1                      ; step to the last pixel and
        add     edi, 1                      ; fall into the 1-pixel case
blt1MMX8:
        mov     al, byte ptr [esi]
        cmp     al, dl
        je      bltEndMMX8
        mov     byte ptr [edi], al

bltEndMMX8:
        emms                                ; release the MMX state so x87
                                            ; code can run after we return
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey8MMX ENDP
441
442
;-----------------------------------------------------------------------
; void BlitColorKey16MMX(char *dest, char *src, ULONG key, ULONG linesize)
;
; MMX version of BlitColorKey16: copies one line of 16-bit pixels from
; src to dest, leaving dest unchanged wherever the source pixel equals
; the low 16 bits of key.
;
; In:    [ebp+8]   dest      destination line
;        [ebp+12]  src       source line
;        [ebp+16]  key       color key, only the low 16 bits are used
;        [ebp+20]  linesize  line size in pixels (not bytes)
; Out:   nothing
; Regs:  edi, esi, ecx, edx are saved and restored.  eax is used as
;        scratch and is NOT preserved.  mm0-mm2, mm4, mm5 are scratch.
;        EMMS is executed before returning so that x87 code may follow
;        directly.
;
; Method: four pixels per iteration,
;        mask = (src == key) per word
;        dest = (dest AND mask) OR (src AND NOT mask)
; The remaining 0..3 pixels are dispatched through JumpTable.
;-----------------------------------------------------------------------
        PUBLIC  _BlitColorKey16MMX

_BlitColorKey16MMX PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    ecx
        push    edx

        mov     edx, dword ptr [ebp+16]     ; colorkey
        mov     edi, dword ptr [ebp+8]      ; dest
        mov     ecx, dword ptr [ebp+20]     ; linesize in pixels

        mov     eax, edx
        shl     edx, 16
        mov     dx, ax                      ; extend colorkey to 32 bit

        mov     esi, dword ptr [ebp+12]     ; src
        mov     eax, ecx                    ; copy of linesize
        shr     ecx, 2                      ; number of whole qwords
        movd    mm4, edx                    ; (movd leaves the flags alone)
        jz      BltRemain16                 ; the tails only need the low
                                            ; dword of mm4

        movd    mm5, edx                    ; extend colorkey to 64 bit
        psllq   mm4, 32
        por     mm4, mm5

bltLoopMMX16:
        movq    mm0, qword ptr [esi]        ; get source qword
        movq    mm1, qword ptr [edi]        ; get destination
        movq    mm2, mm0                    ; copy source
        pcmpeqw mm0, mm4                    ; create mask in mm0
        pand    mm1, mm0                    ; mask dest
        add     esi, 8                      ; point to next source qword
        pandn   mm0, mm2                    ; NOT mask AND source
        por     mm1, mm0                    ; or them
        movq    qword ptr [edi], mm1        ; write back result
        add     edi, 8
        dec     ecx
        jnz     bltLoopMMX16

BltRemain16:
        and     eax, 3                      ; 0..3 pixels left
        jmp     ds:JumpTable[eax*4]

align 4

JumpTable:
        dd      offset cs:bltEndMMX16
        dd      offset cs:blt1MMX16
        dd      offset cs:blt2MMX16
        dd      offset cs:blt3MMX16
align 2

blt3MMX16:
        movd    mm0, dword ptr [esi]        ; first two pixels ...
        movd    mm1, dword ptr [edi]
        movq    mm2, mm0                    ; copy source
        add     esi, 4
        pcmpeqw mm0, mm4                    ; create mask 16 bit
        pand    mm1, mm0                    ; mask dest
        pandn   mm0, mm2                    ; mask source
        add     edi, 4
        por     mm1, mm0                    ; or them
        movd    dword ptr [edi-4], mm1
        jmp     blt1MMX16                   ; ... then the third one

blt2MMX16:
        movd    mm0, dword ptr [esi]
        movd    mm1, dword ptr [edi]
        movq    mm2, mm0                    ; copy source
        pcmpeqw mm0, mm4                    ; create mask 16 bit
        pand    mm1, mm0                    ; mask dest
        pandn   mm0, mm2                    ; mask source
        por     mm1, mm0                    ; or them
        movd    dword ptr [edi], mm1        ; write back result
        jmp     bltEndMMX16

blt1MMX16:
        mov     ax, word ptr [esi]
        cmp     ax, dx
        je      bltEndMMX16                 ; transparent: keep dest
        mov     word ptr [edi], ax

bltEndMMX16:
        emms                                ; release the MMX state so x87
                                            ; code can run after we return
        pop     edx
        pop     ecx
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey16MMX ENDP
538
;-----------------------------------------------------------------------
; extern void __cdecl BltTransSrcRecMMX(PBYTE dest, PBYTE src,
;                                       ULONG ulBltWidth, ULONG ulBltHeight,
;                                       ULONG ulDestPitch, ULONG ulSrcPitch,
;                                       ULONG ulTransCol);
;
; Placeholder: the routine sets up a stack frame, saves the general
; registers, restores them again and returns.  None of the arguments is
; read and no pixel is written.
;-----------------------------------------------------------------------
        PUBLIC  _BltTransSrcRecMMX

_BltTransSrcRecMMX PROC NEAR
        push    ebp                         ; frame + register save
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

                                            ; (no blit code here)

EndTSBlt:
        pop     edx                         ; restore in reverse order
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret

_BltTransSrcRecMMX ENDP
565
566
567 PUBLIC _BltRec
568;
569; extern void __cdecl BltRec(PBYTE dest, PBYTE src, ULONG ulBltWidth,ULONG ulBltHeight
570; ULONG ulDestPitch, ULONG ulSrcPitch);
571_BltRec PROC NEAR
572 push ebp
573 mov ebp, esp
574 push edi
575 push esi
576 push eax
577 push ebx
578 push ecx
579 push edx
580
581 mov ecx, dword ptr [ebp+16] ; U ulBltWidth
582 mov esi, dword ptr [ebp+12] ; V src
583 mov ebx, ecx ; U
584 mov edx, dword ptr [ebp+20] ; V ulBltHeight
585 and ebx, 0Fh ; U ebx = # of bytes < 16
586 mov edi, dword ptr [ebp+8] ; V dest
587 cmp edx, 0
588 jz BltRecEnd ; height is zero so done
589 shr ecx, 4 ; U
590 jz SmallBlt ; Small (width < 16) rectangle done in special case
591 test ebx, ebx
592 jnz ComplexBlt ; ulBltWidth mod 16 is not 0
593
594;
595; Blitwidth is an multiple of 16
596;
597 mov ebx, dword ptr [ebp+24] ; ulDestPitch
598 mov eax, dword ptr [ebp+28] ; ulSrcPitch
599 sub ebx, dword ptr [ebp+16] ; adjust both widths
600 sub eax, dword ptr [ebp+16]
601 mov dword ptr [ebp+28], eax ; store adjusted SrcPitch
602 mov eax, ecx
603LineLoop:
604 FLD QWORD PTR [ESI]
605 FLD QWORD PTR [ESI+8]
606 FXCH
607 FSTP QWORD PTR [EDI]
608 FSTP QWORD PTR [EDI+8]
609 ADD ESI,16
610 ADD EDI,16
611 dec eax
612 jz LineLoop
613 dec edx
614 jz BltRecEnd
615 add ESI, dword ptr[ebp+28]
616 add EDI, ebx
617 mov eax, ecx
618 jmp LineLoop
619
620SmallBlt:
621 mov eax, dword ptr [ebp+28] ; ulSrcPitch
622 mov ecx, dword ptr [ebp+24] ; ulDestPitch
623 jmp ds:SmallJmpTable[ebx*4]
624SmallJmpTable:
625 dd cs:offset BltRecEnd ; BlitWidth is 0 done
626 dd cs:offset Rec1
627 dd cs:offset Rec2
628 dd cs:offset Rec3
629 dd cs:offset Rec4
630 dd cs:offset Rec5
631 dd cs:offset Rec6
632 dd cs:offset Rec7
633 dd cs:offset Rec8
634 dd cs:offset Rec9
635 dd cs:offset Rec10
636 dd cs:offset Rec11
637 dd cs:offset Rec12
638 dd cs:offset Rec13
639 dd cs:offset Rec14
640 dd cs:offset Rec15
641
642;One Pixel wide
643
644Rec1:
645 cmp edx,4
646 jb Rec1_0123
647 mov bl, byte ptr [esi]
648 add esi,eax
649 mov byte ptr [edi], bl
650 add edi,ecx
651 mov bl, byte ptr [esi]
652 add esi,eax
653 mov byte ptr [edi], bl
654 add edi,ecx
655 mov bl, byte ptr [esi]
656 add esi,eax
657 mov byte ptr [edi], bl
658 add edi,ecx
659 mov bl, byte ptr [esi]
660 add esi,eax
661 mov byte ptr [edi], bl
662 add edi,ecx
663 sub edx,4
664 jnz Rec1
665 jmp BltRecEnd
666Rec1_0123:
667 cmp edx,2
668 jz Rec1_2
669 jb Rec1_01
670; Must be 3 lines left
671 mov bl, byte ptr [esi]
672 add esi,eax
673 mov byte ptr [edi], bl
674 add edi,ecx
675 mov bl, byte ptr [esi]
676 add esi,eax
677 mov byte ptr [edi], bl
678 add edi,ecx
679 mov bl, byte ptr [esi]
680 mov byte ptr [edi], bl
681 jmp BltRecEnd
682Rec1_2:
683 mov bl, byte ptr [esi]
684 add esi,eax
685 mov byte ptr [edi], bl
686 add edi,ecx
687 mov bl, byte ptr [esi]
688 mov byte ptr [edi], bl
689 jmp BltRecEnd
690Rec1_01:
691 test edx,edx
692 jz BltRecEnd
693 mov bl, byte ptr [esi]
694 mov byte ptr [edi], bl
695 jmp BltRecEnd
696
697;2 Pixel Wide
698
699Rec2:
700 cmp edx,4
701 jb Rec2_0123
702 mov bx, word ptr [esi]
703 add esi,eax
704 mov word ptr [edi], bx
705 add edi,ecx
706 mov bx, word ptr [esi]
707 add esi,eax
708 mov word ptr [edi], bx
709 add edi,ecx
710 mov bx, word ptr [esi]
711 add esi,eax
712 mov word ptr [edi], bx
713 add edi,ecx
714 mov bx, word ptr [esi]
715 add esi,eax
716 mov word ptr [edi], bx
717 add edi,ecx
718 sub edx, 4
719 jnz Rec2
720 jmp BltRecEnd
721
722Rec2_0123:
723 cmp edx,2
724 jz Rec2_2
725 jb Rec2_01
726;3 lines left
727 mov bx, word ptr [esi]
728 add esi,eax
729 mov word ptr [edi], bx
730 add edi,ecx
731 mov bx, word ptr [esi]
732 add esi,eax
733 mov word ptr [edi], bx
734 add edi,ecx
735 mov bx, word ptr [esi]
736 mov word ptr [edi], bx
737 jmp BltRecEnd
738Rec2_2:
739 mov bx, word ptr [esi]
740 add esi,eax
741 mov word ptr [edi], bx
742 add edi,ecx
743 mov bx, word ptr [esi]
744 mov word ptr [edi], bx
745 jmp BltRecEnd
746Rec2_01:
747 test edx,edx
748 jz BltRecEnd
749 mov bx, word ptr [esi]
750 mov word ptr [edi], bx
751 jmp BltRecEnd
752
753; 3 Pixel Wide must check if it's better to read 4 bytes as
754; Intel might stall on reading 2 and 1 byte, but this takes more care as we
755; could create a pagefault on the last 3 pixel
756
757Rec3:
758 cmp edx,4
759 jb Rec3_0123
760 push edx
761 mov bx, word ptr [esi]
762 mov dl, byte ptr [esi+2]
763 add esi,eax
764 mov word ptr [edi], bx
765 mov byte ptr [edi+2], dl
766 add edi,ecx
767 mov bx, word ptr [esi]
768 mov dl, byte ptr [esi+2]
769 add esi,eax
770 mov word ptr [edi], bx
771 mov byte ptr [edi+2], dl
772 add edi,ecx
773 mov bx, word ptr [esi]
774 mov dl, byte ptr [esi+2]
775 add esi,eax
776 mov word ptr [edi], bx
777 mov byte ptr [edi+2], dl
778 add edi,ecx
779 mov bx, word ptr [esi]
780 mov dl, byte ptr [esi+2]
781 add esi,eax
782 mov word ptr [edi], bx
783 mov byte ptr [edi+2], dl
784 add edi,ecx
785 pop edx
786 sub edx,4
787 jnz Rec3
788 jmp BltRecEnd
789
790Rec3_0123:
791 cmp edx,2
792 jz Rec3_2
793 jb Rec3_01
794; Must be 3 lines left
795 mov bx, word ptr [esi]
796 mov dl, byte ptr [esi+2]
797 add esi,eax
798 mov word ptr [edi], bx
799 mov byte ptr [edi+2], dl
800 add edi,ecx
801 mov bx, word ptr [esi]
802 mov dl, byte ptr [esi+2]
803 add esi,eax
804 mov word ptr [edi], bx
805 mov byte ptr [edi+2], dl
806 add edi,ecx
807 mov bx, word ptr [esi]
808 mov dl, byte ptr [esi+2]
809 mov word ptr [edi], bx
810 mov byte ptr [edi+2], dl
811 jmp BltRecEnd
812Rec3_2:
813 mov bx, word ptr [esi]
814 mov dl, byte ptr [esi+2]
815 add esi,eax
816 mov word ptr [edi], bx
817 mov byte ptr [edi+2], dl
818 add edi,ecx
819 mov bx, word ptr [esi]
820 mov dl, byte ptr [esi+2]
821 mov word ptr [edi], bx
822 mov byte ptr [edi+2], dl
823 jmp BltRecEnd
824Rec3_01:
825 test edx,edx
826 jz BltRecEnd
827 mov bx, word ptr [esi]
828 mov dl, byte ptr [esi+2]
829 mov word ptr [edi], bx
830 mov byte ptr [edi+2], dl
831 jmp BltRecEnd
832
833; 4 Pixel Wide
834
835Rec4:
836 cmp edx,4
837 jb Rec4_0123
838 mov ebx, dword ptr [esi]
839 add esi,eax
840 mov dword ptr [edi], ebx
841 add edi,ecx
842 mov ebx, dword ptr [esi]
843 add esi,eax
844 mov dword ptr [edi], ebx
845 add edi,ecx
846 mov ebx, dword ptr [esi]
847 add esi,eax
848 mov dword ptr [edi], ebx
849 add edi,ecx
850 mov ebx, dword ptr [esi]
851 add esi,eax
852 mov dword ptr [edi], ebx
853 add edi,ecx
854 sub edx ,4
855 jnz Rec4
856 jmp BltRecEnd
857
858Rec4_0123:
859 cmp edx,2
860 jz Rec2_2
861 jb Rec2_01
862;3 lines left
863 mov ebx, dword ptr [esi]
864 add esi,eax
865 mov dword ptr [edi], ebx
866 add edi,ecx
867 mov ebx, dword ptr [esi]
868 add esi,eax
869 mov dword ptr [edi], ebx
870 add edi,ecx
871 mov ebx, dword ptr [esi]
872 mov dword ptr [edi], ebx
873 jmp BltRecEnd
874Rec4_2:
875 mov ebx, dword ptr [esi]
876 add esi,eax
877 mov dword ptr [edi], ebx
878 add edi,ecx
879 mov ebx, dword ptr [esi]
880 mov dword ptr [edi], ebx
881 jmp BltRecEnd
882Rec4_01:
883 test edx,edx
884 jz BltRecEnd
885 mov ebx, dword ptr [esi]
886 mov dword ptr [edi], ebx
887 jmp BltRecEnd
888
889; 5 Pixel Wide
890
891Rec5:
892 cmp edx,4
893 jb Rec5_0123
894 push edx
895 mov ebx, dword ptr [esi]
896 mov dl, byte ptr [esi+4]
897 add esi,eax
898 mov dword ptr [edi], ebx
899 mov byte ptr [edi+4], dl
900 add edi,ecx
901 mov ebx, dword ptr [esi]
902 mov dl, byte ptr [esi+4]
903 add esi,eax
904 mov dword ptr [edi], ebx
905 mov byte ptr [edi+4], dl
906 add edi,ecx
907 mov ebx, dword ptr [esi]
908 mov dl, byte ptr [esi+4]
909 add esi,eax
910 mov dword ptr [edi], ebx
911 mov byte ptr [edi+4], dl
912 add edi,ecx
913 mov ebx, dword ptr [esi]
914 mov dl, byte ptr [esi+4]
915 add esi,eax
916 mov dword ptr [edi], ebx
917 mov byte ptr [edi+4], dl
918 add edi,ecx
919 pop edx
920 sub edx ,4
921 jnz Rec5
922 jmp BltRecEnd
923Rec5_0123:
924 cmp edx,2
925 jz Rec5_2
926 jb Rec5_01
927; Must be 3 lines left
928 mov ebx, dword ptr [esi]
929 mov dl, byte ptr [esi+4]
930 add esi,eax
931 mov dword ptr [edi], ebx
932 mov byte ptr [edi+4], dl
933 add edi,ecx
934 mov ebx, dword ptr [esi]
935 mov dl, byte ptr [esi+4]
936 add esi,eax
937 mov dword ptr [edi], ebx
938 mov byte ptr [edi+4], dl
939 add edi,ecx
940 mov ebx, dword ptr [esi]
941 mov dl, byte ptr [esi+4]
942 mov dword ptr [edi], ebx
943 mov byte ptr [edi+4], dl
944 jmp BltRecEnd
945Rec5_2:
946 mov ebx, dword ptr [esi]
947 mov dl, byte ptr [esi+4]
948 add esi,eax
949 mov dword ptr [edi], ebx
950 mov byte ptr [edi+4], dl
951 add edi,ecx
952 mov ebx, dword ptr [esi]
953 mov dl, byte ptr [esi+4]
954 mov dword ptr [edi], ebx
955 mov byte ptr [edi+4], dl
956 jmp BltRecEnd
957Rec5_01:
958 test edx,edx
959 jz BltRecEnd
960 mov ebx, dword ptr [esi]
961 mov dl, byte ptr [esi+4]
962 mov dword ptr [edi], ebx
963 mov byte ptr [edi+4], dl
964 jmp BltRecEnd
965
966; 6 Pixel Wide
967
968Rec6:
969 cmp edx,4
970 jb Rec6_0123
971 push edx
972 mov ebx, dword ptr [esi]
973 mov dx, word ptr [esi+4]
974 add esi,eax
975 mov dword ptr [edi], ebx
976 mov word ptr [edi+4], dx
977 add edi,ecx
978 mov ebx, dword ptr [esi]
979 mov dx, word ptr [esi+4]
980 add esi,eax
981 mov dword ptr [edi], ebx
982 mov word ptr [edi+4], dx
983 add edi,ecx
984 mov ebx, dword ptr [esi]
985 mov dx, word ptr [esi+4]
986 add esi,eax
987 mov dword ptr [edi], ebx
988 mov word ptr [edi+4], dx
989 add edi,ecx
990 mov ebx, dword ptr [esi]
991 mov dx, word ptr [esi+4]
992 add esi,eax
993 mov dword ptr [edi], ebx
994 mov word ptr [edi+4], dx
995 add edi,ecx
996 pop edx
997 sub edx ,4
998 jnz Rec6
999 jmp BltRecEnd
1000Rec6_0123:
1001 cmp edx,2
1002 jz Rec6_2
1003 jb Rec6_01
1004; Must be 3 lines left
1005 mov ebx, dword ptr [esi]
1006 mov dx, word ptr [esi+4]
1007 add esi,eax
1008 mov dword ptr [edi], ebx
1009 mov word ptr [edi+4], dx
1010 add edi,ecx
1011 mov ebx, dword ptr [esi]
1012 mov dx, word ptr [esi+4]
1013 add esi,eax
1014 mov dword ptr [edi], ebx
1015 mov word ptr [edi+4], dx
1016 add edi,ecx
1017 mov ebx, dword ptr [esi]
1018 mov dx, word ptr [esi+4]
1019 mov dword ptr [edi], ebx
1020 mov word ptr [edi+4], dx
1021 jmp BltRecEnd
1022Rec6_2:
1023 mov ebx, dword ptr [esi]
1024 mov dx, word ptr [esi+4]
1025 add esi,eax
1026 mov dword ptr [edi], ebx
1027 mov word ptr [edi+4], dx
1028 add edi,ecx
1029 mov ebx, dword ptr [esi]
1030 mov dx, word ptr [esi+4]
1031 mov dword ptr [edi], ebx
1032 mov word ptr [edi+4], dx
1033 jmp BltRecEnd
1034Rec6_01:
1035 test edx,edx
1036 jz BltRecEnd
1037 mov ebx, dword ptr [esi]
1038 mov dx, word ptr [esi+4]
1039 mov dword ptr [edi], ebx
1040 mov word ptr [edi+4], dx
1041 jmp BltRecEnd
1042
1043; 7 Pixel Wide
1044
1045Rec7:
1046 cmp edx,4
1047 jb Rec6_0123
1048 push edx
1049 mov ebx, dword ptr [esi]
1050 mov dx, word ptr [esi+4]
1051 mov dword ptr [edi], ebx
1052 mov word ptr [edi+4], dx
1053 mov bl, byte ptr[esi+6]
1054 add esi,eax
1055 mov byte ptr[edi+6],bl
1056 add edi,ecx
1057 xor ebx,ebx ; clear ebx to avoid stalls
1058 mov ebx, dword ptr [esi]
1059 mov dx, word ptr [esi+4]
1060 mov dword ptr [edi], ebx
1061 mov word ptr [edi+4], dx
1062 mov bl, byte ptr[esi+6]
1063 add esi,eax
1064 mov byte ptr[edi+6],bl
1065 add edi,ecx
1066 xor ebx,ebx ; clear ebx to avoid stalls
1067 mov ebx, dword ptr [esi]
1068 mov dx, word ptr [esi+4]
1069 mov dword ptr [edi], ebx
1070 mov word ptr [edi+4], dx
1071 mov bl, byte ptr[esi+6]
1072 add esi,eax
1073 mov byte ptr[edi+6],bl
1074 add edi,ecx
1075 xor ebx,ebx ; clear ebx to avoid stalls
1076 mov ebx, dword ptr [esi]
1077 mov dx, word ptr [esi+4]
1078 mov dword ptr [edi], ebx
1079 mov word ptr [edi+4], dx
1080 mov bl, byte ptr[esi+6]
1081 add esi,eax
1082 mov byte ptr[edi+6],bl
1083 add edi,ecx
1084 xor ebx,ebx ; clear ebx to avoid stalls
1085 pop edx
1086 sub edx ,4
1087 jnz Rec7
1088 jmp BltRecEnd
1089Rec7_0123:
1090 cmp edx,2
1091 jz Rec7_2
1092 jb Rec7_01
1093; Must be 3 lines left
1094 mov ebx, dword ptr [esi]
1095 mov dx, word ptr [esi+4]
1096 mov dword ptr [edi], ebx
1097 mov word ptr [edi+4], dx
1098 mov bl, byte ptr[esi+6]
1099 add esi,eax
1100 mov byte ptr[edi+6],bl
1101 add edi,ecx
1102 xor ebx,ebx ; clear ebx to avoid stalls
1103 mov ebx, dword ptr [esi]
1104 mov dx, word ptr [esi+4]
1105 mov dword ptr [edi], ebx
1106 mov word ptr [edi+4], dx
1107 mov bl, byte ptr[esi+6]
1108 add esi,eax
1109 mov byte ptr[edi+6],bl
1110 add edi,ecx
1111 xor ebx,ebx ; clear ebx to avoid stalls
1112 mov ebx, dword ptr [esi]
1113 mov dx, word ptr [esi+4]
1114 mov dword ptr [edi], ebx
1115 mov word ptr [edi+4], dx
1116 mov bl, byte ptr[esi+6]
1117 mov byte ptr[edi+6],bl
1118 jmp BltRecEnd
1119Rec7_2:
1120 mov ebx, dword ptr [esi]
1121 mov dx, word ptr [esi+4]
1122 mov dword ptr [edi], ebx
1123 mov word ptr [edi+4], dx
1124 mov bl, byte ptr[esi+6]
1125 add esi,eax
1126 mov byte ptr[edi+6],bl
1127 add edi,ecx
1128 xor ebx,ebx ; clear ebx to avoid stalls
1129 mov ebx, dword ptr [esi]
1130 mov dx, word ptr [esi+4]
1131 mov dword ptr [edi], ebx
1132 mov word ptr [edi+4], dx
1133 mov bl, byte ptr[esi+6]
1134 mov byte ptr[edi+6],bl
1135 jmp BltRecEnd
1136Rec7_01:
1137 test edx,edx
1138 jz BltRecEnd
1139 mov ebx, dword ptr [esi]
1140 mov dx, word ptr [esi+4]
1141 mov dword ptr [edi], ebx
1142 mov word ptr [edi+4], dx
1143 mov bl, byte ptr[esi+6]
1144 mov byte ptr[edi+6],bl
1145 jmp BltRecEnd
1146
1147; 8 Pixel Wide
1148
1149Rec8:
1150 cmp edx,4
1151 jb Rec8_0123
1152 push edx
1153 mov ebx, dword ptr [esi]
1154 mov edx, dword ptr [esi+4]
1155 mov dword ptr [edi], ebx
1156 mov dword ptr [edi+4], edx
1157 add esi,eax
1158 add edi,ecx
1159 mov ebx, dword ptr [esi]
1160 mov edx, dword ptr [esi+4]
1161 mov dword ptr [edi], ebx
1162 mov dword ptr [edi+4], edx
1163 add esi,eax
1164 add edi,ecx
1165 mov ebx, dword ptr [esi]
1166 mov edx, dword ptr [esi+4]
1167 mov dword ptr [edi], ebx
1168 mov dword ptr [edi+4], edx
1169 add esi,eax
1170 add edi,ecx
1171 mov ebx, dword ptr [esi]
1172 mov edx, dword ptr [esi+4]
1173 mov dword ptr [edi], ebx
1174 mov dword ptr [edi+4], edx
1175 add esi,eax
1176 add edi,ecx
1177 pop edx
1178 sub edx ,4
1179 jnz Rec8
1180 jmp BltRecEnd
1181
1182Rec8_0123:
1183 cmp edx,2
1184 jz Rec8_2
1185 jb Rec8_01
1186;3 lines left
1187 mov ebx, dword ptr [esi]
1188 mov edx, dword ptr [esi+4]
1189 mov dword ptr [edi], ebx
1190 mov dword ptr [edi+4], edx
1191 add esi,eax
1192 add edi,ecx
1193 mov ebx, dword ptr [esi]
1194 mov edx, dword ptr [esi+4]
1195 mov dword ptr [edi], ebx
1196 mov dword ptr [edi+4], edx
1197 add esi,eax
1198 add edi,ecx
1199 mov ebx, dword ptr [esi]
1200 mov edx, dword ptr [esi+4]
1201 mov dword ptr [edi], ebx
1202 mov dword ptr [edi+4], edx
1203 jmp BltRecEnd
1204Rec8_2:
1205 mov ebx, dword ptr [esi]
1206 mov edx, dword ptr [esi+4]
1207 mov dword ptr [edi], ebx
1208 mov dword ptr [edi+4], edx
1209 add esi,eax
1210 add edi,ecx
1211 mov ebx, dword ptr [esi]
1212 mov edx, dword ptr [esi+4]
1213 mov dword ptr [edi], ebx
1214 mov dword ptr [edi+4], edx
1215 jmp BltRecEnd
1216Rec8_01:
1217 test edx,edx
1218 jz BltRecEnd
1219 mov ebx, dword ptr [esi]
1220 mov edx, dword ptr [esi+4]
1221 mov dword ptr [edi], ebx
1222 mov dword ptr [edi+4], edx
1223 jmp BltRecEnd
1224
1225; 9 Pixel Wide
1226
1227Rec9:
1228 cmp edx,4
1229 jb Rec9_0123
1230 push edx
1231 FLD QWORD PTR [ESI]
1232 mov bl, byte ptr [esi+8]
1233 FSTP QWORD PTR [EDI]
1234 mov byte ptr [edi+8], bl
1235 add esi,eax
1236 add edi,ecx
1237 FLD QWORD PTR [ESI]
1238 mov bl, byte ptr [esi+8]
1239 FSTP QWORD PTR [EDI]
1240 mov byte ptr [edi+8], bl
1241 add esi,eax
1242 add edi,ecx
1243 FLD QWORD PTR [ESI]
1244 mov bl, byte ptr [esi+8]
1245 FSTP QWORD PTR [EDI]
1246 mov byte ptr [edi+8], bl
1247 add esi,eax
1248 add edi,ecx
1249 FLD QWORD PTR [ESI]
1250 mov bl, byte ptr [esi+8]
1251 FSTP QWORD PTR [EDI]
1252 mov byte ptr [edi+8], bl
1253 add esi,eax
1254 add edi,ecx
1255 pop edx
1256 sub edx ,4
1257 jnz Rec9
1258 jmp BltRecEnd
1259
1260Rec9_0123:
1261 cmp edx,2
1262 jz Rec9_2
1263 jb Rec9_01
1264;3 lines left
1265 FLD QWORD PTR [ESI]
1266 mov bl, byte ptr [esi+8]
1267 FSTP QWORD PTR [EDI]
1268 mov byte ptr [edi+8], bl
1269 add esi,eax
1270 add edi,ecx
1271 FLD QWORD PTR [ESI]
1272 mov bl, byte ptr [esi+8]
1273 FSTP QWORD PTR [EDI]
1274 mov byte ptr [edi+8], bl
1275 add esi,eax
1276 add edi,ecx
1277 FLD QWORD PTR [ESI]
1278 mov bl, byte ptr [esi+8]
1279 FSTP QWORD PTR [EDI]
1280 mov byte ptr [edi+8], bl
1281 jmp BltRecEnd
1282Rec9_2:
1283 FLD QWORD PTR [ESI]
1284 mov bl, byte ptr [esi+8]
1285 FSTP QWORD PTR [EDI]
1286 mov byte ptr [edi+8], bl
1287 add esi,eax
1288 add edi,ecx
1289 FLD QWORD PTR [ESI]
1290 mov bl, byte ptr [esi+8]
1291 FSTP QWORD PTR [EDI]
1292 mov byte ptr [edi+8], bl
1293 jmp BltRecEnd
1294Rec9_01:
1295 test edx,edx
1296 jz BltRecEnd
1297 FLD QWORD PTR [ESI]
1298 mov bl, byte ptr [esi+8]
1299 FSTP QWORD PTR [EDI]
1300 mov byte ptr [edi+8], bl
1301 jmp BltRecEnd
1302
1303; 10 Pixel Wide
1304
1305Rec10:
1306 cmp edx,4
1307 jb Rec10_0123
1308 FLD QWORD PTR [ESI]
1309 mov bx, word ptr [esi+8]
1310 FSTP QWORD PTR [EDI]
1311 mov word ptr [edi+8], bx
1312 add esi,eax
1313 add edi,ecx
1314 FLD QWORD PTR [ESI]
1315 mov bx, word ptr [esi+8]
1316 FSTP QWORD PTR [EDI]
1317 mov word ptr [edi+8], bx
1318 add esi,eax
1319 add edi,ecx
1320 FLD QWORD PTR [ESI]
1321 mov bx, word ptr [esi+8]
1322 FSTP QWORD PTR [EDI]
1323 mov word ptr [edi+8], bx
1324 add esi,eax
1325 add edi,ecx
1326 FLD QWORD PTR [ESI]
1327 mov bx, word ptr [esi+8]
1328 FSTP QWORD PTR [EDI]
1329 mov word ptr [edi+8], bx
1330 add esi,eax
1331 add edi,ecx
1332 sub edx ,4
1333 jnz Rec10
1334 jmp BltRecEnd
1335
1336Rec10_0123:
1337 cmp edx,2
1338 jz Rec10_2
1339 jb Rec10_01
1340;3 lines left
1341 FLD QWORD PTR [ESI]
1342 mov bx, word ptr [esi+8]
1343 FSTP QWORD PTR [EDI]
1344 mov word ptr [edi+8], bx
1345 add esi,eax
1346 add edi,ecx
1347 FLD QWORD PTR [ESI]
1348 mov bx, word ptr [esi+8]
1349 FSTP QWORD PTR [EDI]
1350 mov word ptr [edi+8], bx
1351 add esi,eax
1352 add edi,ecx
1353 FLD QWORD PTR [ESI]
1354 mov bx, word ptr [esi+8]
1355 FSTP QWORD PTR [EDI]
1356 mov word ptr [edi+8], bx
1357 jmp BltRecEnd
1358Rec10_2:
1359 FLD QWORD PTR [ESI]
1360 mov bx, word ptr [esi+8]
1361 FSTP QWORD PTR [EDI]
1362 mov word ptr [edi+8], bx
1363 add esi,eax
1364 add edi,ecx
1365 FLD QWORD PTR [ESI]
1366 mov bx, word ptr [esi+8]
1367 FSTP QWORD PTR [EDI]
1368 mov word ptr [edi+8], bx
1369 jmp BltRecEnd
1370Rec10_01:
1371 test edx,edx
1372 jz BltRecEnd
1373 FLD QWORD PTR [ESI]
1374 mov bx, word ptr [esi+8]
1375 FSTP QWORD PTR [EDI]
1376 mov word ptr [edi+8], bx
1377 jmp BltRecEnd
1378
1379; 11 Pixel Wide
1380
1381Rec11:
1382 cmp edx,4
1383 jb Rec11_0123
1384 push edx
1385 FLD QWORD PTR [ESI]
1386 mov bx, word ptr [esi+8]
1387 mov dl, byte ptr [esi+10]
1388 FSTP QWORD PTR [EDI]
1389 mov word ptr [edi+8], bx
1390 mov byte ptr [edi+10], dl
1391 add esi,eax
1392 add edi,ecx
1393 FLD QWORD PTR [ESI]
1394 mov bx, word ptr [esi+8]
1395 mov dl, byte ptr [esi+10]
1396 FSTP QWORD PTR [EDI]
1397 mov word ptr [edi+8], bx
1398 mov byte ptr [edi+10], dl
1399 add esi,eax
1400 add edi,ecx
1401 FLD QWORD PTR [ESI]
1402 mov bx, word ptr [esi+8]
1403 mov dl, byte ptr [esi+10]
1404 FSTP QWORD PTR [EDI]
1405 mov word ptr [edi+8], bx
1406 mov byte ptr [edi+10], dl
1407 add esi,eax
1408 add edi,ecx
1409 FLD QWORD PTR [ESI]
1410 mov bx, word ptr [esi+8]
1411 mov dl, byte ptr [esi+10]
1412 FSTP QWORD PTR [EDI]
1413 mov word ptr [edi+8], bx
1414 mov byte ptr [edi+10], dl
1415 add esi,eax
1416 add edi,ecx
1417 pop edx
1418 sub edx ,4
1419 jnz Rec10
1420 jmp BltRecEnd
1421
1422Rec11_0123:
1423 cmp edx,2
1424 jz Rec11_2
1425 jb Rec11_01
1426;3 lines left
1427 FLD QWORD PTR [ESI]
1428 mov bx, word ptr [esi+8]
1429 mov dl, byte ptr [esi+10]
1430 FSTP QWORD PTR [EDI]
1431 mov word ptr [edi+8], bx
1432 mov byte ptr [edi+10], dl
1433 add esi,eax
1434 add edi,ecx
1435 FLD QWORD PTR [ESI]
1436 mov bx, word ptr [esi+8]
1437 mov dl, byte ptr [esi+10]
1438 FSTP QWORD PTR [EDI]
1439 mov word ptr [edi+8], bx
1440 mov byte ptr [edi+10], dl
1441 add esi,eax
1442 add edi,ecx
1443 FLD QWORD PTR [ESI]
1444 mov bx, word ptr [esi+8]
1445 mov dl, byte ptr [esi+10]
1446 FSTP QWORD PTR [EDI]
1447 mov word ptr [edi+8], bx
1448 mov byte ptr [edi+10], dl
1449 jmp BltRecEnd
1450Rec11_2:
1451 FLD QWORD PTR [ESI]
1452 mov bx, word ptr [esi+8]
1453 mov dl, byte ptr [esi+10]
1454 FSTP QWORD PTR [EDI]
1455 mov word ptr [edi+8], bx
1456 mov byte ptr [edi+10], dl
1457 add esi,eax
1458 add edi,ecx
1459 FLD QWORD PTR [ESI]
1460 mov bx, word ptr [esi+8]
1461 mov dl, byte ptr [esi+10]
1462 FSTP QWORD PTR [EDI]
1463 mov word ptr [edi+8], bx
1464 mov byte ptr [edi+10], dl
1465 jmp BltRecEnd
1466Rec11_01:
1467 test edx,edx
1468 jz BltRecEnd
1469 FLD QWORD PTR [ESI]
1470 mov bx, word ptr [esi+8]
1471 mov dl, byte ptr [esi+10]
1472 FSTP QWORD PTR [EDI]
1473 mov word ptr [edi+8], bx
1474 mov byte ptr [edi+10], dl
1475 jmp BltRecEnd
1476
1477; 12 Pixel Wide
1478
1479Rec12:
1480 cmp edx,4
1481 jb Rec12_0123
1482 FLD QWORD PTR [ESI]
1483 mov ebx, dword ptr [esi+8]
1484 FSTP QWORD PTR [EDI]
1485 mov dword ptr [edi+8], ebx
1486 add esi,eax
1487 add edi,ecx
1488 FLD QWORD PTR [ESI]
1489 mov ebx, dword ptr [esi+8]
1490 FSTP QWORD PTR [EDI]
1491 mov dword ptr [edi+8], ebx
1492 add esi,eax
1493 add edi,ecx
1494 FLD QWORD PTR [ESI]
1495 mov ebx, dword ptr [esi+8]
1496 FSTP QWORD PTR [EDI]
1497 mov dword ptr [edi+8], ebx
1498 add esi,eax
1499 add edi,ecx
1500 FLD QWORD PTR [ESI]
1501 mov ebx, dword ptr [esi+8]
1502 FSTP QWORD PTR [EDI]
1503 mov dword ptr [edi+8], ebx
1504 add esi,eax
1505 add edi,ecx
1506 sub edx ,4
1507 jnz Rec12
1508 jmp BltRecEnd
1509
1510Rec12_0123:
1511 cmp edx,2
1512 jz Rec12_2
1513 jb Rec12_01
1514;3 lines left
1515 FLD QWORD PTR [ESI]
1516 mov ebx, dword ptr [esi+8]
1517 FSTP QWORD PTR [EDI]
1518 mov dword ptr [edi+8], ebx
1519 add esi,eax
1520 add edi,ecx
1521 FLD QWORD PTR [ESI]
1522 mov ebx, dword ptr [esi+8]
1523 FSTP QWORD PTR [EDI]
1524 mov dword ptr [edi+8], ebx
1525 add esi,eax
1526 add edi,ecx
1527 FLD QWORD PTR [ESI]
1528 mov ebx, dword ptr [esi+8]
1529 FSTP QWORD PTR [EDI]
1530 mov dword ptr [edi+8], ebx
1531 jmp BltRecEnd
1532Rec12_2:
1533 FLD QWORD PTR [ESI]
1534 mov ebx, dword ptr [esi+8]
1535 FSTP QWORD PTR [EDI]
1536 mov dword ptr [edi+8], ebx
1537 add esi,eax
1538 add edi,ecx
1539 FLD QWORD PTR [ESI]
1540 mov ebx, dword ptr [esi+8]
1541 FSTP QWORD PTR [EDI]
1542 mov dword ptr [edi+8], ebx
1543 jmp BltRecEnd
1544Rec12_01:
1545 test edx,edx
1546 jz BltRecEnd
1547 FLD QWORD PTR [ESI]
1548 mov ebx, dword ptr [esi+8]
1549 FSTP QWORD PTR [EDI]
1550 mov dword ptr [edi+8], ebx
1551 jmp BltRecEnd
1552
1553; 13 Pixel Wide
1554
1555Rec13:
1556 cmp edx,4
1557 jb Rec13_0123
1558 push edx
1559 FLD QWORD PTR [ESI]
1560 mov ebx, dword ptr [esi+8]
1561 mov dl, byte ptr [esi+12]
1562 FSTP QWORD PTR [EDI]
1563 mov dword ptr [edi+8], ebx
1564 mov byte ptr [edi+12], dl
1565 add esi,eax
1566 add edi,ecx
1567 FLD QWORD PTR [ESI]
1568 mov ebx, dword ptr [esi+8]
1569 mov dl, byte ptr [esi+12]
1570 FSTP QWORD PTR [EDI]
1571 mov dword ptr [edi+8], ebx
1572 mov byte ptr [edi+12], dl
1573 add esi,eax
1574 add edi,ecx
1575 FLD QWORD PTR [ESI]
1576 mov ebx, dword ptr [esi+8]
1577 mov dl, byte ptr [esi+12]
1578 FSTP QWORD PTR [EDI]
1579 mov dword ptr [edi+8], ebx
1580 mov byte ptr [edi+12], dl
1581 add esi,eax
1582 add edi,ecx
1583 FLD QWORD PTR [ESI]
1584 mov ebx, dword ptr [esi+8]
1585 mov dl, byte ptr [esi+12]
1586 FSTP QWORD PTR [EDI]
1587 mov dword ptr [edi+8], ebx
1588 mov byte ptr [edi+12], dl
1589 add esi,eax
1590 add edi,ecx
1591 pop edx
1592 sub edx ,4
1593 jnz Rec13
1594 jmp BltRecEnd
1595
1596Rec13_0123:
1597 cmp edx,2
1598 jz Rec13_2
1599 jb Rec13_01
1600;3 lines left
1601 FLD QWORD PTR [ESI]
1602 mov ebx, dword ptr [esi+8]
1603 mov dl, byte ptr [esi+12]
1604 FSTP QWORD PTR [EDI]
1605 mov dword ptr [edi+8], ebx
1606 mov byte ptr [edi+12], dl
1607 add esi,eax
1608 add edi,ecx
1609 FLD QWORD PTR [ESI]
1610 mov ebx, dword ptr [esi+8]
1611 mov dl, byte ptr [esi+12]
1612 FSTP QWORD PTR [EDI]
1613 mov dword ptr [edi+8], ebx
1614 mov byte ptr [edi+12], dl
1615 add esi,eax
1616 add edi,ecx
1617 FLD QWORD PTR [ESI]
1618 mov ebx, dword ptr [esi+8]
1619 mov dl, byte ptr [esi+12]
1620 FSTP QWORD PTR [EDI]
1621 mov dword ptr [edi+8], ebx
1622 mov byte ptr [edi+12], dl
1623 jmp BltRecEnd
1624Rec13_2:
1625 FLD QWORD PTR [ESI]
1626 mov ebx, dword ptr [esi+8]
1627 mov dl, byte ptr [esi+12]
1628 FSTP QWORD PTR [EDI]
1629 mov dword ptr [edi+8], ebx
1630 mov byte ptr [edi+12], dl
1631 add esi,eax
1632 add edi,ecx
1633 FLD QWORD PTR [ESI]
1634 mov ebx, dword ptr [esi+8]
1635 mov dl, byte ptr [esi+12]
1636 FSTP QWORD PTR [EDI]
1637 mov dword ptr [edi+8], ebx
1638 mov byte ptr [edi+12], dl
1639 jmp BltRecEnd
1640Rec13_01:
1641 test edx,edx
1642 jz BltRecEnd
1643 FLD QWORD PTR [ESI]
1644 mov ebx, dword ptr [esi+8]
1645 mov dl, byte ptr [esi+12]
1646 FSTP QWORD PTR [EDI]
1647 mov dword ptr [edi+8], ebx
1648 mov byte ptr [edi+12], dl
1649 jmp BltRecEnd
1650
1651; 14 Pixel Wide
1652
1653Rec14:
1654 cmp edx,4
1655 jb Rec14_0123
1656 push edx
1657 FLD QWORD PTR [ESI]
1658 mov ebx, dword ptr [esi+8]
1659 mov dx, word ptr [esi+12]
1660 FSTP QWORD PTR [EDI]
1661 mov dword ptr [edi+8], ebx
1662 mov word ptr [edi+12], dx
1663 add esi,eax
1664 add edi,ecx
1665 FLD QWORD PTR [ESI]
1666 mov ebx, dword ptr [esi+8]
1667 mov dx, word ptr [esi+12]
1668 FSTP QWORD PTR [EDI]
1669 mov dword ptr [edi+8], ebx
1670 mov word ptr [edi+12], dx
1671 add esi,eax
1672 add edi,ecx
1673 FLD QWORD PTR [ESI]
1674 mov ebx, dword ptr [esi+8]
1675 mov dx, word ptr [esi+12]
1676 FSTP QWORD PTR [EDI]
1677 mov dword ptr [edi+8], ebx
1678 mov word ptr [edi+12], dx
1679 add esi,eax
1680 add edi,ecx
1681 FLD QWORD PTR [ESI]
1682 mov ebx, dword ptr [esi+8]
1683 mov dx, word ptr [esi+12]
1684 FSTP QWORD PTR [EDI]
1685 mov dword ptr [edi+8], ebx
1686 mov word ptr [edi+12], dx
1687 add esi,eax
1688 add edi,ecx
1689 pop edx
1690 sub edx ,4
1691 jnz Rec14
1692 jmp BltRecEnd
1693
1694Rec14_0123:
1695 cmp edx,2
1696 jz Rec14_2
1697 jb Rec14_01
1698;3 lines left
1699 FLD QWORD PTR [ESI]
1700 mov ebx, dword ptr [esi+8]
1701 mov dx, word ptr [esi+12]
1702 FSTP QWORD PTR [EDI]
1703 mov dword ptr [edi+8], ebx
1704 mov word ptr [edi+12], dx
1705 add esi,eax
1706 add edi,ecx
1707 FLD QWORD PTR [ESI]
1708 mov ebx, dword ptr [esi+8]
1709 mov dx, word ptr [esi+12]
1710 FSTP QWORD PTR [EDI]
1711 mov dword ptr [edi+8], ebx
1712 mov word ptr [edi+12], dx
1713 add esi,eax
1714 add edi,ecx
1715 FLD QWORD PTR [ESI]
1716 mov ebx, dword ptr [esi+8]
1717 mov dx, word ptr [esi+12]
1718 FSTP QWORD PTR [EDI]
1719 mov dword ptr [edi+8], ebx
1720 mov word ptr [edi+12], dx
1721 jmp BltRecEnd
1722Rec14_2:
1723 FLD QWORD PTR [ESI]
1724 mov ebx, dword ptr [esi+8]
1725 mov dx, word ptr [esi+12]
1726 FSTP QWORD PTR [EDI]
1727 mov dword ptr [edi+8], ebx
1728 mov word ptr [edi+12], dx
1729 add esi,eax
1730 add edi,ecx
1731 FLD QWORD PTR [ESI]
1732 mov ebx, dword ptr [esi+8]
1733 mov dx, word ptr [esi+12]
1734 FSTP QWORD PTR [EDI]
1735 mov dword ptr [edi+8], ebx
1736 mov word ptr [edi+12], dx
1737 jmp BltRecEnd
1738Rec14_01:
1739 test edx,edx
1740 jz BltRecEnd
1741 FLD QWORD PTR [ESI]
1742 mov ebx, dword ptr [esi+8]
1743 mov dx, word ptr [esi+12]
1744 FSTP QWORD PTR [EDI]
1745 mov dword ptr [edi+8], ebx
1746 mov word ptr [edi+12], dx
1747 jmp BltRecEnd
1748
1749; 15 Pixel Wide
1750
1751Rec15:
1752 cmp edx,4
1753 jb Rec15_0123
1754 push edx
1755 FLD QWORD PTR [ESI]
1756 mov ebx, dword ptr [esi+8]
1757 mov dx, word ptr [esi+12]
1758 FSTP QWORD PTR [EDI]
1759 mov dword ptr [edi+8], ebx
1760 mov bl, byte ptr[esi+14]
1761 mov word ptr [edi+12], dx
1762 add esi,eax
1763 mov byte ptr[edi+14], bl
1764 add edi,ecx
1765 FLD QWORD PTR [ESI]
1766 mov ebx, dword ptr [esi+8]
1767 mov dx, word ptr [esi+12]
1768 FSTP QWORD PTR [EDI]
1769 mov dword ptr [edi+8], ebx
1770 mov bl, byte ptr[esi+14]
1771 mov word ptr [edi+12], dx
1772 add esi,eax
1773 mov byte ptr[edi+14], bl
1774 add edi,ecx
1775 FLD QWORD PTR [ESI]
1776 mov ebx, dword ptr [esi+8]
1777 mov dx, word ptr [esi+12]
1778 FSTP QWORD PTR [EDI]
1779 mov dword ptr [edi+8], ebx
1780 mov bl, byte ptr[esi+14]
1781 mov word ptr [edi+12], dx
1782 add esi,eax
1783 mov byte ptr[edi+14], bl
1784 add edi,ecx
1785 FLD QWORD PTR [ESI]
1786 mov ebx, dword ptr [esi+8]
1787 mov dx, word ptr [esi+12]
1788 FSTP QWORD PTR [EDI]
1789 mov dword ptr [edi+8], ebx
1790 mov bl, byte ptr[esi+14]
1791 mov word ptr [edi+12], dx
1792 add esi,eax
1793 mov byte ptr[edi+14], bl
1794 add edi,ecx
1795 pop edx
1796 sub edx ,4
1797 jnz Rec15
1798 jmp BltRecEnd
1799
1800Rec15_0123:
1801 cmp edx,2
1802 jz Rec15_2
1803 jb Rec15_01
1804;3 lines left
1805 FLD QWORD PTR [ESI]
1806 mov ebx, dword ptr [esi+8]
1807 mov dx, word ptr [esi+12]
1808 FSTP QWORD PTR [EDI]
1809 mov dword ptr [edi+8], ebx
1810 mov bl, byte ptr[esi+14]
1811 mov word ptr [edi+12], dx
1812 add esi,eax
1813 mov byte ptr[edi+14], bl
1814 add edi,ecx
1815 FLD QWORD PTR [ESI]
1816 mov ebx, dword ptr [esi+8]
1817 mov dx, word ptr [esi+12]
1818 FSTP QWORD PTR [EDI]
1819 mov dword ptr [edi+8], ebx
1820 mov bl, byte ptr[esi+14]
1821 mov word ptr [edi+12], dx
1822 add esi,eax
1823 mov byte ptr[edi+14], bl
1824 add edi,ecx
1825 FLD QWORD PTR [ESI]
1826 mov ebx, dword ptr [esi+8]
1827 mov dx, word ptr [esi+12]
1828 FSTP QWORD PTR [EDI]
1829 mov dword ptr [edi+8], ebx
1830 mov bl, byte ptr[esi+14]
1831 mov word ptr [edi+12], dx
1832 mov byte ptr[edi+14], bl
1833 jmp BltRecEnd
1834Rec15_2:
1835 FLD QWORD PTR [ESI]
1836 mov ebx, dword ptr [esi+8]
1837 mov dx, word ptr [esi+12]
1838 FSTP QWORD PTR [EDI]
1839 mov dword ptr [edi+8], ebx
1840 mov bl, byte ptr[esi+14]
1841 mov word ptr [edi+12], dx
1842 add esi,eax
1843 mov byte ptr[edi+14], bl
1844 add edi,ecx
1845 FLD QWORD PTR [ESI]
1846 mov ebx, dword ptr [esi+8]
1847 mov dx, word ptr [esi+12]
1848 FSTP QWORD PTR [EDI]
1849 mov dword ptr [edi+8], ebx
1850 mov bl, byte ptr[esi+14]
1851 mov word ptr [edi+12], dx
1852 mov byte ptr[edi+14], bl
1853 jmp BltRecEnd
1854Rec15_01:
1855 test edx,edx
1856 jz BltRecEnd
1857 FLD QWORD PTR [ESI]
1858 mov ebx, dword ptr [esi+8]
1859 mov dx, word ptr [esi+12]
1860 FSTP QWORD PTR [EDI]
1861 mov dword ptr [edi+8], ebx
1862 mov bl, byte ptr[esi+14]
1863 mov word ptr [edi+12], dx
1864 mov byte ptr[edi+14], bl
1865 jmp BltRecEnd
1866
1867
1868ComplexBlt:
1869 ; Blit first the even rect then the rest
1870
1871 push dword ptr [ebp+28] ; ulSrcPitch
1872 push dword ptr [ebp+24] ; ulDestPitch
1873 push edx
1874 shl ecx,4
1875 push ecx
1876 push esi
1877 push edi
1878 call _BltRec
1879 sub esp,24
1880 add esi,ecx
1881 add edi,ecx
1882 push dword ptr [ebp+28] ; ulSrcPitch
1883 push dword ptr [ebp+24] ; ulDestPitch
1884 push edx
1885 push ebx
1886 push esi
1887 push edi
1888 call _BltRec
1889 sub esp,24
1890
1891BltRecEnd:
1892 pop edx
1893 pop ecx
1894 pop ebx
1895 pop eax
1896 pop esi
1897 pop edi
1898 pop ebp
1899 ret
1900_BltRec ENDP
1901
1902
1903 PUBLIC _CPUHasMMX
1904;
1905; int __cdecl CPUHasMMX()
1906; returns:
1907; 0 = NoMMX
1908; 1 = MMX
1909; 2 = MMX+CMov instuction
1910
1911_CPUHasMMX PROC NEAR
1912 push ebp
1913 mov ebp, esp
1914 push edi
1915 push esi
1916 push ebx
1917 push ecx
1918 push edx
1919
1920 pushfd
1921 pop eax
1922 mov ebx ,eax
1923 xor eax, 00200000h
1924 push eax
1925 popfd
1926 pushfd
1927 pop eax
1928 sub eax,ebx
1929 jz Return ; No CPUID => No MMX => return 0 in eax;
1930 mov eax, 1
1931 CPUID
1932 test edx,00800000h ; MMX Bit Set ?
1933 jz Return
1934 mov eax, 1
1935 test edx,00008000h ; Conditonal Mov Bit Set ?
1936 jz Return
1937 inc eax
1938Return:
1939; mov eax, 0 ; pretend no MMX is available
1940 pop edx
1941 pop ecx
1942 pop ebx
1943 pop esi
1944 pop edi
1945 pop ebp
1946 ret
1947_CPUHasMMX ENDP
1948
1949 PUBLIC _MemFlip
1950
1951;
1952; memcpy via FLD / FSTP MMX might even be faster but
1953; not present on every system
1954; to maximize the speed we copy 64 bytes in each loop
1955; and after the loop the rest left
1956;
1957;
1958;void __cdecl MemFlip(PBYTE dest, PBYTE src, ULONG Size);
1959
1960_MemFlip PROC NEAR
1961 push ebp
1962 mov ebp, esp
1963 push edi
1964 push esi
1965 push eax
1966 push ebx
1967 push ecx
1968
1969 mov eax , dword ptr [ebp+16] ; Size of Buffer
1970 mov edi , dword ptr [ebp+8] ; Destination
1971 mov ebx , eax
1972 mov esi , dword ptr [ebp+12] ; SourcePointer
1973
1974 and ebx , 0000003Fh ; Calc leftover bytes
1975 shr eax , 5 ; Calc Loops
1976
1977 jz COPYREMAIN ; Less then 64 to copy
1978ALIGN 4
1979
1980Loop64:
1981 FLD QWORD PTR [ESI] ; 1
1982 FLD QWORD PTR [ESI+8] ; 2
1983 FXCH ; Doesn't take any clocks
1984 FSTP QWORD PTR [EDI] ; 3,4 Clocks
1985 FSTP QWORD PTR [EDI+8] ; 5,6
1986 ADD ESI,16 ; 7 U Integer instruction can be executed parallel
1987 ADD EDI,16 ; 7 V Total clocks for copying 16 byte 7 clocks Rep Movs needs 20! for each 16 byte + 13 setup
1988 FLD QWORD PTR [ESI]
1989 FLD QWORD PTR [ESI+8]
1990 FXCH
1991 FSTP QWORD PTR [EDI]
1992 FSTP QWORD PTR [EDI+8]
1993 ADD ESI,16
1994 ADD EDI,16
1995 FLD QWORD PTR [ESI]
1996 FLD QWORD PTR [ESI+8]
1997 FXCH
1998 FSTP QWORD PTR [EDI]
1999 FSTP QWORD PTR [EDI+8]
2000 ADD ESI,16
2001 ADD EDI,16
2002 FLD QWORD PTR [ESI]
2003 FLD QWORD PTR [ESI+8]
2004 FXCH
2005 FSTP QWORD PTR [EDI]
2006 FSTP QWORD PTR [EDI+8]
2007 ADD ESI,16
2008 ADD EDI,16
2009 inc eax
2010 jnz Loop64
2011
2012COPYREMAIN:
2013 test ebx, ebx ; something left ?
2014 jz EndOffFlip
2015
2016 test ebx, 00000020h; at least 32 bytes left ?
2017 jz Test16
2018
2019 FLD QWORD PTR [ESI]
2020 FLD QWORD PTR [ESI+8]
2021 FXCH
2022 FSTP QWORD PTR [EDI]
2023 FSTP QWORD PTR [EDI+8]
2024 ADD ESI,16
2025 ADD EDI,16
2026 FLD QWORD PTR [ESI]
2027 FLD QWORD PTR [ESI+8]
2028 FXCH
2029 FSTP QWORD PTR [EDI]
2030 FSTP QWORD PTR [EDI+8]
2031 ADD ESI,16
2032 ADD EDI,16
2033 sub ebx, 00000020h
2034 jz EndOffFlip
2035
2036Test16:
2037
2038 test ebx, 00000010h; at least 16 bytes left ?
2039 jb Test8
2040
2041 FLD QWORD PTR [ESI]
2042 FLD QWORD PTR [ESI+8]
2043 FXCH
2044 FSTP QWORD PTR [EDI]
2045 FSTP QWORD PTR [EDI+8]
2046 ADD ESI,16
2047 ADD EDI,16
2048 sub ebx, 00000010h
2049 jz EndOffFlip
2050Test8:
2051
2052 test ebx, 00000008h; at least 8 bytes left ?
2053 jb Test4
2054 mov eax,[esi]
2055 mov ecx,[esi+4]
2056 mov [edi],eax
2057 mov [edi+4],ecx
2058 add esi, 8
2059 add edi, 8
2060 sub ebx, 8
2061 jz EndOffFlip
2062
2063Test4:
2064 test ebx, 00000004h; at least 4 bytes left ?
2065 jb Test2
2066 mov eax,[esi]
2067 sub ebx, 4
2068 mov [edi],eax
2069 add esi, 4
2070 add edi, 4
2071 test ebx, ebx ; something left ?
2072 jz EndOffFlip
2073
2074Test2:
2075 test ebx, 00000002h
2076 jb Copy1
2077 mov ax,[esi]
2078 sub ebx,2
2079 mov [edi],ax
2080 add esi,2
2081 add edi,2
2082 test ebx,ebx
2083 jz EndOffFlip
2084
2085Copy1:
2086 mov al,[esi]
2087 mov [edi],al
2088
2089EndOffFlip:
2090 pop ecx
2091 pop ebx
2092 pop eax
2093 pop esi
2094 pop edi
2095 pop ebp
2096 ret
2097_MemFlip ENDP
2098
2099CODE32 ENDS
2100
2101 END
Note: See TracBrowser for help on using the repository browser.