source: trunk/src/ddraw/asmutil.asm@ 211

Last change on this file since 211 was 211, checked in by hugh, 26 years ago

Include for DX6 version of ddraw

File size: 39.9 KB
Line 
1; asmutil.asm Color key bit blitting for DirectDraw
2;
3; Copyright 1998 Sander van Leeuwen
4; 1999 Markus Montkowski
5
6 NAME asmutil
7.586p
8.MMX
9
10CODE32 SEGMENT DWORD PUBLIC USE32 'CODE'
11 ASSUME CS:FLAT ,DS:FLAT,SS:FLAT
12
13 PUBLIC _BlitColorKey8
14
15; endpos = destbuf + blitlinesize;
16; while(destbuf < endpos) {
17; if(*srcbuf == colorkey) {
18; destbuf++;
19; }
20; else *destbuf++ = *srcbuf;
21; srcbuf++;
22; }
23; destbuf += (destscanlinesize-blitlinesize);
24; srcbuf += (srcscanlinesize-blitlinesize);
25;void BlitColorKey8(char *dest, char *src, ULONG key, ULONG linesize)
;----------------------------------------------------------------------
; void BlitColorKey8(char *dest, char *src, ULONG key, ULONG linesize)
;
; Copies one line of 8-bit pixels from src to dest; a destination byte
; is left untouched wherever the source byte equals the colour key.
;
; In:    [ebp+8]  dest      destination line
;        [ebp+12] src       source line
;        [ebp+16] key       colour key (only the low byte is used)
;        [ebp+20] linesize  number of bytes to blit
; Out:   nothing; every register used is saved and restored,
;        flags are modified
; Notes: arguments live on the stack and are left for the caller to
;        remove (plain ret).
;        Whole dwords are merged four pixels at a time.  The last
;        (linesize mod 4) bytes are handled one at a time, so no byte
;        beyond linesize is read or written.
;----------------------------------------------------------------------
_BlitColorKey8 PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

        mov     edi, dword ptr [ebp+8]      ; edi = dest
        mov     esi, dword ptr [ebp+12]     ; esi = src
        mov     edx, dword ptr [ebp+16]     ; edx = colorkey
        mov     ecx, dword ptr [ebp+20]     ; ecx = linesize in bytes

        ; replicate the key byte into all four bytes of edx
        mov     dh, dl
        mov     eax, edx
        shl     edx, 16
        and     eax, 0000FFFFh
        or      edx, eax                    ; edx = key:key:key:key

        shr     ecx, 2                      ; ecx = number of whole dwords
        jz      blitremain                  ; less than 4 bytes in total

        ; ---- four pixels per iteration; ecx = dwords left (> 0) ----
blitloop:
        mov     ebx, dword ptr [esi]        ; ebx = 4 source pixels
        mov     eax, dword ptr [edi]        ; eax = 4 destination pixels
        cmp     ebx, edx                    ; all 4 bytes transparent?
        je      TTTT                        ; yes, nothing to store
        cmp     bx, dx                      ; lower 2 bytes transparent?
        je      XXTT
        cmp     bl, dl                      ; byte 0 transparent?
        je      XXOT                        ; then byte 1 must be opaque
        mov     al, bl                      ; copy byte 0
        cmp     bh, dh                      ; byte 1 transparent?
        je      XXTT
XXOT:
        mov     ah, bh                      ; copy byte 1
XXTT:                                       ; now the upper 2 pixels
        ror     eax, 16                     ; bring bytes 2/3 into ax
        ror     ebx, 16                     ; and into bx
        cmp     bx, dx                      ; both transparent?
        je      skipbyte4
        cmp     bl, dl                      ; byte 2 transparent?
        je      skipbyte3
        mov     al, bl                      ; copy byte 2
skipbyte3:
        cmp     bh, dh                      ; byte 3 transparent?
        je      skipbyte4
        mov     ah, bh                      ; copy byte 3
skipbyte4:
        ror     eax, 16                     ; restore memory byte order
        mov     dword ptr [edi], eax        ; write merged pixels
TTTT:
        add     esi, 4                      ; advance past this dword so the
        add     edi, 4                      ; tail code starts at the right byte
        dec     ecx
        jnz     blitloop                    ; loop while dwords remain

        ; ---- trailing 0..3 bytes, one pixel at a time ----
blitremain:
        mov     ecx, dword ptr [ebp+20]
        and     ecx, 3                      ; ecx = linesize mod 4
        jz      endofblit
blit8tail:
        mov     al, byte ptr [esi]
        cmp     al, dl                      ; transparent pixel?
        je      blit8tailskip
        mov     byte ptr [edi], al          ; opaque: copy it
blit8tailskip:
        inc     esi
        inc     edi
        dec     ecx
        jnz     blit8tail

endofblit:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey8 ENDP
157
158 PUBLIC _BlitColorKey16
159
160; endpos = destbuf + blitlinesize;
161; while(destbuf < endpos) {
162; if(*srcbuf == colorkey) {
163; destbuf++;
164; }
165; else *destbuf++ = *srcbuf;
166; srcbuf++;
167; }
168; destbuf += (destscanlinesize-blitlinesize);
169; srcbuf += (srcscanlinesize-blitlinesize);
170;void BlitColorKey16(char *dest, char *src, ULONG key, ULONG linesize)
;----------------------------------------------------------------------
; void BlitColorKey16(char *dest, char *src, ULONG key, ULONG linesize)
;
; Copies one line of 16-bit pixels from src to dest; a destination
; pixel is left untouched wherever the source pixel equals the colour
; key.
;
; In:    [ebp+8]  dest      destination line
;        [ebp+12] src       source line
;        [ebp+16] key       colour key (only the low word is used)
;        [ebp+20] linesize  number of 16-bit pixels to blit
; Out:   nothing; every register used is saved and restored,
;        flags are modified
; Notes: arguments live on the stack and are left for the caller to
;        remove (plain ret).
;        Pixels are merged in pairs (one dword); an odd last pixel is
;        handled with word accesses only, so nothing beyond linesize
;        pixels is read or written.  linesize 0 and 1 are valid.
;----------------------------------------------------------------------
_BlitColorKey16 PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

        mov     edi, dword ptr [ebp+8]      ; edi = dest
        mov     esi, dword ptr [ebp+12]     ; esi = src
        mov     edx, dword ptr [ebp+16]     ; edx = colorkey
        mov     ecx, dword ptr [ebp+20]     ; ecx = linesize in pixels

        ; replicate the key word into both halves of edx
        mov     eax, edx
        shl     edx, 16
        and     eax, 0000FFFFh
        or      edx, eax                    ; edx = key:key

        shr     ecx, 1                      ; ecx = number of pixel pairs
        jz      OnePixel                    ; no complete pair at all

        ; ---- two pixels per iteration; ecx = pairs left (> 0) ----
blitloop16:
        mov     eax, dword ptr [esi]        ; eax = 2 source pixels
        mov     ebx, dword ptr [edi]        ; ebx = 2 destination pixels
        cmp     eax, edx                    ; are both pixels transparent?
        je      LoopUp                      ; yes, nothing to store
        cmp     ax, dx                      ; is the lower pixel transparent?
        je      DrawOT
        mov     bx, ax                      ; no, copy the lower pixel
DrawOT:
        ror     eax, 16                     ; bring the higher pixels into
        ror     ebx, 16                     ; ax / bx
        cmp     ax, dx                      ; is the higher pixel transparent?
        je      CopyBack
        mov     bx, ax                      ; no, copy the higher pixel
CopyBack:
        ror     ebx, 16                     ; restore memory order
        mov     dword ptr [edi], ebx        ; write merged pixels
LoopUp:
        add     esi, 4
        add     edi, 4
        dec     ecx
        jnz     blitloop16

        ; ---- odd trailing pixel, if any ----
OnePixel:
        test    dword ptr [ebp+20], 1       ; odd linesize?
        jz      endofblit16
        mov     ax, word ptr [esi]
        cmp     ax, dx                      ; last pixel transparent?
        je      endofblit16
        mov     word ptr [edi], ax          ; no, copy it

endofblit16:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey16 ENDP
239
240
241 PUBLIC _BlitColorKey8MMX
242; Same as BlitColorKey8, but implemented with MMX instructions
243;void BlitColorKey8MMX(char *dest, char *src, ULONG key, ULONG linesize)
244
;----------------------------------------------------------------------
; void BlitColorKey8MMX(char *dest, char *src, ULONG key, ULONG linesize)
;
; MMX version of the 8-bit colour key line blit: copies linesize bytes
; from src to dest, leaving a destination byte untouched wherever the
; source byte equals the colour key.
;
; In:    [ebp+8]  dest      destination line
;        [ebp+12] src       source line
;        [ebp+16] key       colour key (only the low byte is used)
;        [ebp+20] linesize  number of bytes to blit
; Out:   nothing; edi, esi, eax, ecx, edx are saved and restored
; Clobb: mm0, mm1, mm2, mm4, mm5, flags
; Notes: arguments live on the stack and are left for the caller to
;        remove (plain ret).
;        Eight pixels are merged per iteration; a remainder of 4..7
;        bytes gets one 4-byte MMX step, and the final 0..3 bytes are
;        copied one by one so nothing beyond linesize is accessed.
;        emms is executed before returning so that x87 code running
;        afterwards finds the FPU register stack empty.
;----------------------------------------------------------------------
_BlitColorKey8MMX PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ecx
        push    edx

        mov     edx, dword ptr [ebp+16]     ; edx = colorkey
        mov     edi, dword ptr [ebp+8]      ; edi = dest
        mov     esi, dword ptr [ebp+12]     ; esi = src

        ; replicate the key byte into all four bytes of edx ...
        mov     dh, dl
        mov     eax, edx
        shl     edx, 16
        and     eax, 0000FFFFh
        or      edx, eax                    ; edx = key:key:key:key
        ; ... and then into all eight bytes of mm4
        movd    mm4, edx
        movd    mm5, edx
        psllq   mm4, 32
        por     mm4, mm5                    ; mm4 = key in every byte

        mov     ecx, dword ptr [ebp+20]     ; ecx = linesize in bytes
        shr     ecx, 3                      ; ecx = number of whole qwords
        jz      BltRemain8

        ; ---- eight pixels per iteration; ecx = qwords left (> 0) ----
bltLoopMMX8:
        movq    mm0, qword ptr [esi]        ; get source qword
        movq    mm1, qword ptr [edi]        ; get dest qword
        movq    mm2, mm0                    ; keep a copy of the source
        pcmpeqb mm0, mm4                    ; mm0 = FFh where src == key
        pand    mm1, mm0                    ; dest kept where transparent
        pandn   mm0, mm2                    ; mm0 = (NOT mask) AND source
        por     mm1, mm0                    ; combine both parts
        movq    qword ptr [edi], mm1        ; write back result
        add     esi, 8
        add     edi, 8
        dec     ecx
        jnz     bltLoopMMX8

BltRemain8:
        mov     ecx, dword ptr [ebp+20]
        and     ecx, 7                      ; ecx = bytes still to do (0..7)
        jz      bltEndMMX8
        cmp     ecx, 4
        jb      blt1MMX8                    ; 1..3 bytes: byte loop only

        ; ---- 4..7 bytes left: merge one dword with MMX ----
blt4MMX8:
        movd    mm0, dword ptr [esi]
        movd    mm1, dword ptr [edi]
        movq    mm2, mm0                    ; keep a copy of the source
        pcmpeqb mm0, mm4                    ; create mask
        pand    mm1, mm0                    ; dest kept where transparent
        pandn   mm0, mm2                    ; source kept where opaque
        por     mm1, mm0                    ; combine both parts
        movd    dword ptr [edi], mm1        ; only the low 4 bytes are stored
        add     esi, 4
        add     edi, 4
        sub     ecx, 4
        jz      bltEndMMX8

        ; ---- 1..3 bytes left: one pixel at a time ----
blt1MMX8:
        mov     al, byte ptr [esi]
        cmp     al, dl                      ; dl = key byte
        je      blt1aMMX8
        mov     byte ptr [edi], al
blt1aMMX8:
        inc     esi
        inc     edi
        dec     ecx
        jnz     blt1MMX8

bltEndMMX8:
        emms                                ; leave MMX state before returning
        pop     edx
        pop     ecx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey8MMX ENDP
425
426
427 PUBLIC _BlitColorKey16MMX
428; Same as BlitColorKey16, but implemented with MMX instructions
429;void BlitColorKey16MMX(char *dest, char *src, ULONG key, ULONG linesize)
;----------------------------------------------------------------------
; void BlitColorKey16MMX(char *dest, char *src, ULONG key, ULONG linesize)
;
; MMX version of the 16-bit colour key line blit: copies linesize
; pixels from src to dest, leaving a destination pixel untouched
; wherever the source pixel equals the colour key.
;
; In:    [ebp+8]  dest      destination line
;        [ebp+12] src       source line
;        [ebp+16] key       colour key (only the low word is used)
;        [ebp+20] linesize  number of 16-bit pixels to blit
; Out:   nothing; edi, esi, ecx, edx are saved and restored
; Clobb: eax, mm0, mm1, mm2, mm4, mm5, flags
; Notes: arguments live on the stack and are left for the caller to
;        remove (plain ret).
;        Four pixels are merged per iteration; a remainder of 2 or 3
;        pixels gets one 2-pixel MMX step and an odd last pixel is
;        copied with word accesses, so nothing beyond linesize pixels
;        is accessed.
;        emms is executed before returning so that x87 code running
;        afterwards finds the FPU register stack empty.
;----------------------------------------------------------------------
_BlitColorKey16MMX PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    ecx
        push    edx

        mov     edx, dword ptr [ebp+16]     ; edx = colorkey
        mov     edi, dword ptr [ebp+8]      ; edi = dest
        mov     esi, dword ptr [ebp+12]     ; esi = src

        ; replicate the key word into both halves of edx ...
        mov     eax, edx
        shl     edx, 16
        and     eax, 0000FFFFh
        or      edx, eax                    ; edx = key:key
        ; ... and then into all four words of mm4
        movd    mm4, edx
        movd    mm5, edx
        psllq   mm4, 32
        por     mm4, mm5                    ; mm4 = key in every word

        mov     ecx, dword ptr [ebp+20]     ; linesize in pixels
        mov     eax, ecx                    ; eax = copy for the remainder
        shr     ecx, 2                      ; ecx = number of whole qwords
        jz      BltRemain16

        ; ---- four pixels per iteration; ecx = qwords left (> 0) ----
bltLoopMMX16:
        movq    mm0, qword ptr [esi]        ; get source qword
        movq    mm1, qword ptr [edi]        ; get destination qword
        movq    mm2, mm0                    ; keep a copy of the source
        pcmpeqw mm0, mm4                    ; mm0 = FFFFh where src == key
        pand    mm1, mm0                    ; dest kept where transparent
        pandn   mm0, mm2                    ; mm0 = (NOT mask) AND source
        por     mm1, mm0                    ; combine both parts
        movq    qword ptr [edi], mm1        ; write back result
        add     esi, 8
        add     edi, 8
        dec     ecx
        jnz     bltLoopMMX16

BltRemain16:
        and     eax, 3                      ; eax = pixels still to do (0..3)
        jz      bltEndMMX16
        cmp     eax, 1
        je      blt1MMX16

        ; ---- 2 or 3 pixels left: merge one pixel pair with MMX ----
blt2MMX16:
        movd    mm0, dword ptr [esi]
        movd    mm1, dword ptr [edi]
        movq    mm2, mm0                    ; keep a copy of the source
        pcmpeqw mm0, mm4                    ; create 16-bit mask
        pand    mm1, mm0                    ; dest kept where transparent
        pandn   mm0, mm2                    ; source kept where opaque
        por     mm1, mm0                    ; combine both parts
        movd    dword ptr [edi], mm1        ; only the low 2 pixels are stored
        cmp     eax, 2
        je      bltEndMMX16                 ; exactly two pixels: done
        add     esi, 4                      ; three pixels: one more to go
        add     edi, 4

        ; ---- single trailing pixel ----
blt1MMX16:
        mov     ax, word ptr [esi]
        cmp     ax, dx                      ; dx = key word
        je      bltEndMMX16
        mov     word ptr [edi], ax

bltEndMMX16:
        emms                                ; leave MMX state before returning
        pop     edx
        pop     ecx
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey16MMX ENDP
520
521;
522; extern void __cdecl BltTransSrcRecMMX(PBYTE dest, PBYTE src, ULONG ulBltWidth,ULONG ulBltHeight
523; ULONG ulDestPitch, ULONG ulSrcPitch, ULONG ulTransCol);
524
525 PUBLIC _BltTransSrcRecMMX
;----------------------------------------------------------------------
; void BltTransSrcRecMMX(PBYTE dest, PBYTE src, ULONG ulBltWidth,
;                        ULONG ulBltHeight, ULONG ulDestPitch,
;                        ULONG ulSrcPitch, ULONG ulTransCol)
;
; Placeholder: the procedure sets up a stack frame, saves and restores
; the general registers and returns without reading any argument or
; touching memory other than its own stack.
;
; Out:   nothing; all registers and flags are unchanged on return
;----------------------------------------------------------------------
_BltTransSrcRecMMX PROC NEAR
        push    ebp
        mov     ebp, esp
        pushad                              ; save all general registers

        ; (no blit code yet)

EndTSBlt:
        popad                               ; restore them again
        leave                               ; esp = ebp, pop ebp
        ret

_BltTransSrcRecMMX ENDP
547
548
549 PUBLIC _BltRec
550;
551; extern void __cdecl BltRec(PBYTE dest, PBYTE src, ULONG ulBltWidth,ULONG ulBltHeight
552; ULONG ulDestPitch, ULONG ulSrcPitch);
553_BltRec PROC NEAR
554 push ebp
555 mov ebp, esp
556 push edi
557 push esi
558 push eax
559 push ebx
560 push ecx
561 push edx
562
563 mov ecx, dword ptr [ebp+16] ; U ulBltWidth
564 mov esi, dword ptr [ebp+12] ; V src
565 mov ebx, ecx ; U
566 mov edx, dword ptr [ebp+20] ; V ulBltHeight
567 and ebx, 0Fh ; U ebx = # of bytes < 16
568 mov edi, dword ptr [ebp+8] ; V dest
569 cmp edx, 0
570 jz BltRecEnd ; height is zero so done
571 shr ecx, 4 ; U
572 jz SmallBlt ; Small (width < 16) rectangle done in special case
573 test ebx, ebx
574 jnz ComplexBlt ; ulBltWidth mod 16 is not 0
575
576;
577; Blitwidth is an multiple of 16
578;
579 mov ebx, dword ptr [ebp+24] ; ulDestPitch
580 mov eax, dword ptr [ebp+28] ; ulSrcPitch
581 sub ebx, dword ptr [ebp+16] ; adjust both widths
582 sub eax, dword ptr [ebp+16]
583 mov dword ptr [ebp+28], eax ; store adjusted SrcPitch
584 mov eax, ecx
585LineLoop:
586 FLD QWORD PTR [ESI]
587 FLD QWORD PTR [ESI+8]
588 FXCH
589 FSTP QWORD PTR [EDI]
590 FSTP QWORD PTR [EDI+8]
591 ADD ESI,16
592 ADD EDI,16
593 dec eax
594 jz LineLoop
595 dec edx
596 jz BltRecEnd
597 add ESI, dword ptr[ebp+28]
598 add EDI, ebx
599 mov eax, ecx
600 jmp LineLoop
601
602SmallBlt:
603 mov eax, dword ptr [ebp+28] ; ulSrcPitch
604 mov ecx, dword ptr [ebp+24] ; ulDestPitch
605 jmp ds:SmallJmpTable[ebx*4]
606SmallJmpTable:
607 dd offset BltRecEnd ; BlitWidth is 0 done
608 dd offset Rec1
609 dd offset Rec2
610 dd offset Rec3
611 dd offset Rec4
612 dd offset Rec5
613 dd offset Rec6
614 dd offset Rec7
615 dd offset Rec8
616 dd offset Rec9
617 dd offset Rec10
618 dd offset Rec11
619 dd offset Rec12
620 dd offset Rec13
621 dd offset Rec14
622 dd offset Rec15
623
624;One Pixel wide
625
626Rec1:
627 cmp edx,4
628 jb Rec1_0123
629 mov bl, byte ptr [esi]
630 add esi,eax
631 mov byte ptr [edi], bl
632 add edi,ecx
633 mov bl, byte ptr [esi]
634 add esi,eax
635 mov byte ptr [edi], bl
636 add edi,ecx
637 mov bl, byte ptr [esi]
638 add esi,eax
639 mov byte ptr [edi], bl
640 add edi,ecx
641 mov bl, byte ptr [esi]
642 add esi,eax
643 mov byte ptr [edi], bl
644 add edi,ecx
645 sub edx,4
646 jnz Rec1
647 jmp BltRecEnd
648Rec1_0123:
649 cmp edx,2
650 jz Rec1_2
651 jb Rec1_01
652; Must be 3 lines left
653 mov bl, byte ptr [esi]
654 add esi,eax
655 mov byte ptr [edi], bl
656 add edi,ecx
657 mov bl, byte ptr [esi]
658 add esi,eax
659 mov byte ptr [edi], bl
660 add edi,ecx
661 mov bl, byte ptr [esi]
662 mov byte ptr [edi], bl
663 jmp BltRecEnd
664Rec1_2:
665 mov bl, byte ptr [esi]
666 add esi,eax
667 mov byte ptr [edi], bl
668 add edi,ecx
669 mov bl, byte ptr [esi]
670 mov byte ptr [edi], bl
671 jmp BltRecEnd
672Rec1_01:
673 test edx,edx
674 jz BltRecEnd
675 mov bl, byte ptr [esi]
676 mov byte ptr [edi], bl
677 jmp BltRecEnd
678
679;2 Pixel Wide
680
681Rec2:
682 cmp edx,4
683 jb Rec2_0123
684 mov bx, word ptr [esi]
685 add esi,eax
686 mov word ptr [edi], bx
687 add edi,ecx
688 mov bx, word ptr [esi]
689 add esi,eax
690 mov word ptr [edi], bx
691 add edi,ecx
692 mov bx, word ptr [esi]
693 add esi,eax
694 mov word ptr [edi], bx
695 add edi,ecx
696 mov bx, word ptr [esi]
697 add esi,eax
698 mov word ptr [edi], bx
699 add edi,ecx
700 sub edx, 4
701 jnz Rec2
702 jmp BltRecEnd
703
704Rec2_0123:
705 cmp edx,2
706 jz Rec2_2
707 jb Rec2_01
708;3 lines left
709 mov bx, word ptr [esi]
710 add esi,eax
711 mov word ptr [edi], bx
712 add edi,ecx
713 mov bx, word ptr [esi]
714 add esi,eax
715 mov word ptr [edi], bx
716 add edi,ecx
717 mov bx, word ptr [esi]
718 mov word ptr [edi], bx
719 jmp BltRecEnd
720Rec2_2:
721 mov bx, word ptr [esi]
722 add esi,eax
723 mov word ptr [edi], bx
724 add edi,ecx
725 mov bx, word ptr [esi]
726 mov word ptr [edi], bx
727 jmp BltRecEnd
728Rec2_01:
729 test edx,edx
730 jz BltRecEnd
731 mov bx, word ptr [esi]
732 mov word ptr [edi], bx
733 jmp BltRecEnd
734
735; 3 Pixel Wide must check if it's better to read 4 bytes as
736; Intel might stall on reading 2 and 1 byte, but this takes more care as we
737; could create a pagefault on the last 3 pixel
738
739Rec3:
740 cmp edx,4
741 jb Rec3_0123
742 push edx
743 mov bx, word ptr [esi]
744 mov dl, byte ptr [esi+2]
745 add esi,eax
746 mov word ptr [edi], bx
747 mov byte ptr [edi+2], dl
748 add edi,ecx
749 mov bx, word ptr [esi]
750 mov dl, byte ptr [esi+2]
751 add esi,eax
752 mov word ptr [edi], bx
753 mov byte ptr [edi+2], dl
754 add edi,ecx
755 mov bx, word ptr [esi]
756 mov dl, byte ptr [esi+2]
757 add esi,eax
758 mov word ptr [edi], bx
759 mov byte ptr [edi+2], dl
760 add edi,ecx
761 mov bx, word ptr [esi]
762 mov dl, byte ptr [esi+2]
763 add esi,eax
764 mov word ptr [edi], bx
765 mov byte ptr [edi+2], dl
766 add edi,ecx
767 pop edx
768 sub edx,4
769 jnz Rec3
770 jmp BltRecEnd
771
772Rec3_0123:
773 cmp edx,2
774 jz Rec3_2
775 jb Rec3_01
776; Must be 3 lines left
777 mov bx, word ptr [esi]
778 mov dl, byte ptr [esi+2]
779 add esi,eax
780 mov word ptr [edi], bx
781 mov byte ptr [edi+2], dl
782 add edi,ecx
783 mov bx, word ptr [esi]
784 mov dl, byte ptr [esi+2]
785 add esi,eax
786 mov word ptr [edi], bx
787 mov byte ptr [edi+2], dl
788 add edi,ecx
789 mov bx, word ptr [esi]
790 mov dl, byte ptr [esi+2]
791 mov word ptr [edi], bx
792 mov byte ptr [edi+2], dl
793 jmp BltRecEnd
794Rec3_2:
795 mov bx, word ptr [esi]
796 mov dl, byte ptr [esi+2]
797 add esi,eax
798 mov word ptr [edi], bx
799 mov byte ptr [edi+2], dl
800 add edi,ecx
801 mov bx, word ptr [esi]
802 mov dl, byte ptr [esi+2]
803 mov word ptr [edi], bx
804 mov byte ptr [edi+2], dl
805 jmp BltRecEnd
806Rec3_01:
807 test edx,edx
808 jz BltRecEnd
809 mov bx, word ptr [esi]
810 mov dl, byte ptr [esi+2]
811 mov word ptr [edi], bx
812 mov byte ptr [edi+2], dl
813 jmp BltRecEnd
814
815; 4 Pixel Wide
816
817Rec4:
818 cmp edx,4
819 jb Rec4_0123
820 mov ebx, dword ptr [esi]
821 add esi,eax
822 mov dword ptr [edi], ebx
823 add edi,ecx
824 mov ebx, dword ptr [esi]
825 add esi,eax
826 mov dword ptr [edi], ebx
827 add edi,ecx
828 mov ebx, dword ptr [esi]
829 add esi,eax
830 mov dword ptr [edi], ebx
831 add edi,ecx
832 mov ebx, dword ptr [esi]
833 add esi,eax
834 mov dword ptr [edi], ebx
835 add edi,ecx
836 sub edx ,4
837 jnz Rec4
838 jmp BltRecEnd
839
840Rec4_0123:
841 cmp edx,2
842 jz Rec2_2
843 jb Rec2_01
844;3 lines left
845 mov ebx, dword ptr [esi]
846 add esi,eax
847 mov dword ptr [edi], ebx
848 add edi,ecx
849 mov ebx, dword ptr [esi]
850 add esi,eax
851 mov dword ptr [edi], ebx
852 add edi,ecx
853 mov ebx, dword ptr [esi]
854 mov dword ptr [edi], ebx
855 jmp BltRecEnd
856Rec4_2:
857 mov ebx, dword ptr [esi]
858 add esi,eax
859 mov dword ptr [edi], ebx
860 add edi,ecx
861 mov ebx, dword ptr [esi]
862 mov dword ptr [edi], ebx
863 jmp BltRecEnd
864Rec4_01:
865 test edx,edx
866 jz BltRecEnd
867 mov ebx, dword ptr [esi]
868 mov dword ptr [edi], ebx
869 jmp BltRecEnd
870
871; 5 Pixel Wide
872
873Rec5:
874 cmp edx,4
875 jb Rec5_0123
876 push edx
877 mov ebx, dword ptr [esi]
878 mov dl, byte ptr [esi+4]
879 add esi,eax
880 mov dword ptr [edi], ebx
881 mov byte ptr [edi+4], dl
882 add edi,ecx
883 mov ebx, dword ptr [esi]
884 mov dl, byte ptr [esi+4]
885 add esi,eax
886 mov dword ptr [edi], ebx
887 mov byte ptr [edi+4], dl
888 add edi,ecx
889 mov ebx, dword ptr [esi]
890 mov dl, byte ptr [esi+4]
891 add esi,eax
892 mov dword ptr [edi], ebx
893 mov byte ptr [edi+4], dl
894 add edi,ecx
895 mov ebx, dword ptr [esi]
896 mov dl, byte ptr [esi+4]
897 add esi,eax
898 mov dword ptr [edi], ebx
899 mov byte ptr [edi+4], dl
900 add edi,ecx
901 pop edx
902 sub edx ,4
903 jnz Rec5
904 jmp BltRecEnd
905Rec5_0123:
906 cmp edx,2
907 jz Rec5_2
908 jb Rec5_01
909; Must be 3 lines left
910 mov ebx, dword ptr [esi]
911 mov dl, byte ptr [esi+4]
912 add esi,eax
913 mov dword ptr [edi], ebx
914 mov byte ptr [edi+4], dl
915 add edi,ecx
916 mov ebx, dword ptr [esi]
917 mov dl, byte ptr [esi+4]
918 add esi,eax
919 mov dword ptr [edi], ebx
920 mov byte ptr [edi+4], dl
921 add edi,ecx
922 mov ebx, dword ptr [esi]
923 mov dl, byte ptr [esi+4]
924 mov dword ptr [edi], ebx
925 mov byte ptr [edi+4], dl
926 jmp BltRecEnd
927Rec5_2:
928 mov ebx, dword ptr [esi]
929 mov dl, byte ptr [esi+4]
930 add esi,eax
931 mov dword ptr [edi], ebx
932 mov byte ptr [edi+4], dl
933 add edi,ecx
934 mov ebx, dword ptr [esi]
935 mov dl, byte ptr [esi+4]
936 mov dword ptr [edi], ebx
937 mov byte ptr [edi+4], dl
938 jmp BltRecEnd
939Rec5_01:
940 test edx,edx
941 jz BltRecEnd
942 mov ebx, dword ptr [esi]
943 mov dl, byte ptr [esi+4]
944 mov dword ptr [edi], ebx
945 mov byte ptr [edi+4], dl
946 jmp BltRecEnd
947
948; 6 Pixel Wide
949
950Rec6:
951 cmp edx,4
952 jb Rec6_0123
953 push edx
954 mov ebx, dword ptr [esi]
955 mov dx, word ptr [esi+4]
956 add esi,eax
957 mov dword ptr [edi], ebx
958 mov word ptr [edi+4], dx
959 add edi,ecx
960 mov ebx, dword ptr [esi]
961 mov dx, word ptr [esi+4]
962 add esi,eax
963 mov dword ptr [edi], ebx
964 mov word ptr [edi+4], dx
965 add edi,ecx
966 mov ebx, dword ptr [esi]
967 mov dx, word ptr [esi+4]
968 add esi,eax
969 mov dword ptr [edi], ebx
970 mov word ptr [edi+4], dx
971 add edi,ecx
972 mov ebx, dword ptr [esi]
973 mov dx, word ptr [esi+4]
974 add esi,eax
975 mov dword ptr [edi], ebx
976 mov word ptr [edi+4], dx
977 add edi,ecx
978 pop edx
979 sub edx ,4
980 jnz Rec6
981 jmp BltRecEnd
982Rec6_0123:
983 cmp edx,2
984 jz Rec6_2
985 jb Rec6_01
986; Must be 3 lines left
987 mov ebx, dword ptr [esi]
988 mov dx, word ptr [esi+4]
989 add esi,eax
990 mov dword ptr [edi], ebx
991 mov word ptr [edi+4], dx
992 add edi,ecx
993 mov ebx, dword ptr [esi]
994 mov dx, word ptr [esi+4]
995 add esi,eax
996 mov dword ptr [edi], ebx
997 mov word ptr [edi+4], dx
998 add edi,ecx
999 mov ebx, dword ptr [esi]
1000 mov dx, word ptr [esi+4]
1001 mov dword ptr [edi], ebx
1002 mov word ptr [edi+4], dx
1003 jmp BltRecEnd
1004Rec6_2:
1005 mov ebx, dword ptr [esi]
1006 mov dx, word ptr [esi+4]
1007 add esi,eax
1008 mov dword ptr [edi], ebx
1009 mov word ptr [edi+4], dx
1010 add edi,ecx
1011 mov ebx, dword ptr [esi]
1012 mov dx, word ptr [esi+4]
1013 mov dword ptr [edi], ebx
1014 mov word ptr [edi+4], dx
1015 jmp BltRecEnd
1016Rec6_01:
1017 test edx,edx
1018 jz BltRecEnd
1019 mov ebx, dword ptr [esi]
1020 mov dx, word ptr [esi+4]
1021 mov dword ptr [edi], ebx
1022 mov word ptr [edi+4], dx
1023 jmp BltRecEnd
1024
1025; 7 Pixel Wide
1026
1027Rec7:
1028 cmp edx,4
1029 jb Rec6_0123
1030 push edx
1031 mov ebx, dword ptr [esi]
1032 mov dx, word ptr [esi+4]
1033 mov dword ptr [edi], ebx
1034 mov word ptr [edi+4], dx
1035 mov bl, byte ptr[esi+6]
1036 add esi,eax
1037 mov byte ptr[edi+6],bl
1038 add edi,ecx
1039 xor ebx,ebx ; clear ebx to avoid stalls
1040 mov ebx, dword ptr [esi]
1041 mov dx, word ptr [esi+4]
1042 mov dword ptr [edi], ebx
1043 mov word ptr [edi+4], dx
1044 mov bl, byte ptr[esi+6]
1045 add esi,eax
1046 mov byte ptr[edi+6],bl
1047 add edi,ecx
1048 xor ebx,ebx ; clear ebx to avoid stalls
1049 mov ebx, dword ptr [esi]
1050 mov dx, word ptr [esi+4]
1051 mov dword ptr [edi], ebx
1052 mov word ptr [edi+4], dx
1053 mov bl, byte ptr[esi+6]
1054 add esi,eax
1055 mov byte ptr[edi+6],bl
1056 add edi,ecx
1057 xor ebx,ebx ; clear ebx to avoid stalls
1058 mov ebx, dword ptr [esi]
1059 mov dx, word ptr [esi+4]
1060 mov dword ptr [edi], ebx
1061 mov word ptr [edi+4], dx
1062 mov bl, byte ptr[esi+6]
1063 add esi,eax
1064 mov byte ptr[edi+6],bl
1065 add edi,ecx
1066 xor ebx,ebx ; clear ebx to avoid stalls
1067 pop edx
1068 sub edx ,4
1069 jnz Rec7
1070 jmp BltRecEnd
1071Rec7_0123:
1072 cmp edx,2
1073 jz Rec7_2
1074 jb Rec7_01
1075; Must be 3 lines left
1076 mov ebx, dword ptr [esi]
1077 mov dx, word ptr [esi+4]
1078 mov dword ptr [edi], ebx
1079 mov word ptr [edi+4], dx
1080 mov bl, byte ptr[esi+6]
1081 add esi,eax
1082 mov byte ptr[edi+6],bl
1083 add edi,ecx
1084 xor ebx,ebx ; clear ebx to avoid stalls
1085 mov ebx, dword ptr [esi]
1086 mov dx, word ptr [esi+4]
1087 mov dword ptr [edi], ebx
1088 mov word ptr [edi+4], dx
1089 mov bl, byte ptr[esi+6]
1090 add esi,eax
1091 mov byte ptr[edi+6],bl
1092 add edi,ecx
1093 xor ebx,ebx ; clear ebx to avoid stalls
1094 mov ebx, dword ptr [esi]
1095 mov dx, word ptr [esi+4]
1096 mov dword ptr [edi], ebx
1097 mov word ptr [edi+4], dx
1098 mov bl, byte ptr[esi+6]
1099 mov byte ptr[edi+6],bl
1100 jmp BltRecEnd
1101Rec7_2:
1102 mov ebx, dword ptr [esi]
1103 mov dx, word ptr [esi+4]
1104 mov dword ptr [edi], ebx
1105 mov word ptr [edi+4], dx
1106 mov bl, byte ptr[esi+6]
1107 add esi,eax
1108 mov byte ptr[edi+6],bl
1109 add edi,ecx
1110 xor ebx,ebx ; clear ebx to avoid stalls
1111 mov ebx, dword ptr [esi]
1112 mov dx, word ptr [esi+4]
1113 mov dword ptr [edi], ebx
1114 mov word ptr [edi+4], dx
1115 mov bl, byte ptr[esi+6]
1116 mov byte ptr[edi+6],bl
1117 jmp BltRecEnd
1118Rec7_01:
1119 test edx,edx
1120 jz BltRecEnd
1121 mov ebx, dword ptr [esi]
1122 mov dx, word ptr [esi+4]
1123 mov dword ptr [edi], ebx
1124 mov word ptr [edi+4], dx
1125 mov bl, byte ptr[esi+6]
1126 mov byte ptr[edi+6],bl
1127 jmp BltRecEnd
1128
1129; 8 Pixel Wide
1130
1131Rec8:
1132 cmp edx,4
1133 jb Rec8_0123
1134 push edx
1135 mov ebx, dword ptr [esi]
1136 mov edx, dword ptr [esi+4]
1137 mov dword ptr [edi], ebx
1138 mov dword ptr [edi+4], edx
1139 add esi,eax
1140 add edi,ecx
1141 mov ebx, dword ptr [esi]
1142 mov edx, dword ptr [esi+4]
1143 mov dword ptr [edi], ebx
1144 mov dword ptr [edi+4], edx
1145 add esi,eax
1146 add edi,ecx
1147 mov ebx, dword ptr [esi]
1148 mov edx, dword ptr [esi+4]
1149 mov dword ptr [edi], ebx
1150 mov dword ptr [edi+4], edx
1151 add esi,eax
1152 add edi,ecx
1153 mov ebx, dword ptr [esi]
1154 mov edx, dword ptr [esi+4]
1155 mov dword ptr [edi], ebx
1156 mov dword ptr [edi+4], edx
1157 add esi,eax
1158 add edi,ecx
1159 pop edx
1160 sub edx ,4
1161 jnz Rec8
1162 jmp BltRecEnd
1163
1164Rec8_0123:
1165 cmp edx,2
1166 jz Rec8_2
1167 jb Rec8_01
1168;3 lines left
1169 mov ebx, dword ptr [esi]
1170 mov edx, dword ptr [esi+4]
1171 mov dword ptr [edi], ebx
1172 mov dword ptr [edi+4], edx
1173 add esi,eax
1174 add edi,ecx
1175 mov ebx, dword ptr [esi]
1176 mov edx, dword ptr [esi+4]
1177 mov dword ptr [edi], ebx
1178 mov dword ptr [edi+4], edx
1179 add esi,eax
1180 add edi,ecx
1181 mov ebx, dword ptr [esi]
1182 mov edx, dword ptr [esi+4]
1183 mov dword ptr [edi], ebx
1184 mov dword ptr [edi+4], edx
1185 jmp BltRecEnd
1186Rec8_2:
1187 mov ebx, dword ptr [esi]
1188 mov edx, dword ptr [esi+4]
1189 mov dword ptr [edi], ebx
1190 mov dword ptr [edi+4], edx
1191 add esi,eax
1192 add edi,ecx
1193 mov ebx, dword ptr [esi]
1194 mov edx, dword ptr [esi+4]
1195 mov dword ptr [edi], ebx
1196 mov dword ptr [edi+4], edx
1197 jmp BltRecEnd
1198Rec8_01:
1199 test edx,edx
1200 jz BltRecEnd
1201 mov ebx, dword ptr [esi]
1202 mov edx, dword ptr [esi+4]
1203 mov dword ptr [edi], ebx
1204 mov dword ptr [edi+4], edx
1205 jmp BltRecEnd
1206
1207; 9 Pixel Wide
1208
1209Rec9:
1210 cmp edx,4
1211 jb Rec9_0123
1212 push edx
1213 FLD QWORD PTR [ESI]
1214 mov bl, byte ptr [esi+8]
1215 FSTP QWORD PTR [EDI]
1216 mov byte ptr [edi+8], bl
1217 add esi,eax
1218 add edi,ecx
1219 FLD QWORD PTR [ESI]
1220 mov bl, byte ptr [esi+8]
1221 FSTP QWORD PTR [EDI]
1222 mov byte ptr [edi+8], bl
1223 add esi,eax
1224 add edi,ecx
1225 FLD QWORD PTR [ESI]
1226 mov bl, byte ptr [esi+8]
1227 FSTP QWORD PTR [EDI]
1228 mov byte ptr [edi+8], bl
1229 add esi,eax
1230 add edi,ecx
1231 FLD QWORD PTR [ESI]
1232 mov bl, byte ptr [esi+8]
1233 FSTP QWORD PTR [EDI]
1234 mov byte ptr [edi+8], bl
1235 add esi,eax
1236 add edi,ecx
1237 pop edx
1238 sub edx ,4
1239 jnz Rec9
1240 jmp BltRecEnd
1241
1242Rec9_0123:
1243 cmp edx,2
1244 jz Rec9_2
1245 jb Rec9_01
1246;3 lines left
1247 FLD QWORD PTR [ESI]
1248 mov bl, byte ptr [esi+8]
1249 FSTP QWORD PTR [EDI]
1250 mov byte ptr [edi+8], bl
1251 add esi,eax
1252 add edi,ecx
1253 FLD QWORD PTR [ESI]
1254 mov bl, byte ptr [esi+8]
1255 FSTP QWORD PTR [EDI]
1256 mov byte ptr [edi+8], bl
1257 add esi,eax
1258 add edi,ecx
1259 FLD QWORD PTR [ESI]
1260 mov bl, byte ptr [esi+8]
1261 FSTP QWORD PTR [EDI]
1262 mov byte ptr [edi+8], bl
1263 jmp BltRecEnd
1264Rec9_2:
1265 FLD QWORD PTR [ESI]
1266 mov bl, byte ptr [esi+8]
1267 FSTP QWORD PTR [EDI]
1268 mov byte ptr [edi+8], bl
1269 add esi,eax
1270 add edi,ecx
1271 FLD QWORD PTR [ESI]
1272 mov bl, byte ptr [esi+8]
1273 FSTP QWORD PTR [EDI]
1274 mov byte ptr [edi+8], bl
1275 jmp BltRecEnd
1276Rec9_01:
1277 test edx,edx
1278 jz BltRecEnd
1279 FLD QWORD PTR [ESI]
1280 mov bl, byte ptr [esi+8]
1281 FSTP QWORD PTR [EDI]
1282 mov byte ptr [edi+8], bl
1283 jmp BltRecEnd
1284
1285; 10 Pixel Wide
1286
1287Rec10:
1288 cmp edx,4
1289 jb Rec10_0123
1290 FLD QWORD PTR [ESI]
1291 mov bx, word ptr [esi+8]
1292 FSTP QWORD PTR [EDI]
1293 mov word ptr [edi+8], bx
1294 add esi,eax
1295 add edi,ecx
1296 FLD QWORD PTR [ESI]
1297 mov bx, word ptr [esi+8]
1298 FSTP QWORD PTR [EDI]
1299 mov word ptr [edi+8], bx
1300 add esi,eax
1301 add edi,ecx
1302 FLD QWORD PTR [ESI]
1303 mov bx, word ptr [esi+8]
1304 FSTP QWORD PTR [EDI]
1305 mov word ptr [edi+8], bx
1306 add esi,eax
1307 add edi,ecx
1308 FLD QWORD PTR [ESI]
1309 mov bx, word ptr [esi+8]
1310 FSTP QWORD PTR [EDI]
1311 mov word ptr [edi+8], bx
1312 add esi,eax
1313 add edi,ecx
1314 sub edx ,4
1315 jnz Rec10
1316 jmp BltRecEnd
1317
1318Rec10_0123:
1319 cmp edx,2
1320 jz Rec10_2
1321 jb Rec10_01
1322;3 lines left
1323 FLD QWORD PTR [ESI]
1324 mov bx, word ptr [esi+8]
1325 FSTP QWORD PTR [EDI]
1326 mov word ptr [edi+8], bx
1327 add esi,eax
1328 add edi,ecx
1329 FLD QWORD PTR [ESI]
1330 mov bx, word ptr [esi+8]
1331 FSTP QWORD PTR [EDI]
1332 mov word ptr [edi+8], bx
1333 add esi,eax
1334 add edi,ecx
1335 FLD QWORD PTR [ESI]
1336 mov bx, word ptr [esi+8]
1337 FSTP QWORD PTR [EDI]
1338 mov word ptr [edi+8], bx
1339 jmp BltRecEnd
1340Rec10_2:
1341 FLD QWORD PTR [ESI]
1342 mov bx, word ptr [esi+8]
1343 FSTP QWORD PTR [EDI]
1344 mov word ptr [edi+8], bx
1345 add esi,eax
1346 add edi,ecx
1347 FLD QWORD PTR [ESI]
1348 mov bx, word ptr [esi+8]
1349 FSTP QWORD PTR [EDI]
1350 mov word ptr [edi+8], bx
1351 jmp BltRecEnd
1352Rec10_01:
1353 test edx,edx
1354 jz BltRecEnd
1355 FLD QWORD PTR [ESI]
1356 mov bx, word ptr [esi+8]
1357 FSTP QWORD PTR [EDI]
1358 mov word ptr [edi+8], bx
1359 jmp BltRecEnd
1360
1361; 11 Pixel Wide
1362
1363Rec11:
1364 cmp edx,4
1365 jb Rec11_0123
1366 push edx
1367 FLD QWORD PTR [ESI]
1368 mov bx, word ptr [esi+8]
1369 mov dl, byte ptr [esi+10]
1370 FSTP QWORD PTR [EDI]
1371 mov word ptr [edi+8], bx
1372 mov byte ptr [edi+10], dl
1373 add esi,eax
1374 add edi,ecx
1375 FLD QWORD PTR [ESI]
1376 mov bx, word ptr [esi+8]
1377 mov dl, byte ptr [esi+10]
1378 FSTP QWORD PTR [EDI]
1379 mov word ptr [edi+8], bx
1380 mov byte ptr [edi+10], dl
1381 add esi,eax
1382 add edi,ecx
1383 FLD QWORD PTR [ESI]
1384 mov bx, word ptr [esi+8]
1385 mov dl, byte ptr [esi+10]
1386 FSTP QWORD PTR [EDI]
1387 mov word ptr [edi+8], bx
1388 mov byte ptr [edi+10], dl
1389 add esi,eax
1390 add edi,ecx
1391 FLD QWORD PTR [ESI]
1392 mov bx, word ptr [esi+8]
1393 mov dl, byte ptr [esi+10]
1394 FSTP QWORD PTR [EDI]
1395 mov word ptr [edi+8], bx
1396 mov byte ptr [edi+10], dl
1397 add esi,eax
1398 add edi,ecx
1399 pop edx
1400 sub edx ,4
1401 jnz Rec10
1402 jmp BltRecEnd
1403
1404Rec11_0123:
1405 cmp edx,2
1406 jz Rec11_2
1407 jb Rec11_01
1408;3 lines left
1409 FLD QWORD PTR [ESI]
1410 mov bx, word ptr [esi+8]
1411 mov dl, byte ptr [esi+10]
1412 FSTP QWORD PTR [EDI]
1413 mov word ptr [edi+8], bx
1414 mov byte ptr [edi+10], dl
1415 add esi,eax
1416 add edi,ecx
1417 FLD QWORD PTR [ESI]
1418 mov bx, word ptr [esi+8]
1419 mov dl, byte ptr [esi+10]
1420 FSTP QWORD PTR [EDI]
1421 mov word ptr [edi+8], bx
1422 mov byte ptr [edi+10], dl
1423 add esi,eax
1424 add edi,ecx
1425 FLD QWORD PTR [ESI]
1426 mov bx, word ptr [esi+8]
1427 mov dl, byte ptr [esi+10]
1428 FSTP QWORD PTR [EDI]
1429 mov word ptr [edi+8], bx
1430 mov byte ptr [edi+10], dl
1431 jmp BltRecEnd
1432Rec11_2:
1433 FLD QWORD PTR [ESI]
1434 mov bx, word ptr [esi+8]
1435 mov dl, byte ptr [esi+10]
1436 FSTP QWORD PTR [EDI]
1437 mov word ptr [edi+8], bx
1438 mov byte ptr [edi+10], dl
1439 add esi,eax
1440 add edi,ecx
1441 FLD QWORD PTR [ESI]
1442 mov bx, word ptr [esi+8]
1443 mov dl, byte ptr [esi+10]
1444 FSTP QWORD PTR [EDI]
1445 mov word ptr [edi+8], bx
1446 mov byte ptr [edi+10], dl
1447 jmp BltRecEnd
1448Rec11_01:
1449 test edx,edx
1450 jz BltRecEnd
1451 FLD QWORD PTR [ESI]
1452 mov bx, word ptr [esi+8]
1453 mov dl, byte ptr [esi+10]
1454 FSTP QWORD PTR [EDI]
1455 mov word ptr [edi+8], bx
1456 mov byte ptr [edi+10], dl
1457 jmp BltRecEnd
1458
1459; 12 Pixel Wide
1460
1461Rec12:
1462 cmp edx,4
1463 jb Rec12_0123
1464 FLD QWORD PTR [ESI]
1465 mov ebx, dword ptr [esi+8]
1466 FSTP QWORD PTR [EDI]
1467 mov dword ptr [edi+8], ebx
1468 add esi,eax
1469 add edi,ecx
1470 FLD QWORD PTR [ESI]
1471 mov ebx, dword ptr [esi+8]
1472 FSTP QWORD PTR [EDI]
1473 mov dword ptr [edi+8], ebx
1474 add esi,eax
1475 add edi,ecx
1476 FLD QWORD PTR [ESI]
1477 mov ebx, dword ptr [esi+8]
1478 FSTP QWORD PTR [EDI]
1479 mov dword ptr [edi+8], ebx
1480 add esi,eax
1481 add edi,ecx
1482 FLD QWORD PTR [ESI]
1483 mov ebx, dword ptr [esi+8]
1484 FSTP QWORD PTR [EDI]
1485 mov dword ptr [edi+8], ebx
1486 add esi,eax
1487 add edi,ecx
1488 sub edx ,4
1489 jnz Rec12
1490 jmp BltRecEnd
1491
1492Rec12_0123:
1493 cmp edx,2
1494 jz Rec12_2
1495 jb Rec12_01
1496;3 lines left
1497 FLD QWORD PTR [ESI]
1498 mov ebx, dword ptr [esi+8]
1499 FSTP QWORD PTR [EDI]
1500 mov dword ptr [edi+8], ebx
1501 add esi,eax
1502 add edi,ecx
1503 FLD QWORD PTR [ESI]
1504 mov ebx, dword ptr [esi+8]
1505 FSTP QWORD PTR [EDI]
1506 mov dword ptr [edi+8], ebx
1507 add esi,eax
1508 add edi,ecx
1509 FLD QWORD PTR [ESI]
1510 mov ebx, dword ptr [esi+8]
1511 FSTP QWORD PTR [EDI]
1512 mov dword ptr [edi+8], ebx
1513 jmp BltRecEnd
1514Rec12_2:
1515 FLD QWORD PTR [ESI]
1516 mov ebx, dword ptr [esi+8]
1517 FSTP QWORD PTR [EDI]
1518 mov dword ptr [edi+8], ebx
1519 add esi,eax
1520 add edi,ecx
1521 FLD QWORD PTR [ESI]
1522 mov ebx, dword ptr [esi+8]
1523 FSTP QWORD PTR [EDI]
1524 mov dword ptr [edi+8], ebx
1525 jmp BltRecEnd
1526Rec12_01:
1527 test edx,edx
1528 jz BltRecEnd
1529 FLD QWORD PTR [ESI]
1530 mov ebx, dword ptr [esi+8]
1531 FSTP QWORD PTR [EDI]
1532 mov dword ptr [edi+8], ebx
1533 jmp BltRecEnd
1534
1535; 13 Pixel Wide
1536
1537Rec13:
1538 cmp edx,4
1539 jb Rec13_0123
1540 push edx
1541 FLD QWORD PTR [ESI]
1542 mov ebx, dword ptr [esi+8]
1543 mov dl, byte ptr [esi+12]
1544 FSTP QWORD PTR [EDI]
1545 mov dword ptr [edi+8], ebx
1546 mov byte ptr [edi+12], dl
1547 add esi,eax
1548 add edi,ecx
1549 FLD QWORD PTR [ESI]
1550 mov ebx, dword ptr [esi+8]
1551 mov dl, byte ptr [esi+12]
1552 FSTP QWORD PTR [EDI]
1553 mov dword ptr [edi+8], ebx
1554 mov byte ptr [edi+12], dl
1555 add esi,eax
1556 add edi,ecx
1557 FLD QWORD PTR [ESI]
1558 mov ebx, dword ptr [esi+8]
1559 mov dl, byte ptr [esi+12]
1560 FSTP QWORD PTR [EDI]
1561 mov dword ptr [edi+8], ebx
1562 mov byte ptr [edi+12], dl
1563 add esi,eax
1564 add edi,ecx
1565 FLD QWORD PTR [ESI]
1566 mov ebx, dword ptr [esi+8]
1567 mov dl, byte ptr [esi+12]
1568 FSTP QWORD PTR [EDI]
1569 mov dword ptr [edi+8], ebx
1570 mov byte ptr [edi+12], dl
1571 add esi,eax
1572 add edi,ecx
1573 pop edx
1574 sub edx ,4
1575 jnz Rec13
1576 jmp BltRecEnd
1577
1578Rec13_0123:
1579 cmp edx,2
1580 jz Rec13_2
1581 jb Rec13_01
1582;3 lines left
1583 FLD QWORD PTR [ESI]
1584 mov ebx, dword ptr [esi+8]
1585 mov dl, byte ptr [esi+12]
1586 FSTP QWORD PTR [EDI]
1587 mov dword ptr [edi+8], ebx
1588 mov byte ptr [edi+12], dl
1589 add esi,eax
1590 add edi,ecx
1591 FLD QWORD PTR [ESI]
1592 mov ebx, dword ptr [esi+8]
1593 mov dl, byte ptr [esi+12]
1594 FSTP QWORD PTR [EDI]
1595 mov dword ptr [edi+8], ebx
1596 mov byte ptr [edi+12], dl
1597 add esi,eax
1598 add edi,ecx
1599 FLD QWORD PTR [ESI]
1600 mov ebx, dword ptr [esi+8]
1601 mov dl, byte ptr [esi+12]
1602 FSTP QWORD PTR [EDI]
1603 mov dword ptr [edi+8], ebx
1604 mov byte ptr [edi+12], dl
1605 jmp BltRecEnd
1606Rec13_2:
1607 FLD QWORD PTR [ESI]
1608 mov ebx, dword ptr [esi+8]
1609 mov dl, byte ptr [esi+12]
1610 FSTP QWORD PTR [EDI]
1611 mov dword ptr [edi+8], ebx
1612 mov byte ptr [edi+12], dl
1613 add esi,eax
1614 add edi,ecx
1615 FLD QWORD PTR [ESI]
1616 mov ebx, dword ptr [esi+8]
1617 mov dl, byte ptr [esi+12]
1618 FSTP QWORD PTR [EDI]
1619 mov dword ptr [edi+8], ebx
1620 mov byte ptr [edi+12], dl
1621 jmp BltRecEnd
1622Rec13_01:
1623 test edx,edx
1624 jz BltRecEnd
1625 FLD QWORD PTR [ESI]
1626 mov ebx, dword ptr [esi+8]
1627 mov dl, byte ptr [esi+12]
1628 FSTP QWORD PTR [EDI]
1629 mov dword ptr [edi+8], ebx
1630 mov byte ptr [edi+12], dl
1631 jmp BltRecEnd
1632
1633; 14 Pixel Wide
1634
1635Rec14:
1636 cmp edx,4
1637 jb Rec14_0123
1638 push edx
1639 FLD QWORD PTR [ESI]
1640 mov ebx, dword ptr [esi+8]
1641 mov dx, word ptr [esi+12]
1642 FSTP QWORD PTR [EDI]
1643 mov dword ptr [edi+8], ebx
1644 mov word ptr [edi+12], dx
1645 add esi,eax
1646 add edi,ecx
1647 FLD QWORD PTR [ESI]
1648 mov ebx, dword ptr [esi+8]
1649 mov dx, word ptr [esi+12]
1650 FSTP QWORD PTR [EDI]
1651 mov dword ptr [edi+8], ebx
1652 mov word ptr [edi+12], dx
1653 add esi,eax
1654 add edi,ecx
1655 FLD QWORD PTR [ESI]
1656 mov ebx, dword ptr [esi+8]
1657 mov dx, word ptr [esi+12]
1658 FSTP QWORD PTR [EDI]
1659 mov dword ptr [edi+8], ebx
1660 mov word ptr [edi+12], dx
1661 add esi,eax
1662 add edi,ecx
1663 FLD QWORD PTR [ESI]
1664 mov ebx, dword ptr [esi+8]
1665 mov dx, word ptr [esi+12]
1666 FSTP QWORD PTR [EDI]
1667 mov dword ptr [edi+8], ebx
1668 mov word ptr [edi+12], dx
1669 add esi,eax
1670 add edi,ecx
1671 pop edx
1672 sub edx ,4
1673 jnz Rec14
1674 jmp BltRecEnd
1675
1676Rec14_0123:
1677 cmp edx,2
1678 jz Rec14_2
1679 jb Rec14_01
1680;3 lines left
1681 FLD QWORD PTR [ESI]
1682 mov ebx, dword ptr [esi+8]
1683 mov dx, word ptr [esi+12]
1684 FSTP QWORD PTR [EDI]
1685 mov dword ptr [edi+8], ebx
1686 mov word ptr [edi+12], dx
1687 add esi,eax
1688 add edi,ecx
1689 FLD QWORD PTR [ESI]
1690 mov ebx, dword ptr [esi+8]
1691 mov dx, word ptr [esi+12]
1692 FSTP QWORD PTR [EDI]
1693 mov dword ptr [edi+8], ebx
1694 mov word ptr [edi+12], dx
1695 add esi,eax
1696 add edi,ecx
1697 FLD QWORD PTR [ESI]
1698 mov ebx, dword ptr [esi+8]
1699 mov dx, word ptr [esi+12]
1700 FSTP QWORD PTR [EDI]
1701 mov dword ptr [edi+8], ebx
1702 mov word ptr [edi+12], dx
1703 jmp BltRecEnd
1704Rec14_2:
1705 FLD QWORD PTR [ESI]
1706 mov ebx, dword ptr [esi+8]
1707 mov dx, word ptr [esi+12]
1708 FSTP QWORD PTR [EDI]
1709 mov dword ptr [edi+8], ebx
1710 mov word ptr [edi+12], dx
1711 add esi,eax
1712 add edi,ecx
1713 FLD QWORD PTR [ESI]
1714 mov ebx, dword ptr [esi+8]
1715 mov dx, word ptr [esi+12]
1716 FSTP QWORD PTR [EDI]
1717 mov dword ptr [edi+8], ebx
1718 mov word ptr [edi+12], dx
1719 jmp BltRecEnd
1720Rec14_01:
1721 test edx,edx
1722 jz BltRecEnd
1723 FLD QWORD PTR [ESI]
1724 mov ebx, dword ptr [esi+8]
1725 mov dx, word ptr [esi+12]
1726 FSTP QWORD PTR [EDI]
1727 mov dword ptr [edi+8], ebx
1728 mov word ptr [edi+12], dx
1729 jmp BltRecEnd
1730
1731; 15 Pixel Wide
1732
1733Rec15:
1734 cmp edx,4
1735 jb Rec15_0123
1736 push edx
1737 FLD QWORD PTR [ESI]
1738 mov ebx, dword ptr [esi+8]
1739 mov dx, word ptr [esi+12]
1740 FSTP QWORD PTR [EDI]
1741 mov dword ptr [edi+8], ebx
1742 mov bl, byte ptr[esi+14]
1743 mov word ptr [edi+12], dx
1744 add esi,eax
1745 mov byte ptr[edi+14], bl
1746 add edi,ecx
1747 FLD QWORD PTR [ESI]
1748 mov ebx, dword ptr [esi+8]
1749 mov dx, word ptr [esi+12]
1750 FSTP QWORD PTR [EDI]
1751 mov dword ptr [edi+8], ebx
1752 mov bl, byte ptr[esi+14]
1753 mov word ptr [edi+12], dx
1754 add esi,eax
1755 mov byte ptr[edi+14], bl
1756 add edi,ecx
1757 FLD QWORD PTR [ESI]
1758 mov ebx, dword ptr [esi+8]
1759 mov dx, word ptr [esi+12]
1760 FSTP QWORD PTR [EDI]
1761 mov dword ptr [edi+8], ebx
1762 mov bl, byte ptr[esi+14]
1763 mov word ptr [edi+12], dx
1764 add esi,eax
1765 mov byte ptr[edi+14], bl
1766 add edi,ecx
1767 FLD QWORD PTR [ESI]
1768 mov ebx, dword ptr [esi+8]
1769 mov dx, word ptr [esi+12]
1770 FSTP QWORD PTR [EDI]
1771 mov dword ptr [edi+8], ebx
1772 mov bl, byte ptr[esi+14]
1773 mov word ptr [edi+12], dx
1774 add esi,eax
1775 mov byte ptr[edi+14], bl
1776 add edi,ecx
1777 pop edx
1778 sub edx ,4
1779 jnz Rec15
1780 jmp BltRecEnd
1781
1782Rec15_0123:
1783 cmp edx,2
1784 jz Rec15_2
1785 jb Rec15_01
1786;3 lines left
1787 FLD QWORD PTR [ESI]
1788 mov ebx, dword ptr [esi+8]
1789 mov dx, word ptr [esi+12]
1790 FSTP QWORD PTR [EDI]
1791 mov dword ptr [edi+8], ebx
1792 mov bl, byte ptr[esi+14]
1793 mov word ptr [edi+12], dx
1794 add esi,eax
1795 mov byte ptr[edi+14], bl
1796 add edi,ecx
1797 FLD QWORD PTR [ESI]
1798 mov ebx, dword ptr [esi+8]
1799 mov dx, word ptr [esi+12]
1800 FSTP QWORD PTR [EDI]
1801 mov dword ptr [edi+8], ebx
1802 mov bl, byte ptr[esi+14]
1803 mov word ptr [edi+12], dx
1804 add esi,eax
1805 mov byte ptr[edi+14], bl
1806 add edi,ecx
1807 FLD QWORD PTR [ESI]
1808 mov ebx, dword ptr [esi+8]
1809 mov dx, word ptr [esi+12]
1810 FSTP QWORD PTR [EDI]
1811 mov dword ptr [edi+8], ebx
1812 mov bl, byte ptr[esi+14]
1813 mov word ptr [edi+12], dx
1814 mov byte ptr[edi+14], bl
1815 jmp BltRecEnd
1816Rec15_2:
1817 FLD QWORD PTR [ESI]
1818 mov ebx, dword ptr [esi+8]
1819 mov dx, word ptr [esi+12]
1820 FSTP QWORD PTR [EDI]
1821 mov dword ptr [edi+8], ebx
1822 mov bl, byte ptr[esi+14]
1823 mov word ptr [edi+12], dx
1824 add esi,eax
1825 mov byte ptr[edi+14], bl
1826 add edi,ecx
1827 FLD QWORD PTR [ESI]
1828 mov ebx, dword ptr [esi+8]
1829 mov dx, word ptr [esi+12]
1830 FSTP QWORD PTR [EDI]
1831 mov dword ptr [edi+8], ebx
1832 mov bl, byte ptr[esi+14]
1833 mov word ptr [edi+12], dx
1834 mov byte ptr[edi+14], bl
1835 jmp BltRecEnd
1836Rec15_01:
1837 test edx,edx
1838 jz BltRecEnd
1839 FLD QWORD PTR [ESI]
1840 mov ebx, dword ptr [esi+8]
1841 mov dx, word ptr [esi+12]
1842 FSTP QWORD PTR [EDI]
1843 mov dword ptr [edi+8], ebx
1844 mov bl, byte ptr[esi+14]
1845 mov word ptr [edi+12], dx
1846 mov byte ptr[edi+14], bl
1847 jmp BltRecEnd
1848
1849
1850ComplexBlt:
1851 ; Blit first the even rect then the rest
1852
1853 push dword ptr [ebp+28] ; ulSrcPitch
1854 push dword ptr [ebp+24] ; ulDestPitch
1855 push edx
1856 shl ecx,4
1857 push ecx
1858 push esi
1859 push edi
1860 call _BltRec
1861 sub esp,24
1862 add esi,ecx
1863 add edi,ecx
1864 push dword ptr [ebp+28] ; ulSrcPitch
1865 push dword ptr [ebp+24] ; ulDestPitch
1866 push edx
1867 push ebx
1868 push esi
1869 push edi
1870 call _BltRec
1871 sub esp,24
1872
1873BltRecEnd:
1874 pop edx
1875 pop ecx
1876 pop ebx
1877 pop eax
1878 pop esi
1879 pop edi
1880 pop ebp
1881 ret
1882_BltRec ENDP
1883
1884
1885 PUBLIC _CPUHasMMX
1886;
1887; int __cdecl CPUHasMMX()
1888; returns:
1889; 0 = NoMMX
1890; 1 = MMX
1891; 2 = MMX+CMov instuction
1892
1893_CPUHasMMX PROC NEAR
1894 push ebp
1895 mov ebp, esp
1896 push edi
1897 push esi
1898 push ebx
1899 push ecx
1900 push edx
1901
1902 pushfd
1903 pop eax
1904 mov ebx ,eax
1905 xor eax, 00200000h
1906 push eax
1907 popfd
1908 pushfd
1909 pop eax
1910 sub eax,ebx
1911 jz Return ; No CPUID => No MMX => return 0 in eax;
1912 mov eax, 1
1913 CPUID
1914 test edx,00800000h ; MMX Bit Set ?
1915 jz Return
1916 mov eax, 1
1917 test edx,00008000h ; Conditonal Mov Bit Set ?
1918 jz Return
1919 inc eax
1920Return:
1921 pop edx
1922 pop ecx
1923 pop ebx
1924 pop esi
1925 pop edi
1926 pop ebp
1927 ret
1928_CPUHasMMX ENDP
1929
1930 PUBLIC _MemFlip
1931
1932;
1933; memcpy via FLD / FSTP MMX might even be faster but
1934; not present on every system
1935; to maximize the speed we copy 64 bytes in each loop
1936; and after the loop the rest left
1937;
1938;
1939;void __cdecl MemFlip(PBYTE dest, PBYTE src, ULONG Size);
1940
1941_MemFlip PROC NEAR
1942 push ebp
1943 mov ebp, esp
1944 push edi
1945 push esi
1946 push eax
1947 push ebx
1948 push ecx
1949
1950 mov eax , dword ptr [ebp+16] ; Size of Buffer
1951 mov edi , dword ptr [ebp+8] ; Destination
1952 mov ebx , eax
1953 mov esi , dword ptr [ebp+12] ; SourcePointer
1954
1955 and ebx , 0000003Fh ; Calc leftover bytes
1956 shr eax , 5 ; Calc Loops
1957
1958 jz COPYREMAIN ; Less then 64 to copy
1959ALIGN 4
1960
1961Loop64:
1962 FLD QWORD PTR [ESI] ; 1
1963 FLD QWORD PTR [ESI+8] ; 2
1964 FXCH ; Doesn't take any clocks
1965 FSTP QWORD PTR [EDI] ; 3,4 Clocks
1966 FSTP QWORD PTR [EDI+8] ; 5,6
1967 ADD ESI,16 ; 7 U Integer instruction can be executed parallel
1968 ADD EDI,16 ; 7 V Total clocks for copying 16 byte 7 clocks Rep Movs needs 20! for each 16 byte + 13 setup
1969 FLD QWORD PTR [ESI]
1970 FLD QWORD PTR [ESI+8]
1971 FXCH
1972 FSTP QWORD PTR [EDI]
1973 FSTP QWORD PTR [EDI+8]
1974 ADD ESI,16
1975 ADD EDI,16
1976 FLD QWORD PTR [ESI]
1977 FLD QWORD PTR [ESI+8]
1978 FXCH
1979 FSTP QWORD PTR [EDI]
1980 FSTP QWORD PTR [EDI+8]
1981 ADD ESI,16
1982 ADD EDI,16
1983 FLD QWORD PTR [ESI]
1984 FLD QWORD PTR [ESI+8]
1985 FXCH
1986 FSTP QWORD PTR [EDI]
1987 FSTP QWORD PTR [EDI+8]
1988 ADD ESI,16
1989 ADD EDI,16
1990 inc eax
1991 jnz Loop64
1992
1993COPYREMAIN:
1994 test ebx, ebx ; something left ?
1995 jz EndOffFlip
1996
1997 test ebx, 00000020h; at least 32 bytes left ?
1998 jz Test16
1999
2000 FLD QWORD PTR [ESI]
2001 FLD QWORD PTR [ESI+8]
2002 FXCH
2003 FSTP QWORD PTR [EDI]
2004 FSTP QWORD PTR [EDI+8]
2005 ADD ESI,16
2006 ADD EDI,16
2007 FLD QWORD PTR [ESI]
2008 FLD QWORD PTR [ESI+8]
2009 FXCH
2010 FSTP QWORD PTR [EDI]
2011 FSTP QWORD PTR [EDI+8]
2012 ADD ESI,16
2013 ADD EDI,16
2014 sub ebx, 00000020h
2015 jz EndOffFlip
2016
2017Test16:
2018
2019 test ebx, 00000010h; at least 16 bytes left ?
2020 jb Test8
2021
2022 FLD QWORD PTR [ESI]
2023 FLD QWORD PTR [ESI+8]
2024 FXCH
2025 FSTP QWORD PTR [EDI]
2026 FSTP QWORD PTR [EDI+8]
2027 ADD ESI,16
2028 ADD EDI,16
2029 sub ebx, 00000010h
2030 jz EndOffFlip
2031Test8:
2032
2033 test ebx, 00000008h; at least 8 bytes left ?
2034 jb Test4
2035 mov eax,[esi]
2036 mov ecx,[esi+4]
2037 mov [edi],eax
2038 mov [edi+4],ecx
2039 add esi, 8
2040 add edi, 8
2041 sub ebx, 8
2042 jz EndOffFlip
2043
2044Test4:
2045 test ebx, 00000004h; at least 4 bytes left ?
2046 jb Test2
2047 mov eax,[esi]
2048 sub ebx, 4
2049 mov [edi],eax
2050 add esi, 4
2051 add edi, 4
2052 test ebx, ebx ; something left ?
2053 jz EndOffFlip
2054
2055Test2:
2056 test ebx, 00000002h
2057 jb Copy1
2058 mov ax,[esi]
2059 sub ebx,2
2060 mov [edi],ax
2061 add esi,2
2062 add edi,2
2063 test ebx,ebx
2064 jz EndOffFlip
2065
2066Copy1:
2067 mov al,[esi]
2068 mov [edi],al
2069
2070EndOffFlip:
2071 pop ecx
2072 pop ebx
2073 pop eax
2074 pop esi
2075 pop edi
2076 pop ebp
2077 ret
2078_MemFlip ENDP
2079
2080CODE32 ENDS
2081
2082 END
Note: See TracBrowser for help on using the repository browser.