source: trunk/src/ddraw/asmutil.asm@ 2912

Last change on this file since 2912 was 2638, checked in by hugh, 26 years ago

Bugfixes at many places

File size: 40.4 KB
Line 
1; $Id: asmutil.asm,v 1.6 2000-02-04 19:31:26 hugh Exp $
2
3;
4; asmutil.asm Color key bit blitting for DirectDraw
5;
6; Copyright 1998 Sander van Leeuwen
7; 1999 Markus Montkowski
8;
9; Project Odin Software License can be found in LICENSE.TXT
10;
11
12 NAME asmutil
13.586p
14.MMX
15
16CODE32 SEGMENT DWORD USE32 PUBLIC 'CODE'
17CODE32 ENDS
18DATA32 SEGMENT DWORD USE32 PUBLIC 'DATA'
19DATA32 ENDS
20CONST32 SEGMENT DWORD USE32 PUBLIC 'CONST'
21CONST32 ENDS
22BSS32 SEGMENT DWORD USE32 PUBLIC 'BSS'
23BSS32 ENDS
24DGROUP GROUP CONST32, BSS32, DATA32
25 ASSUME CS:FLAT, DS:FLAT, SS:FLAT, ES:FLAT
26 DATA32 SEGMENT
27 DATA32 ENDS
28 BSS32 SEGMENT
29 BSS32 ENDS
30 CONST32 SEGMENT
31 CONST32 ENDS
32
33
34CODE32 SEGMENT
35
;-----------------------------------------------------------------------
; void BlitColorKey8(char *dest, char *src, ULONG key, ULONG linesize)
;
; Copies one line of 8-bit pixels from src to dest.  A source pixel whose
; value equals the low byte of key is transparent: the matching dest
; pixel is left unchanged.  C equivalent:
;
;       endpos = dest + linesize;
;       while (dest < endpos) {
;           if (*src != (char)key) *dest = *src;
;           dest++; src++;
;       }
;
; In:    [ebp+8]  dest      destination line
;        [ebp+12] src       source line
;        [ebp+16] key       colour key (only the low byte is used)
;        [ebp+20] linesize  number of bytes (pixels) to process
; Out:   nothing; every register is restored before returning.
;
; The line is processed a dword at a time and the 0..3 trailing bytes
; are handled one by one, so no byte outside [0, linesize) is read or
; written.
;-----------------------------------------------------------------------
        PUBLIC  _BlitColorKey8
_BlitColorKey8  PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

        mov     edi, dword ptr [ebp+8]          ; edi = dest
        mov     esi, dword ptr [ebp+12]         ; esi = src
        mov     edx, dword ptr [ebp+16]         ; dl  = colour key
        mov     ecx, dword ptr [ebp+20]         ; ecx = linesize in bytes

        mov     dh, dl                          ; replicate the key byte ...
        mov     eax, edx
        shl     edx, 16
        and     eax, 0000FFFFh
        or      edx, eax                        ; ... into all four bytes of edx
        shr     ecx, 2                          ; ecx = number of whole dwords
        jz      blitremain                      ; fewer than 4 bytes in total

blitloop:
        mov     ebx, dword ptr [esi]            ; ebx = four source pixels
        cmp     ebx, edx                        ; all four transparent?
        je      Bck8NextDword                   ; yes: dest stays as it is
        mov     eax, dword ptr [edi]            ; eax = four dest pixels

        cmp     bl, dl                          ; pixel 0
        je      Bck8Skip1
        mov     al, bl
Bck8Skip1:
        cmp     bh, dl                          ; pixel 1
        je      Bck8Skip2
        mov     ah, bh
Bck8Skip2:
        ror     eax, 16                         ; bring pixels 2 and 3 into al/ah
        ror     ebx, 16                         ; and bl/bh
        cmp     bl, dl                          ; pixel 2
        je      skipbyte3
        mov     al, bl
skipbyte3:
        cmp     bh, dl                          ; pixel 3
        je      skipbyte4
        mov     ah, bh
skipbyte4:
        ror     eax, 16                         ; restore the byte order
        mov     dword ptr [edi], eax            ; write the merged dword back

Bck8NextDword:
        add     esi, 4                          ; always step past the dword so
        add     edi, 4                          ; the tail starts at the right byte
        dec     ecx
        jnz     blitloop                        ; loop while dwords remain

blitremain:
        mov     ecx, dword ptr [ebp+20]
        and     ecx, 3                          ; ecx = 0..3 trailing bytes
        jz      endofblit

Bck8TailLoop:
        mov     al, byte ptr [esi]
        cmp     al, dl                          ; transparent?
        je      Bck8TailSkip
        mov     byte ptr [edi], al
Bck8TailSkip:
        inc     esi
        inc     edi
        dec     ecx
        jnz     Bck8TailLoop

endofblit:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey8  ENDP
180
;-----------------------------------------------------------------------
; void BlitColorKey16(char *dest, char *src, ULONG key, ULONG linesize)
;
; Copies one line of 16-bit pixels from src to dest.  A source pixel
; equal to the low word of key is transparent: the matching dest pixel
; is left unchanged.
;
; In:    [ebp+8]  dest      destination line
;        [ebp+12] src       source line
;        [ebp+16] key       colour key (only the low 16 bits are used)
;        [ebp+20] linesize  number of 16-bit pixels (the code halves it to
;                           obtain the dword count)
; Out:   nothing; every register is restored before returning.
;
; Pixels are processed in pairs (one dword); an odd trailing pixel is
; handled with a single word access, so nothing outside the line is
; read or written.  linesize values of 0 and 1 are handled.
;-----------------------------------------------------------------------
        PUBLIC  _BlitColorKey16
_BlitColorKey16 PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

        mov     edi, dword ptr [ebp+8]          ; edi = dest
        mov     esi, dword ptr [ebp+12]         ; esi = src
        mov     edx, dword ptr [ebp+16]         ; dx  = colour key
        mov     ecx, dword ptr [ebp+20]         ; ecx = linesize in pixels

        mov     eax, edx
        shl     edx, 16
        and     eax, 0000FFFFh
        or      edx, eax                        ; edx = key in both words
        shr     ecx, 1                          ; ecx = number of pixel pairs
        jz      OnePixel                        ; no complete pair: tail only

blitloop16:
        mov     eax, dword ptr [esi]            ; eax = two source pixels
        add     esi, 4
        cmp     eax, edx                        ; both transparent?
        je      LoopUp                          ; yes: dest stays as it is
        mov     ebx, dword ptr [edi]            ; ebx = two dest pixels
        cmp     ax, dx                          ; lower pixel transparent?
        je      DrawOT
        mov     bx, ax                          ; no: copy the lower pixel
DrawOT:
        ror     eax, 16                         ; ax = upper source pixel
        cmp     ax, dx                          ; upper pixel transparent?
        je      CopyBack
        ror     ebx, 16                         ; bx = upper dest pixel
        mov     bx, ax                          ; copy the upper pixel
        ror     ebx, 16                         ; restore the word order
CopyBack:
        mov     dword ptr [edi], ebx            ; write the merged pair back
LoopUp:
        add     edi, 4
        dec     ecx
        jnz     blitloop16

OnePixel:
        test    dword ptr [ebp+20], 1           ; odd number of pixels?
        jz      endofblit16
        mov     ax, word ptr [esi]              ; last pixel is the word at esi
        cmp     ax, dx
        je      endofblit16                     ; transparent: nothing to do
        mov     word ptr [edi], ax

endofblit16:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey16 ENDP
262
263
;-----------------------------------------------------------------------
; void BlitColorKey8MMX(char *dest, char *src, ULONG key, ULONG linesize)
;
; MMX version of BlitColorKey8: copies one line of 8-bit pixels from src
; to dest, leaving dest unchanged wherever the source pixel equals the
; low byte of key.
;
; In:    [ebp+8]  dest      destination line
;        [ebp+12] src       source line
;        [ebp+16] key       colour key (only the low byte is used)
;        [ebp+20] linesize  number of bytes (pixels) to process
; Out:   nothing; the integer registers are restored.  mm0, mm1, mm2,
;        mm4 and mm5 are used and EMMS is executed before returning so
;        that x87 code can run afterwards.
;
; Whole qwords go through the main loop; a remaining group of four
; bytes is merged with one dword step; the last 0..3 bytes are handled
; one by one.  No byte outside [0, linesize) is read or written.
;-----------------------------------------------------------------------
        PUBLIC  _BlitColorKey8MMX
_BlitColorKey8MMX PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

        mov     edx, dword ptr [ebp+16]         ; dl  = colour key
        mov     edi, dword ptr [ebp+8]          ; edi = dest
        mov     esi, dword ptr [ebp+12]         ; esi = src
        mov     ecx, dword ptr [ebp+20]         ; ecx = linesize in bytes

        mov     dh, dl                          ; dx  = key, key
        mov     eax, edx
        shl     edx, 16                         ; move the pair into the high word
        mov     dx, ax                          ; edx = key in all four bytes
        movd    mm4, edx
        movd    mm5, edx
        psllq   mm4, 32
        por     mm4, mm5                        ; mm4 = key in all eight bytes
        shr     ecx, 3                          ; ecx = number of whole qwords
        jz      BltRemain8

bltLoopMMX8:
        movq    mm0, qword ptr [esi]            ; mm0 = eight source pixels
        movq    mm1, qword ptr [edi]            ; mm1 = eight dest pixels
        movq    mm2, mm0                        ; keep a copy of the source
        pcmpeqb mm0, mm4                        ; mm0 = FFh where src == key
        pand    mm1, mm0                        ; dest kept where transparent
        pandn   mm0, mm2                        ; mm0 = (NOT mask) AND src
        por     mm1, mm0                        ; merge
        movq    qword ptr [edi], mm1            ; write back the result
        add     esi, 8
        add     edi, 8
        dec     ecx
        jnz     bltLoopMMX8

BltRemain8:
        mov     ecx, dword ptr [ebp+20]
        and     ecx, 7                          ; ecx = 0..7 trailing bytes
        cmp     ecx, 4
        jb      blt3MMX8                        ; fewer than 4 left

blt4MMX8:
        movd    mm0, dword ptr [esi]            ; same merge on one dword
        movd    mm1, dword ptr [edi]
        movq    mm2, mm0
        pcmpeqb mm0, mm4
        pand    mm1, mm0
        pandn   mm0, mm2
        por     mm1, mm0
        movd    dword ptr [edi], mm1
        add     esi, 4
        add     edi, 4
        sub     ecx, 4

blt3MMX8:
        test    ecx, ecx                        ; 0..3 bytes left
        jz      bltEndMMX8

blt1MMX8:
        mov     al, byte ptr [esi]
        cmp     al, dl                          ; transparent?
        je      blt1aMMX8
        mov     byte ptr [edi], al
blt1aMMX8:
        inc     esi
        inc     edi
        dec     ecx
        jnz     blt1MMX8

bltEndMMX8:
        emms                                    ; leave MMX state for x87 users
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey8MMX ENDP
448
449
;-----------------------------------------------------------------------
; void BlitColorKey16MMX(char *dest, char *src, ULONG key, ULONG linesize)
;
; MMX version of BlitColorKey16: copies one line of 16-bit pixels from
; src to dest, leaving dest unchanged wherever the source pixel equals
; the low word of key.
;
; In:    [ebp+8]  dest      destination line
;        [ebp+12] src       source line
;        [ebp+16] key       colour key (only the low 16 bits are used)
;        [ebp+20] linesize  number of 16-bit pixels
; Out:   nothing.  edi, esi, ecx and edx are restored; eax is used as
;        scratch and is NOT preserved.  mm0, mm1, mm2, mm4 and mm5 are
;        used and EMMS is executed before returning so that x87 code can
;        run afterwards.
;
; Groups of four pixels go through the qword loop; a remaining pair is
; merged with one dword step and a final odd pixel with a word access.
;-----------------------------------------------------------------------
        PUBLIC  _BlitColorKey16MMX
_BlitColorKey16MMX PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    ecx
        push    edx

        mov     edx, dword ptr [ebp+16]         ; dx  = colour key
        mov     edi, dword ptr [ebp+8]          ; edi = dest
        mov     esi, dword ptr [ebp+12]         ; esi = src
        mov     ecx, dword ptr [ebp+20]         ; ecx = linesize in pixels

        mov     eax, edx
        shl     edx, 16                         ; key into the high word
        mov     dx, ax                          ; edx = key in both words
        movd    mm4, edx
        movd    mm5, edx
        psllq   mm4, 32
        por     mm4, mm5                        ; mm4 = key in all four words
        shr     ecx, 2                          ; ecx = number of whole qwords
        jz      BltRemain16

bltLoopMMX16:
        movq    mm0, qword ptr [esi]            ; mm0 = four source pixels
        movq    mm1, qword ptr [edi]            ; mm1 = four dest pixels
        movq    mm2, mm0                        ; keep a copy of the source
        pcmpeqw mm0, mm4                        ; mm0 = FFFFh where src == key
        pand    mm1, mm0                        ; dest kept where transparent
        pandn   mm0, mm2                        ; mm0 = (NOT mask) AND src
        por     mm1, mm0                        ; merge
        movq    qword ptr [edi], mm1            ; write back the result
        add     esi, 8
        add     edi, 8
        dec     ecx
        jnz     bltLoopMMX16

BltRemain16:
        mov     eax, dword ptr [ebp+20]         ; eax = linesize, low 2 bits = tail
        test    eax, 2                          ; a pair of pixels left?
        jz      blt1MMX16

blt2MMX16:
        movd    mm0, dword ptr [esi]            ; same merge on one dword
        movd    mm1, dword ptr [edi]
        movq    mm2, mm0
        pcmpeqw mm0, mm4
        pand    mm1, mm0
        pandn   mm0, mm2
        por     mm1, mm0
        movd    dword ptr [edi], mm1
        add     esi, 4
        add     edi, 4

blt1MMX16:
        test    eax, 1                          ; one odd pixel left?
        jz      bltEndMMX16
        mov     ax, word ptr [esi]
        cmp     ax, dx                          ; transparent?
        je      bltEndMMX16
        mov     word ptr [edi], ax

bltEndMMX16:
        emms                                    ; leave MMX state for x87 users
        pop     edx
        pop     ecx
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey16MMX ENDP
543
544;
;-----------------------------------------------------------------------
; extern void __cdecl BltTransSrcRecMMX(PBYTE dest, PBYTE src,
;                                       ULONG ulBltWidth, ULONG ulBltHeight,
;                                       ULONG ulDestPitch, ULONG ulSrcPitch,
;                                       ULONG ulTransCol);
;
; Placeholder: the body is empty.  The routine sets up a frame, saves
; the working registers, restores them again and returns without
; reading any argument or touching memory other than its own stack.
;-----------------------------------------------------------------------
        PUBLIC  _BltTransSrcRecMMX
_BltTransSrcRecMMX PROC NEAR
        push    ebp                             ; standard frame
        mov     ebp, esp
        push    edi                             ; save working registers
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

        ; (no blit code yet)

EndTSBlt:
        pop     edx                             ; restore in reverse order
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret

_BltTransSrcRecMMX ENDP
570
571
572 PUBLIC _BltRec
573;
574; extern void __cdecl BltRec(PBYTE dest, PBYTE src, ULONG ulBltWidth,ULONG ulBltHeight
575; ULONG ulDestPitch, ULONG ulSrcPitch);
576_BltRec PROC NEAR
577 push ebp
578 mov ebp, esp
579 push edi
580 push esi
581 push eax
582 push ebx
583 push ecx
584 push edx
585
586 mov ecx, dword ptr [ebp+16] ; U ulBltWidth
587 mov esi, dword ptr [ebp+12] ; V src
588 mov ebx, ecx ; U
589 mov edx, dword ptr [ebp+20] ; V ulBltHeight
590 and ebx, 0Fh ; U ebx = # of bytes < 16
591 mov edi, dword ptr [ebp+8] ; V dest
592 cmp edx, 0
593 jz BltRecEnd ; height is zero so done
594 shr ecx, 4 ; U
595 jz SmallBlt ; Small (width < 16) rectangle done in special case
596 test ebx, ebx
597 jnz ComplexBlt ; ulBltWidth mod 16 is not 0
598
599;
600; Blitwidth is an multiple of 16
601;
602 mov ebx, dword ptr [ebp+24] ; ulDestPitch
603 mov eax, dword ptr [ebp+28] ; ulSrcPitch
604 sub ebx, dword ptr [ebp+16] ; adjust both widths
605 sub eax, dword ptr [ebp+16]
606 mov dword ptr [ebp+28], eax ; store adjusted SrcPitch
607 mov eax, ecx
; Row copy for widths that are a multiple of 16.
; On entry: esi = src, edi = dest, ecx = eax = 16-byte chunks per row,
;           edx = rows left (non-zero), ebx = ulDestPitch - ulBltWidth,
;           [ebp+28] = ulSrcPitch - ulBltWidth.
; FILD/FISTP on qword integers is used for the 8-byte moves because it
; reproduces every 64-bit pattern exactly; FLD/FSTP on a double does not
; for signalling-NaN bit patterns.
LineLoop:
        fild    qword ptr [esi]                 ; load 16 bytes as two qwords
        fild    qword ptr [esi+8]
        fxch                                    ; first qword back on top
        fistp   qword ptr [edi]
        fistp   qword ptr [edi+8]
        add     esi, 16
        add     edi, 16
        dec     eax
        jnz     LineLoop                        ; more chunks in this row
        dec     edx
        jz      BltRecEnd                       ; that was the last row
        add     esi, dword ptr [ebp+28]         ; skip to the start of the next row
        add     edi, ebx
        mov     eax, ecx                        ; reload the chunk counter
        jmp     LineLoop
624
625SmallBlt:
626 mov eax, dword ptr [ebp+28] ; ulSrcPitch
627 mov ecx, dword ptr [ebp+24] ; ulDestPitch
628 jmp ds:SmallJmpTable[ebx*4]
629SmallJmpTable:
630 dd cs:offset BltRecEnd ; BlitWidth is 0 done
631 dd cs:offset Rec1
632 dd cs:offset Rec2
633 dd cs:offset Rec3
634 dd cs:offset Rec4
635 dd cs:offset Rec5
636 dd cs:offset Rec6
637 dd cs:offset Rec7
638 dd cs:offset Rec8
639 dd cs:offset Rec9
640 dd cs:offset Rec10
641 dd cs:offset Rec11
642 dd cs:offset Rec12
643 dd cs:offset Rec13
644 dd cs:offset Rec14
645 dd cs:offset Rec15
646
647;One Pixel wide
648
649Rec1:
650 cmp edx,4
651 jb Rec1_0123
652 mov bl, byte ptr [esi]
653 add esi,eax
654 mov byte ptr [edi], bl
655 add edi,ecx
656 mov bl, byte ptr [esi]
657 add esi,eax
658 mov byte ptr [edi], bl
659 add edi,ecx
660 mov bl, byte ptr [esi]
661 add esi,eax
662 mov byte ptr [edi], bl
663 add edi,ecx
664 mov bl, byte ptr [esi]
665 add esi,eax
666 mov byte ptr [edi], bl
667 add edi,ecx
668 sub edx,4
669 jnz Rec1
670 jmp BltRecEnd
671Rec1_0123:
672 cmp edx,2
673 jz Rec1_2
674 jb Rec1_01
675; Must be 3 lines left
676 mov bl, byte ptr [esi]
677 add esi,eax
678 mov byte ptr [edi], bl
679 add edi,ecx
680 mov bl, byte ptr [esi]
681 add esi,eax
682 mov byte ptr [edi], bl
683 add edi,ecx
684 mov bl, byte ptr [esi]
685 mov byte ptr [edi], bl
686 jmp BltRecEnd
687Rec1_2:
688 mov bl, byte ptr [esi]
689 add esi,eax
690 mov byte ptr [edi], bl
691 add edi,ecx
692 mov bl, byte ptr [esi]
693 mov byte ptr [edi], bl
694 jmp BltRecEnd
695Rec1_01:
696 test edx,edx
697 jz BltRecEnd
698 mov bl, byte ptr [esi]
699 mov byte ptr [edi], bl
700 jmp BltRecEnd
701
702;2 Pixel Wide
703
704Rec2:
705 cmp edx,4
706 jb Rec2_0123
707 mov bx, word ptr [esi]
708 add esi,eax
709 mov word ptr [edi], bx
710 add edi,ecx
711 mov bx, word ptr [esi]
712 add esi,eax
713 mov word ptr [edi], bx
714 add edi,ecx
715 mov bx, word ptr [esi]
716 add esi,eax
717 mov word ptr [edi], bx
718 add edi,ecx
719 mov bx, word ptr [esi]
720 add esi,eax
721 mov word ptr [edi], bx
722 add edi,ecx
723 sub edx, 4
724 jnz Rec2
725 jmp BltRecEnd
726
727Rec2_0123:
728 cmp edx,2
729 jz Rec2_2
730 jb Rec2_01
731;3 lines left
732 mov bx, word ptr [esi]
733 add esi,eax
734 mov word ptr [edi], bx
735 add edi,ecx
736 mov bx, word ptr [esi]
737 add esi,eax
738 mov word ptr [edi], bx
739 add edi,ecx
740 mov bx, word ptr [esi]
741 mov word ptr [edi], bx
742 jmp BltRecEnd
743Rec2_2:
744 mov bx, word ptr [esi]
745 add esi,eax
746 mov word ptr [edi], bx
747 add edi,ecx
748 mov bx, word ptr [esi]
749 mov word ptr [edi], bx
750 jmp BltRecEnd
751Rec2_01:
752 test edx,edx
753 jz BltRecEnd
754 mov bx, word ptr [esi]
755 mov word ptr [edi], bx
756 jmp BltRecEnd
757
758; 3 Pixel Wide must check if it's better to read 4 bytes as
759; Intel might stall on reading 2 and 1 byte, but this takes more care as we
760; could create a pagefault on the last 3 pixel
761
762Rec3:
763 cmp edx,4
764 jb Rec3_0123
765 push edx
766 mov bx, word ptr [esi]
767 mov dl, byte ptr [esi+2]
768 add esi,eax
769 mov word ptr [edi], bx
770 mov byte ptr [edi+2], dl
771 add edi,ecx
772 mov bx, word ptr [esi]
773 mov dl, byte ptr [esi+2]
774 add esi,eax
775 mov word ptr [edi], bx
776 mov byte ptr [edi+2], dl
777 add edi,ecx
778 mov bx, word ptr [esi]
779 mov dl, byte ptr [esi+2]
780 add esi,eax
781 mov word ptr [edi], bx
782 mov byte ptr [edi+2], dl
783 add edi,ecx
784 mov bx, word ptr [esi]
785 mov dl, byte ptr [esi+2]
786 add esi,eax
787 mov word ptr [edi], bx
788 mov byte ptr [edi+2], dl
789 add edi,ecx
790 pop edx
791 sub edx,4
792 jnz Rec3
793 jmp BltRecEnd
794
795Rec3_0123:
796 cmp edx,2
797 jz Rec3_2
798 jb Rec3_01
799; Must be 3 lines left
800 mov bx, word ptr [esi]
801 mov dl, byte ptr [esi+2]
802 add esi,eax
803 mov word ptr [edi], bx
804 mov byte ptr [edi+2], dl
805 add edi,ecx
806 mov bx, word ptr [esi]
807 mov dl, byte ptr [esi+2]
808 add esi,eax
809 mov word ptr [edi], bx
810 mov byte ptr [edi+2], dl
811 add edi,ecx
812 mov bx, word ptr [esi]
813 mov dl, byte ptr [esi+2]
814 mov word ptr [edi], bx
815 mov byte ptr [edi+2], dl
816 jmp BltRecEnd
817Rec3_2:
818 mov bx, word ptr [esi]
819 mov dl, byte ptr [esi+2]
820 add esi,eax
821 mov word ptr [edi], bx
822 mov byte ptr [edi+2], dl
823 add edi,ecx
824 mov bx, word ptr [esi]
825 mov dl, byte ptr [esi+2]
826 mov word ptr [edi], bx
827 mov byte ptr [edi+2], dl
828 jmp BltRecEnd
829Rec3_01:
830 test edx,edx
831 jz BltRecEnd
832 mov bx, word ptr [esi]
833 mov dl, byte ptr [esi+2]
834 mov word ptr [edi], bx
835 mov byte ptr [edi+2], dl
836 jmp BltRecEnd
837
838; 4 Pixel Wide
839
840Rec4:
841 cmp edx,4
842 jb Rec4_0123
843 mov ebx, dword ptr [esi]
844 add esi,eax
845 mov dword ptr [edi], ebx
846 add edi,ecx
847 mov ebx, dword ptr [esi]
848 add esi,eax
849 mov dword ptr [edi], ebx
850 add edi,ecx
851 mov ebx, dword ptr [esi]
852 add esi,eax
853 mov dword ptr [edi], ebx
854 add edi,ecx
855 mov ebx, dword ptr [esi]
856 add esi,eax
857 mov dword ptr [edi], ebx
858 add edi,ecx
859 sub edx ,4
860 jnz Rec4
861 jmp BltRecEnd
862
; Tail of the 4-byte-wide copy: edx = 0..3 rows left,
; eax = ulSrcPitch, ecx = ulDestPitch.
; Two rows and zero/one row go to the 4-byte handlers (Rec4_2 / Rec4_01),
; not to the 2-byte-wide ones.
Rec4_0123:
        cmp     edx, 2
        jz      Rec4_2
        jb      Rec4_01
; 3 rows left
        mov     ebx, dword ptr [esi]
        add     esi, eax
        mov     dword ptr [edi], ebx
        add     edi, ecx
        mov     ebx, dword ptr [esi]
        add     esi, eax
        mov     dword ptr [edi], ebx
        add     edi, ecx
        mov     ebx, dword ptr [esi]
        mov     dword ptr [edi], ebx
        jmp     BltRecEnd
879Rec4_2:
880 mov ebx, dword ptr [esi]
881 add esi,eax
882 mov dword ptr [edi], ebx
883 add edi,ecx
884 mov ebx, dword ptr [esi]
885 mov dword ptr [edi], ebx
886 jmp BltRecEnd
887Rec4_01:
888 test edx,edx
889 jz BltRecEnd
890 mov ebx, dword ptr [esi]
891 mov dword ptr [edi], ebx
892 jmp BltRecEnd
893
894; 5 Pixel Wide
895
896Rec5:
897 cmp edx,4
898 jb Rec5_0123
899 push edx
900 mov ebx, dword ptr [esi]
901 mov dl, byte ptr [esi+4]
902 add esi,eax
903 mov dword ptr [edi], ebx
904 mov byte ptr [edi+4], dl
905 add edi,ecx
906 mov ebx, dword ptr [esi]
907 mov dl, byte ptr [esi+4]
908 add esi,eax
909 mov dword ptr [edi], ebx
910 mov byte ptr [edi+4], dl
911 add edi,ecx
912 mov ebx, dword ptr [esi]
913 mov dl, byte ptr [esi+4]
914 add esi,eax
915 mov dword ptr [edi], ebx
916 mov byte ptr [edi+4], dl
917 add edi,ecx
918 mov ebx, dword ptr [esi]
919 mov dl, byte ptr [esi+4]
920 add esi,eax
921 mov dword ptr [edi], ebx
922 mov byte ptr [edi+4], dl
923 add edi,ecx
924 pop edx
925 sub edx ,4
926 jnz Rec5
927 jmp BltRecEnd
928Rec5_0123:
929 cmp edx,2
930 jz Rec5_2
931 jb Rec5_01
932; Must be 3 lines left
933 mov ebx, dword ptr [esi]
934 mov dl, byte ptr [esi+4]
935 add esi,eax
936 mov dword ptr [edi], ebx
937 mov byte ptr [edi+4], dl
938 add edi,ecx
939 mov ebx, dword ptr [esi]
940 mov dl, byte ptr [esi+4]
941 add esi,eax
942 mov dword ptr [edi], ebx
943 mov byte ptr [edi+4], dl
944 add edi,ecx
945 mov ebx, dword ptr [esi]
946 mov dl, byte ptr [esi+4]
947 mov dword ptr [edi], ebx
948 mov byte ptr [edi+4], dl
949 jmp BltRecEnd
950Rec5_2:
951 mov ebx, dword ptr [esi]
952 mov dl, byte ptr [esi+4]
953 add esi,eax
954 mov dword ptr [edi], ebx
955 mov byte ptr [edi+4], dl
956 add edi,ecx
957 mov ebx, dword ptr [esi]
958 mov dl, byte ptr [esi+4]
959 mov dword ptr [edi], ebx
960 mov byte ptr [edi+4], dl
961 jmp BltRecEnd
962Rec5_01:
963 test edx,edx
964 jz BltRecEnd
965 mov ebx, dword ptr [esi]
966 mov dl, byte ptr [esi+4]
967 mov dword ptr [edi], ebx
968 mov byte ptr [edi+4], dl
969 jmp BltRecEnd
970
971; 6 Pixel Wide
972
973Rec6:
974 cmp edx,4
975 jb Rec6_0123
976 push edx
977 mov ebx, dword ptr [esi]
978 mov dx, word ptr [esi+4]
979 add esi,eax
980 mov dword ptr [edi], ebx
981 mov word ptr [edi+4], dx
982 add edi,ecx
983 mov ebx, dword ptr [esi]
984 mov dx, word ptr [esi+4]
985 add esi,eax
986 mov dword ptr [edi], ebx
987 mov word ptr [edi+4], dx
988 add edi,ecx
989 mov ebx, dword ptr [esi]
990 mov dx, word ptr [esi+4]
991 add esi,eax
992 mov dword ptr [edi], ebx
993 mov word ptr [edi+4], dx
994 add edi,ecx
995 mov ebx, dword ptr [esi]
996 mov dx, word ptr [esi+4]
997 add esi,eax
998 mov dword ptr [edi], ebx
999 mov word ptr [edi+4], dx
1000 add edi,ecx
1001 pop edx
1002 sub edx ,4
1003 jnz Rec6
1004 jmp BltRecEnd
1005Rec6_0123:
1006 cmp edx,2
1007 jz Rec6_2
1008 jb Rec6_01
1009; Must be 3 lines left
1010 mov ebx, dword ptr [esi]
1011 mov dx, word ptr [esi+4]
1012 add esi,eax
1013 mov dword ptr [edi], ebx
1014 mov word ptr [edi+4], dx
1015 add edi,ecx
1016 mov ebx, dword ptr [esi]
1017 mov dx, word ptr [esi+4]
1018 add esi,eax
1019 mov dword ptr [edi], ebx
1020 mov word ptr [edi+4], dx
1021 add edi,ecx
1022 mov ebx, dword ptr [esi]
1023 mov dx, word ptr [esi+4]
1024 mov dword ptr [edi], ebx
1025 mov word ptr [edi+4], dx
1026 jmp BltRecEnd
1027Rec6_2:
1028 mov ebx, dword ptr [esi]
1029 mov dx, word ptr [esi+4]
1030 add esi,eax
1031 mov dword ptr [edi], ebx
1032 mov word ptr [edi+4], dx
1033 add edi,ecx
1034 mov ebx, dword ptr [esi]
1035 mov dx, word ptr [esi+4]
1036 mov dword ptr [edi], ebx
1037 mov word ptr [edi+4], dx
1038 jmp BltRecEnd
1039Rec6_01:
1040 test edx,edx
1041 jz BltRecEnd
1042 mov ebx, dword ptr [esi]
1043 mov dx, word ptr [esi+4]
1044 mov dword ptr [edi], ebx
1045 mov word ptr [edi+4], dx
1046 jmp BltRecEnd
1047
1048; 7 Pixel Wide
1049
; 7-byte-wide copy, four rows per pass.
; On entry: esi = src, edi = dest, edx = rows left,
;           eax = ulSrcPitch, ecx = ulDestPitch.
; Fewer than four rows are finished by Rec7_0123, which copies all seven
; bytes of each row.  edx is saved around the pass because dx is used as
; scratch for bytes 4..5.
Rec7:
        cmp     edx, 4
        jb      Rec7_0123
        push    edx
        ; row 1
        mov     ebx, dword ptr [esi]            ; bytes 0..3
        mov     dx, word ptr [esi+4]            ; bytes 4..5
        mov     dword ptr [edi], ebx
        mov     word ptr [edi+4], dx
        mov     bl, byte ptr [esi+6]            ; byte 6
        add     esi, eax
        mov     byte ptr [edi+6], bl
        add     edi, ecx
        xor     ebx, ebx                        ; clear ebx to avoid stalls
        ; row 2
        mov     ebx, dword ptr [esi]
        mov     dx, word ptr [esi+4]
        mov     dword ptr [edi], ebx
        mov     word ptr [edi+4], dx
        mov     bl, byte ptr [esi+6]
        add     esi, eax
        mov     byte ptr [edi+6], bl
        add     edi, ecx
        xor     ebx, ebx                        ; clear ebx to avoid stalls
        ; row 3
        mov     ebx, dword ptr [esi]
        mov     dx, word ptr [esi+4]
        mov     dword ptr [edi], ebx
        mov     word ptr [edi+4], dx
        mov     bl, byte ptr [esi+6]
        add     esi, eax
        mov     byte ptr [edi+6], bl
        add     edi, ecx
        xor     ebx, ebx                        ; clear ebx to avoid stalls
        ; row 4
        mov     ebx, dword ptr [esi]
        mov     dx, word ptr [esi+4]
        mov     dword ptr [edi], ebx
        mov     word ptr [edi+4], dx
        mov     bl, byte ptr [esi+6]
        add     esi, eax
        mov     byte ptr [edi+6], bl
        add     edi, ecx
        xor     ebx, ebx                        ; clear ebx to avoid stalls
        pop     edx
        sub     edx, 4
        jnz     Rec7                            ; more rows: next pass or tail
        jmp     BltRecEnd
1094Rec7_0123:
1095 cmp edx,2
1096 jz Rec7_2
1097 jb Rec7_01
1098; Must be 3 lines left
1099 mov ebx, dword ptr [esi]
1100 mov dx, word ptr [esi+4]
1101 mov dword ptr [edi], ebx
1102 mov word ptr [edi+4], dx
1103 mov bl, byte ptr[esi+6]
1104 add esi,eax
1105 mov byte ptr[edi+6],bl
1106 add edi,ecx
1107 xor ebx,ebx ; clear ebx to avoid stalls
1108 mov ebx, dword ptr [esi]
1109 mov dx, word ptr [esi+4]
1110 mov dword ptr [edi], ebx
1111 mov word ptr [edi+4], dx
1112 mov bl, byte ptr[esi+6]
1113 add esi,eax
1114 mov byte ptr[edi+6],bl
1115 add edi,ecx
1116 xor ebx,ebx ; clear ebx to avoid stalls
1117 mov ebx, dword ptr [esi]
1118 mov dx, word ptr [esi+4]
1119 mov dword ptr [edi], ebx
1120 mov word ptr [edi+4], dx
1121 mov bl, byte ptr[esi+6]
1122 mov byte ptr[edi+6],bl
1123 jmp BltRecEnd
1124Rec7_2:
1125 mov ebx, dword ptr [esi]
1126 mov dx, word ptr [esi+4]
1127 mov dword ptr [edi], ebx
1128 mov word ptr [edi+4], dx
1129 mov bl, byte ptr[esi+6]
1130 add esi,eax
1131 mov byte ptr[edi+6],bl
1132 add edi,ecx
1133 xor ebx,ebx ; clear ebx to avoid stalls
1134 mov ebx, dword ptr [esi]
1135 mov dx, word ptr [esi+4]
1136 mov dword ptr [edi], ebx
1137 mov word ptr [edi+4], dx
1138 mov bl, byte ptr[esi+6]
1139 mov byte ptr[edi+6],bl
1140 jmp BltRecEnd
1141Rec7_01:
1142 test edx,edx
1143 jz BltRecEnd
1144 mov ebx, dword ptr [esi]
1145 mov dx, word ptr [esi+4]
1146 mov dword ptr [edi], ebx
1147 mov word ptr [edi+4], dx
1148 mov bl, byte ptr[esi+6]
1149 mov byte ptr[edi+6],bl
1150 jmp BltRecEnd
1151
1152; 8 Pixel Wide
1153
1154Rec8:
1155 cmp edx,4
1156 jb Rec8_0123
1157 push edx
1158 mov ebx, dword ptr [esi]
1159 mov edx, dword ptr [esi+4]
1160 mov dword ptr [edi], ebx
1161 mov dword ptr [edi+4], edx
1162 add esi,eax
1163 add edi,ecx
1164 mov ebx, dword ptr [esi]
1165 mov edx, dword ptr [esi+4]
1166 mov dword ptr [edi], ebx
1167 mov dword ptr [edi+4], edx
1168 add esi,eax
1169 add edi,ecx
1170 mov ebx, dword ptr [esi]
1171 mov edx, dword ptr [esi+4]
1172 mov dword ptr [edi], ebx
1173 mov dword ptr [edi+4], edx
1174 add esi,eax
1175 add edi,ecx
1176 mov ebx, dword ptr [esi]
1177 mov edx, dword ptr [esi+4]
1178 mov dword ptr [edi], ebx
1179 mov dword ptr [edi+4], edx
1180 add esi,eax
1181 add edi,ecx
1182 pop edx
1183 sub edx ,4
1184 jnz Rec8
1185 jmp BltRecEnd
1186
1187Rec8_0123:
1188 cmp edx,2
1189 jz Rec8_2
1190 jb Rec8_01
1191;3 lines left
1192 mov ebx, dword ptr [esi]
1193 mov edx, dword ptr [esi+4]
1194 mov dword ptr [edi], ebx
1195 mov dword ptr [edi+4], edx
1196 add esi,eax
1197 add edi,ecx
1198 mov ebx, dword ptr [esi]
1199 mov edx, dword ptr [esi+4]
1200 mov dword ptr [edi], ebx
1201 mov dword ptr [edi+4], edx
1202 add esi,eax
1203 add edi,ecx
1204 mov ebx, dword ptr [esi]
1205 mov edx, dword ptr [esi+4]
1206 mov dword ptr [edi], ebx
1207 mov dword ptr [edi+4], edx
1208 jmp BltRecEnd
1209Rec8_2:
1210 mov ebx, dword ptr [esi]
1211 mov edx, dword ptr [esi+4]
1212 mov dword ptr [edi], ebx
1213 mov dword ptr [edi+4], edx
1214 add esi,eax
1215 add edi,ecx
1216 mov ebx, dword ptr [esi]
1217 mov edx, dword ptr [esi+4]
1218 mov dword ptr [edi], ebx
1219 mov dword ptr [edi+4], edx
1220 jmp BltRecEnd
1221Rec8_01:
1222 test edx,edx
1223 jz BltRecEnd
1224 mov ebx, dword ptr [esi]
1225 mov edx, dword ptr [esi+4]
1226 mov dword ptr [edi], ebx
1227 mov dword ptr [edi+4], edx
1228 jmp BltRecEnd
1229
1230; 9 Pixel Wide
1231
1232Rec9:
1233 cmp edx,4
1234 jb Rec9_0123
1235 push edx
1236 FLD QWORD PTR [ESI]
1237 mov bl, byte ptr [esi+8]
1238 FSTP QWORD PTR [EDI]
1239 mov byte ptr [edi+8], bl
1240 add esi,eax
1241 add edi,ecx
1242 FLD QWORD PTR [ESI]
1243 mov bl, byte ptr [esi+8]
1244 FSTP QWORD PTR [EDI]
1245 mov byte ptr [edi+8], bl
1246 add esi,eax
1247 add edi,ecx
1248 FLD QWORD PTR [ESI]
1249 mov bl, byte ptr [esi+8]
1250 FSTP QWORD PTR [EDI]
1251 mov byte ptr [edi+8], bl
1252 add esi,eax
1253 add edi,ecx
1254 FLD QWORD PTR [ESI]
1255 mov bl, byte ptr [esi+8]
1256 FSTP QWORD PTR [EDI]
1257 mov byte ptr [edi+8], bl
1258 add esi,eax
1259 add edi,ecx
1260 pop edx
1261 sub edx ,4
1262 jnz Rec9
1263 jmp BltRecEnd
1264
1265Rec9_0123:
1266 cmp edx,2
1267 jz Rec9_2
1268 jb Rec9_01
1269;3 lines left
1270 FLD QWORD PTR [ESI]
1271 mov bl, byte ptr [esi+8]
1272 FSTP QWORD PTR [EDI]
1273 mov byte ptr [edi+8], bl
1274 add esi,eax
1275 add edi,ecx
1276 FLD QWORD PTR [ESI]
1277 mov bl, byte ptr [esi+8]
1278 FSTP QWORD PTR [EDI]
1279 mov byte ptr [edi+8], bl
1280 add esi,eax
1281 add edi,ecx
1282 FLD QWORD PTR [ESI]
1283 mov bl, byte ptr [esi+8]
1284 FSTP QWORD PTR [EDI]
1285 mov byte ptr [edi+8], bl
1286 jmp BltRecEnd
1287Rec9_2:
1288 FLD QWORD PTR [ESI]
1289 mov bl, byte ptr [esi+8]
1290 FSTP QWORD PTR [EDI]
1291 mov byte ptr [edi+8], bl
1292 add esi,eax
1293 add edi,ecx
1294 FLD QWORD PTR [ESI]
1295 mov bl, byte ptr [esi+8]
1296 FSTP QWORD PTR [EDI]
1297 mov byte ptr [edi+8], bl
1298 jmp BltRecEnd
1299Rec9_01:
1300 test edx,edx
1301 jz BltRecEnd
1302 FLD QWORD PTR [ESI]
1303 mov bl, byte ptr [esi+8]
1304 FSTP QWORD PTR [EDI]
1305 mov byte ptr [edi+8], bl
1306 jmp BltRecEnd
1307
1308; 10 Pixel Wide
1309
1310Rec10:
1311 cmp edx,4
1312 jb Rec10_0123
1313 FLD QWORD PTR [ESI]
1314 mov bx, word ptr [esi+8]
1315 FSTP QWORD PTR [EDI]
1316 mov word ptr [edi+8], bx
1317 add esi,eax
1318 add edi,ecx
1319 FLD QWORD PTR [ESI]
1320 mov bx, word ptr [esi+8]
1321 FSTP QWORD PTR [EDI]
1322 mov word ptr [edi+8], bx
1323 add esi,eax
1324 add edi,ecx
1325 FLD QWORD PTR [ESI]
1326 mov bx, word ptr [esi+8]
1327 FSTP QWORD PTR [EDI]
1328 mov word ptr [edi+8], bx
1329 add esi,eax
1330 add edi,ecx
1331 FLD QWORD PTR [ESI]
1332 mov bx, word ptr [esi+8]
1333 FSTP QWORD PTR [EDI]
1334 mov word ptr [edi+8], bx
1335 add esi,eax
1336 add edi,ecx
1337 sub edx ,4
1338 jnz Rec10
1339 jmp BltRecEnd
1340
1341Rec10_0123:
1342 cmp edx,2
1343 jz Rec10_2
1344 jb Rec10_01
1345;3 lines left
1346 FLD QWORD PTR [ESI]
1347 mov bx, word ptr [esi+8]
1348 FSTP QWORD PTR [EDI]
1349 mov word ptr [edi+8], bx
1350 add esi,eax
1351 add edi,ecx
1352 FLD QWORD PTR [ESI]
1353 mov bx, word ptr [esi+8]
1354 FSTP QWORD PTR [EDI]
1355 mov word ptr [edi+8], bx
1356 add esi,eax
1357 add edi,ecx
1358 FLD QWORD PTR [ESI]
1359 mov bx, word ptr [esi+8]
1360 FSTP QWORD PTR [EDI]
1361 mov word ptr [edi+8], bx
1362 jmp BltRecEnd
1363Rec10_2:
1364 FLD QWORD PTR [ESI]
1365 mov bx, word ptr [esi+8]
1366 FSTP QWORD PTR [EDI]
1367 mov word ptr [edi+8], bx
1368 add esi,eax
1369 add edi,ecx
1370 FLD QWORD PTR [ESI]
1371 mov bx, word ptr [esi+8]
1372 FSTP QWORD PTR [EDI]
1373 mov word ptr [edi+8], bx
1374 jmp BltRecEnd
1375Rec10_01:
1376 test edx,edx
1377 jz BltRecEnd
1378 FLD QWORD PTR [ESI]
1379 mov bx, word ptr [esi+8]
1380 FSTP QWORD PTR [EDI]
1381 mov word ptr [edi+8], bx
1382 jmp BltRecEnd
1383
1384; 11 Pixel Wide
1385
; 11-byte-wide copy, four rows per pass.
; On entry: esi = src, edi = dest, edx = rows left,
;           eax = ulSrcPitch, ecx = ulDestPitch.
; Each row is moved as qword + word + byte.  edx is saved around the pass
; because dl is used as scratch for byte 10.  After a pass the loop
; returns to Rec11 itself so the remaining rows keep the 11-byte width.
; FILD/FISTP is used for the qword because it reproduces every 64-bit
; pattern exactly, which FLD/FSTP on a double does not for
; signalling-NaN bit patterns.
Rec11:
        cmp     edx, 4
        jb      Rec11_0123
        push    edx
        ; row 1
        fild    qword ptr [esi]                 ; bytes 0..7
        mov     bx, word ptr [esi+8]            ; bytes 8..9
        mov     dl, byte ptr [esi+10]           ; byte 10
        fistp   qword ptr [edi]
        mov     word ptr [edi+8], bx
        mov     byte ptr [edi+10], dl
        add     esi, eax
        add     edi, ecx
        ; row 2
        fild    qword ptr [esi]
        mov     bx, word ptr [esi+8]
        mov     dl, byte ptr [esi+10]
        fistp   qword ptr [edi]
        mov     word ptr [edi+8], bx
        mov     byte ptr [edi+10], dl
        add     esi, eax
        add     edi, ecx
        ; row 3
        fild    qword ptr [esi]
        mov     bx, word ptr [esi+8]
        mov     dl, byte ptr [esi+10]
        fistp   qword ptr [edi]
        mov     word ptr [edi+8], bx
        mov     byte ptr [edi+10], dl
        add     esi, eax
        add     edi, ecx
        ; row 4
        fild    qword ptr [esi]
        mov     bx, word ptr [esi+8]
        mov     dl, byte ptr [esi+10]
        fistp   qword ptr [edi]
        mov     word ptr [edi+8], bx
        mov     byte ptr [edi+10], dl
        add     esi, eax
        add     edi, ecx
        pop     edx
        sub     edx, 4
        jnz     Rec11                           ; more rows: next pass or tail
        jmp     BltRecEnd
1426
1427Rec11_0123:
1428 cmp edx,2
1429 jz Rec11_2
1430 jb Rec11_01
1431;3 lines left
1432 FLD QWORD PTR [ESI]
1433 mov bx, word ptr [esi+8]
1434 mov dl, byte ptr [esi+10]
1435 FSTP QWORD PTR [EDI]
1436 mov word ptr [edi+8], bx
1437 mov byte ptr [edi+10], dl
1438 add esi,eax
1439 add edi,ecx
1440 FLD QWORD PTR [ESI]
1441 mov bx, word ptr [esi+8]
1442 mov dl, byte ptr [esi+10]
1443 FSTP QWORD PTR [EDI]
1444 mov word ptr [edi+8], bx
1445 mov byte ptr [edi+10], dl
1446 add esi,eax
1447 add edi,ecx
1448 FLD QWORD PTR [ESI]
1449 mov bx, word ptr [esi+8]
1450 mov dl, byte ptr [esi+10]
1451 FSTP QWORD PTR [EDI]
1452 mov word ptr [edi+8], bx
1453 mov byte ptr [edi+10], dl
1454 jmp BltRecEnd
1455Rec11_2:
1456 FLD QWORD PTR [ESI]
1457 mov bx, word ptr [esi+8]
1458 mov dl, byte ptr [esi+10]
1459 FSTP QWORD PTR [EDI]
1460 mov word ptr [edi+8], bx
1461 mov byte ptr [edi+10], dl
1462 add esi,eax
1463 add edi,ecx
1464 FLD QWORD PTR [ESI]
1465 mov bx, word ptr [esi+8]
1466 mov dl, byte ptr [esi+10]
1467 FSTP QWORD PTR [EDI]
1468 mov word ptr [edi+8], bx
1469 mov byte ptr [edi+10], dl
1470 jmp BltRecEnd
1471Rec11_01:
1472 test edx,edx
1473 jz BltRecEnd
1474 FLD QWORD PTR [ESI]
1475 mov bx, word ptr [esi+8]
1476 mov dl, byte ptr [esi+10]
1477 FSTP QWORD PTR [EDI]
1478 mov word ptr [edi+8], bx
1479 mov byte ptr [edi+10], dl
1480 jmp BltRecEnd
1481
1482; 12 Pixel Wide
1483
1484Rec12:
1485 cmp edx,4
1486 jb Rec12_0123
1487 FLD QWORD PTR [ESI]
1488 mov ebx, dword ptr [esi+8]
1489 FSTP QWORD PTR [EDI]
1490 mov dword ptr [edi+8], ebx
1491 add esi,eax
1492 add edi,ecx
1493 FLD QWORD PTR [ESI]
1494 mov ebx, dword ptr [esi+8]
1495 FSTP QWORD PTR [EDI]
1496 mov dword ptr [edi+8], ebx
1497 add esi,eax
1498 add edi,ecx
1499 FLD QWORD PTR [ESI]
1500 mov ebx, dword ptr [esi+8]
1501 FSTP QWORD PTR [EDI]
1502 mov dword ptr [edi+8], ebx
1503 add esi,eax
1504 add edi,ecx
1505 FLD QWORD PTR [ESI]
1506 mov ebx, dword ptr [esi+8]
1507 FSTP QWORD PTR [EDI]
1508 mov dword ptr [edi+8], ebx
1509 add esi,eax
1510 add edi,ecx
1511 sub edx ,4
1512 jnz Rec12
1513 jmp BltRecEnd
1514
1515Rec12_0123:
1516 cmp edx,2
1517 jz Rec12_2
1518 jb Rec12_01
1519;3 lines left
1520 FLD QWORD PTR [ESI]
1521 mov ebx, dword ptr [esi+8]
1522 FSTP QWORD PTR [EDI]
1523 mov dword ptr [edi+8], ebx
1524 add esi,eax
1525 add edi,ecx
1526 FLD QWORD PTR [ESI]
1527 mov ebx, dword ptr [esi+8]
1528 FSTP QWORD PTR [EDI]
1529 mov dword ptr [edi+8], ebx
1530 add esi,eax
1531 add edi,ecx
1532 FLD QWORD PTR [ESI]
1533 mov ebx, dword ptr [esi+8]
1534 FSTP QWORD PTR [EDI]
1535 mov dword ptr [edi+8], ebx
1536 jmp BltRecEnd
1537Rec12_2:
1538 FLD QWORD PTR [ESI]
1539 mov ebx, dword ptr [esi+8]
1540 FSTP QWORD PTR [EDI]
1541 mov dword ptr [edi+8], ebx
1542 add esi,eax
1543 add edi,ecx
1544 FLD QWORD PTR [ESI]
1545 mov ebx, dword ptr [esi+8]
1546 FSTP QWORD PTR [EDI]
1547 mov dword ptr [edi+8], ebx
1548 jmp BltRecEnd
1549Rec12_01:
1550 test edx,edx
1551 jz BltRecEnd
1552 FLD QWORD PTR [ESI]
1553 mov ebx, dword ptr [esi+8]
1554 FSTP QWORD PTR [EDI]
1555 mov dword ptr [edi+8], ebx
1556 jmp BltRecEnd
1557
1558; 13 Pixel Wide
1559
1560Rec13:
1561 cmp edx,4
1562 jb Rec13_0123
1563 push edx
1564 FLD QWORD PTR [ESI]
1565 mov ebx, dword ptr [esi+8]
1566 mov dl, byte ptr [esi+12]
1567 FSTP QWORD PTR [EDI]
1568 mov dword ptr [edi+8], ebx
1569 mov byte ptr [edi+12], dl
1570 add esi,eax
1571 add edi,ecx
1572 FLD QWORD PTR [ESI]
1573 mov ebx, dword ptr [esi+8]
1574 mov dl, byte ptr [esi+12]
1575 FSTP QWORD PTR [EDI]
1576 mov dword ptr [edi+8], ebx
1577 mov byte ptr [edi+12], dl
1578 add esi,eax
1579 add edi,ecx
1580 FLD QWORD PTR [ESI]
1581 mov ebx, dword ptr [esi+8]
1582 mov dl, byte ptr [esi+12]
1583 FSTP QWORD PTR [EDI]
1584 mov dword ptr [edi+8], ebx
1585 mov byte ptr [edi+12], dl
1586 add esi,eax
1587 add edi,ecx
1588 FLD QWORD PTR [ESI]
1589 mov ebx, dword ptr [esi+8]
1590 mov dl, byte ptr [esi+12]
1591 FSTP QWORD PTR [EDI]
1592 mov dword ptr [edi+8], ebx
1593 mov byte ptr [edi+12], dl
1594 add esi,eax
1595 add edi,ecx
1596 pop edx
1597 sub edx ,4
1598 jnz Rec13
1599 jmp BltRecEnd
1600
1601Rec13_0123:
1602 cmp edx,2
1603 jz Rec13_2
1604 jb Rec13_01
1605;3 lines left
1606 FLD QWORD PTR [ESI]
1607 mov ebx, dword ptr [esi+8]
1608 mov dl, byte ptr [esi+12]
1609 FSTP QWORD PTR [EDI]
1610 mov dword ptr [edi+8], ebx
1611 mov byte ptr [edi+12], dl
1612 add esi,eax
1613 add edi,ecx
1614 FLD QWORD PTR [ESI]
1615 mov ebx, dword ptr [esi+8]
1616 mov dl, byte ptr [esi+12]
1617 FSTP QWORD PTR [EDI]
1618 mov dword ptr [edi+8], ebx
1619 mov byte ptr [edi+12], dl
1620 add esi,eax
1621 add edi,ecx
1622 FLD QWORD PTR [ESI]
1623 mov ebx, dword ptr [esi+8]
1624 mov dl, byte ptr [esi+12]
1625 FSTP QWORD PTR [EDI]
1626 mov dword ptr [edi+8], ebx
1627 mov byte ptr [edi+12], dl
1628 jmp BltRecEnd
1629Rec13_2:
1630 FLD QWORD PTR [ESI]
1631 mov ebx, dword ptr [esi+8]
1632 mov dl, byte ptr [esi+12]
1633 FSTP QWORD PTR [EDI]
1634 mov dword ptr [edi+8], ebx
1635 mov byte ptr [edi+12], dl
1636 add esi,eax
1637 add edi,ecx
1638 FLD QWORD PTR [ESI]
1639 mov ebx, dword ptr [esi+8]
1640 mov dl, byte ptr [esi+12]
1641 FSTP QWORD PTR [EDI]
1642 mov dword ptr [edi+8], ebx
1643 mov byte ptr [edi+12], dl
1644 jmp BltRecEnd
1645Rec13_01:
1646 test edx,edx
1647 jz BltRecEnd
1648 FLD QWORD PTR [ESI]
1649 mov ebx, dword ptr [esi+8]
1650 mov dl, byte ptr [esi+12]
1651 FSTP QWORD PTR [EDI]
1652 mov dword ptr [edi+8], ebx
1653 mov byte ptr [edi+12], dl
1654 jmp BltRecEnd
1655
1656; 14 Pixel Wide
1657
1658Rec14:
1659 cmp edx,4
1660 jb Rec14_0123
1661 push edx
1662 FLD QWORD PTR [ESI]
1663 mov ebx, dword ptr [esi+8]
1664 mov dx, word ptr [esi+12]
1665 FSTP QWORD PTR [EDI]
1666 mov dword ptr [edi+8], ebx
1667 mov word ptr [edi+12], dx
1668 add esi,eax
1669 add edi,ecx
1670 FLD QWORD PTR [ESI]
1671 mov ebx, dword ptr [esi+8]
1672 mov dx, word ptr [esi+12]
1673 FSTP QWORD PTR [EDI]
1674 mov dword ptr [edi+8], ebx
1675 mov word ptr [edi+12], dx
1676 add esi,eax
1677 add edi,ecx
1678 FLD QWORD PTR [ESI]
1679 mov ebx, dword ptr [esi+8]
1680 mov dx, word ptr [esi+12]
1681 FSTP QWORD PTR [EDI]
1682 mov dword ptr [edi+8], ebx
1683 mov word ptr [edi+12], dx
1684 add esi,eax
1685 add edi,ecx
1686 FLD QWORD PTR [ESI]
1687 mov ebx, dword ptr [esi+8]
1688 mov dx, word ptr [esi+12]
1689 FSTP QWORD PTR [EDI]
1690 mov dword ptr [edi+8], ebx
1691 mov word ptr [edi+12], dx
1692 add esi,eax
1693 add edi,ecx
1694 pop edx
1695 sub edx ,4
1696 jnz Rec14
1697 jmp BltRecEnd
1698
1699Rec14_0123:
1700 cmp edx,2
1701 jz Rec14_2
1702 jb Rec14_01
1703;3 lines left
1704 FLD QWORD PTR [ESI]
1705 mov ebx, dword ptr [esi+8]
1706 mov dx, word ptr [esi+12]
1707 FSTP QWORD PTR [EDI]
1708 mov dword ptr [edi+8], ebx
1709 mov word ptr [edi+12], dx
1710 add esi,eax
1711 add edi,ecx
1712 FLD QWORD PTR [ESI]
1713 mov ebx, dword ptr [esi+8]
1714 mov dx, word ptr [esi+12]
1715 FSTP QWORD PTR [EDI]
1716 mov dword ptr [edi+8], ebx
1717 mov word ptr [edi+12], dx
1718 add esi,eax
1719 add edi,ecx
1720 FLD QWORD PTR [ESI]
1721 mov ebx, dword ptr [esi+8]
1722 mov dx, word ptr [esi+12]
1723 FSTP QWORD PTR [EDI]
1724 mov dword ptr [edi+8], ebx
1725 mov word ptr [edi+12], dx
1726 jmp BltRecEnd
1727Rec14_2:
1728 FLD QWORD PTR [ESI]
1729 mov ebx, dword ptr [esi+8]
1730 mov dx, word ptr [esi+12]
1731 FSTP QWORD PTR [EDI]
1732 mov dword ptr [edi+8], ebx
1733 mov word ptr [edi+12], dx
1734 add esi,eax
1735 add edi,ecx
1736 FLD QWORD PTR [ESI]
1737 mov ebx, dword ptr [esi+8]
1738 mov dx, word ptr [esi+12]
1739 FSTP QWORD PTR [EDI]
1740 mov dword ptr [edi+8], ebx
1741 mov word ptr [edi+12], dx
1742 jmp BltRecEnd
1743Rec14_01:
1744 test edx,edx
1745 jz BltRecEnd
1746 FLD QWORD PTR [ESI]
1747 mov ebx, dword ptr [esi+8]
1748 mov dx, word ptr [esi+12]
1749 FSTP QWORD PTR [EDI]
1750 mov dword ptr [edi+8], ebx
1751 mov word ptr [edi+12], dx
1752 jmp BltRecEnd
1753
1754; 15 Pixel Wide
1755
1756Rec15:
1757 cmp edx,4
1758 jb Rec15_0123
1759 push edx
1760 FLD QWORD PTR [ESI]
1761 mov ebx, dword ptr [esi+8]
1762 mov dx, word ptr [esi+12]
1763 FSTP QWORD PTR [EDI]
1764 mov dword ptr [edi+8], ebx
1765 mov bl, byte ptr[esi+14]
1766 mov word ptr [edi+12], dx
1767 add esi,eax
1768 mov byte ptr[edi+14], bl
1769 add edi,ecx
1770 FLD QWORD PTR [ESI]
1771 mov ebx, dword ptr [esi+8]
1772 mov dx, word ptr [esi+12]
1773 FSTP QWORD PTR [EDI]
1774 mov dword ptr [edi+8], ebx
1775 mov bl, byte ptr[esi+14]
1776 mov word ptr [edi+12], dx
1777 add esi,eax
1778 mov byte ptr[edi+14], bl
1779 add edi,ecx
1780 FLD QWORD PTR [ESI]
1781 mov ebx, dword ptr [esi+8]
1782 mov dx, word ptr [esi+12]
1783 FSTP QWORD PTR [EDI]
1784 mov dword ptr [edi+8], ebx
1785 mov bl, byte ptr[esi+14]
1786 mov word ptr [edi+12], dx
1787 add esi,eax
1788 mov byte ptr[edi+14], bl
1789 add edi,ecx
1790 FLD QWORD PTR [ESI]
1791 mov ebx, dword ptr [esi+8]
1792 mov dx, word ptr [esi+12]
1793 FSTP QWORD PTR [EDI]
1794 mov dword ptr [edi+8], ebx
1795 mov bl, byte ptr[esi+14]
1796 mov word ptr [edi+12], dx
1797 add esi,eax
1798 mov byte ptr[edi+14], bl
1799 add edi,ecx
1800 pop edx
1801 sub edx ,4
1802 jnz Rec15
1803 jmp BltRecEnd
1804
1805Rec15_0123:
1806 cmp edx,2
1807 jz Rec15_2
1808 jb Rec15_01
1809;3 lines left
1810 FLD QWORD PTR [ESI]
1811 mov ebx, dword ptr [esi+8]
1812 mov dx, word ptr [esi+12]
1813 FSTP QWORD PTR [EDI]
1814 mov dword ptr [edi+8], ebx
1815 mov bl, byte ptr[esi+14]
1816 mov word ptr [edi+12], dx
1817 add esi,eax
1818 mov byte ptr[edi+14], bl
1819 add edi,ecx
1820 FLD QWORD PTR [ESI]
1821 mov ebx, dword ptr [esi+8]
1822 mov dx, word ptr [esi+12]
1823 FSTP QWORD PTR [EDI]
1824 mov dword ptr [edi+8], ebx
1825 mov bl, byte ptr[esi+14]
1826 mov word ptr [edi+12], dx
1827 add esi,eax
1828 mov byte ptr[edi+14], bl
1829 add edi,ecx
1830 FLD QWORD PTR [ESI]
1831 mov ebx, dword ptr [esi+8]
1832 mov dx, word ptr [esi+12]
1833 FSTP QWORD PTR [EDI]
1834 mov dword ptr [edi+8], ebx
1835 mov bl, byte ptr[esi+14]
1836 mov word ptr [edi+12], dx
1837 mov byte ptr[edi+14], bl
1838 jmp BltRecEnd
1839Rec15_2:
1840 FLD QWORD PTR [ESI]
1841 mov ebx, dword ptr [esi+8]
1842 mov dx, word ptr [esi+12]
1843 FSTP QWORD PTR [EDI]
1844 mov dword ptr [edi+8], ebx
1845 mov bl, byte ptr[esi+14]
1846 mov word ptr [edi+12], dx
1847 add esi,eax
1848 mov byte ptr[edi+14], bl
1849 add edi,ecx
1850 FLD QWORD PTR [ESI]
1851 mov ebx, dword ptr [esi+8]
1852 mov dx, word ptr [esi+12]
1853 FSTP QWORD PTR [EDI]
1854 mov dword ptr [edi+8], ebx
1855 mov bl, byte ptr[esi+14]
1856 mov word ptr [edi+12], dx
1857 mov byte ptr[edi+14], bl
1858 jmp BltRecEnd
1859Rec15_01:
1860 test edx,edx
1861 jz BltRecEnd
1862 FLD QWORD PTR [ESI]
1863 mov ebx, dword ptr [esi+8]
1864 mov dx, word ptr [esi+12]
1865 FSTP QWORD PTR [EDI]
1866 mov dword ptr [edi+8], ebx
1867 mov bl, byte ptr[esi+14]
1868 mov word ptr [edi+12], dx
1869 mov byte ptr[edi+14], bl
1870 jmp BltRecEnd
1871
1872
1873ComplexBlt:
1874 ; Blit first the even rect then the rest
1875
1876 push dword ptr [ebp+28] ; ulSrcPitch
1877 push dword ptr [ebp+24] ; ulDestPitch
1878 push edx
1879 shl ecx,4
1880 push ecx
1881 push esi
1882 push edi
1883 call _BltRec
1884 sub esp,24
1885 add esi,ecx
1886 add edi,ecx
1887 push dword ptr [ebp+28] ; ulSrcPitch
1888 push dword ptr [ebp+24] ; ulDestPitch
1889 push edx
1890 push ebx
1891 push esi
1892 push edi
1893 call _BltRec
1894 sub esp,24
1895
1896BltRecEnd:
1897 pop edx
1898 pop ecx
1899 pop ebx
1900 pop eax
1901 pop esi
1902 pop edi
1903 pop ebp
1904 ret
1905_BltRec ENDP
1906
1907
;
; int __cdecl CPUHasMMX()
;
; Reports whether the CPU supports MMX and the conditional move instruction.
;
; returns (in eax):
;   0 = no MMX (also returned when the CPUID instruction is not available)
;   1 = MMX
;   2 = MMX + CMOV instruction
;
; All registers other than eax are preserved; the EFLAGS.ID bit that is
; toggled for the CPUID probe is put back to its original state.
;
        PUBLIC  _CPUHasMMX

_CPUHasMMX PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    ebx
        push    ecx
        push    edx

        ; CPUID is only available when bit 21 (ID) of EFLAGS can be toggled.
        pushfd
        pop     eax
        mov     ebx, eax                ; ebx = original EFLAGS
        xor     eax, 00200000h          ; flip the ID bit
        push    eax
        popfd
        pushfd
        pop     eax                     ; eax = EFLAGS as the CPU accepted them
        push    ebx
        popfd                           ; restore the caller's EFLAGS
        sub     eax, ebx                ; 0 => ID bit did not change
        jz      Return                  ; No CPUID => No MMX => return 0 in eax

        mov     eax, 1                  ; CPUID function 1: feature flags in edx
        CPUID                           ; overwrites eax, ebx, ecx, edx
        xor     eax, eax                ; drop the CPU signature, result = 0 so far
        test    edx, 00800000h          ; MMX bit (23) set ?
        jz      Return                  ; no MMX => return 0
        inc     eax                     ; result = 1 (MMX)
        test    edx, 00008000h          ; conditional move bit (15) set ?
        jz      Return
        inc     eax                     ; result = 2 (MMX + CMOV)
Return:
        pop     edx
        pop     ecx
        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        ret
_CPUHasMMX ENDP
1952
;
; void __cdecl MemFlip(PBYTE dest, PBYTE src, ULONG Size);
;
; Forward copy of Size bytes from src to dest through the FPU
; (FLD / FSTP move 8 bytes per instruction pair). MMX might even be
; faster but is not present on every system.
;
; To maximize the speed 64 bytes are copied per loop iteration; the
; remaining (Size mod 64) bytes are copied afterwards in chunks of
; 32, 16, 8, 4, 2 and 1 byte, selected by the bits of the remainder.
;
;   [ebp+8]  = dest
;   [ebp+12] = src
;   [ebp+16] = Size in bytes
;
; All general purpose registers are preserved. Two free FPU stack
; slots are needed while copying.
;
; NOTE(review): FLD QWORD converts the data to extended precision, so a
; bit pattern that happens to be a signalling NaN may not be copied
; bit-exact (see the FLD description in the Intel manual) - confirm
; whether this matters for the surfaces copied here; FILD/FISTP QWORD
; would avoid it.
;
        PUBLIC  _MemFlip

_MemFlip PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx

        mov     eax, dword ptr [ebp+16] ; Size of Buffer
        mov     edi, dword ptr [ebp+8]  ; Destination
        mov     ebx, eax
        mov     esi, dword ptr [ebp+12] ; SourcePointer

        and     ebx, 0000003Fh          ; ebx = leftover bytes (Size mod 64)
        shr     eax, 6                  ; eax = number of 64 byte loops

        jz      COPYREMAIN              ; Less than 64 to copy
ALIGN 4

Loop64:
        FLD     QWORD PTR [ESI]
        FLD     QWORD PTR [ESI+8]
        FXCH                            ; bring the first qword back to st(0)
        FSTP    QWORD PTR [EDI]
        FSTP    QWORD PTR [EDI+8]
        FLD     QWORD PTR [ESI+16]
        FLD     QWORD PTR [ESI+24]
        FXCH
        FSTP    QWORD PTR [EDI+16]
        FSTP    QWORD PTR [EDI+24]
        FLD     QWORD PTR [ESI+32]
        FLD     QWORD PTR [ESI+40]
        FXCH
        FSTP    QWORD PTR [EDI+32]
        FSTP    QWORD PTR [EDI+40]
        FLD     QWORD PTR [ESI+48]
        FLD     QWORD PTR [ESI+56]
        FXCH
        FSTP    QWORD PTR [EDI+48]
        FSTP    QWORD PTR [EDI+56]
        add     esi, 64
        add     edi, 64
        dec     eax                     ; one 64 byte block done
        jnz     Loop64

COPYREMAIN:
        test    ebx, ebx                ; something left ?
        jz      EndOffFlip

        test    ebx, 00000020h          ; at least 32 bytes left ?
        jz      Test16

        FLD     QWORD PTR [ESI]
        FLD     QWORD PTR [ESI+8]
        FXCH
        FSTP    QWORD PTR [EDI]
        FSTP    QWORD PTR [EDI+8]
        FLD     QWORD PTR [ESI+16]
        FLD     QWORD PTR [ESI+24]
        FXCH
        FSTP    QWORD PTR [EDI+16]
        FSTP    QWORD PTR [EDI+24]
        add     esi, 32
        add     edi, 32
        sub     ebx, 00000020h
        jz      EndOffFlip

Test16:
        ; TEST always clears the carry flag, so the decision has to be
        ; taken on the zero flag (bit not set => skip this chunk).
        test    ebx, 00000010h          ; at least 16 bytes left ?
        jz      Test8

        FLD     QWORD PTR [ESI]
        FLD     QWORD PTR [ESI+8]
        FXCH
        FSTP    QWORD PTR [EDI]
        FSTP    QWORD PTR [EDI+8]
        add     esi, 16
        add     edi, 16
        sub     ebx, 00000010h
        jz      EndOffFlip

Test8:
        test    ebx, 00000008h          ; at least 8 bytes left ?
        jz      Test4
        mov     eax, [esi]
        mov     ecx, [esi+4]
        mov     [edi], eax
        mov     [edi+4], ecx
        add     esi, 8
        add     edi, 8
        sub     ebx, 8
        jz      EndOffFlip

Test4:
        test    ebx, 00000004h          ; at least 4 bytes left ?
        jz      Test2
        mov     eax, [esi]
        mov     [edi], eax
        add     esi, 4
        add     edi, 4
        sub     ebx, 4
        jz      EndOffFlip

Test2:
        test    ebx, 00000002h          ; at least 2 bytes left ?
        jz      Copy1
        mov     ax, [esi]
        mov     [edi], ax
        add     esi, 2
        add     edi, 2
        sub     ebx, 2
        jz      EndOffFlip

Copy1:
        ; Only reached with exactly one byte left.
        mov     al, [esi]
        mov     [edi], al

EndOffFlip:
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_MemFlip ENDP
2102
2103CODE32 ENDS
2104
2105 END
Note: See TracBrowser for help on using the repository browser.