source: trunk/src/ddraw/asmutil.asm@ 2511

Last change on this file since 2511 was 2174, checked in by hugh, 26 years ago

Added Odin license header with ID to all files where it was missing

Cleaned up surface handling by moving colorfill and
color conversion into their own files and using function pointers
that are set up during creation.

updated makefile to add files

removed inheritance from IBASE in DDrectangle class

File size: 40.0 KB
Line 
1; $Id: asmutil.asm,v 1.5 1999-12-21 01:28:19 hugh Exp $
2
3;
4; asmutil.asm Color key bit blitting for DirectDraw
5;
6; Copyright 1998 Sander van Leeuwen
7; 1999 Markus Montkowski
8;
9; Project Odin Software License can be found in LICENSE.TXT
10;
11
12 NAME asmutil
13.586p
14.MMX
15
16CODE32 SEGMENT DWORD PUBLIC USE32 'CODE'
17 ASSUME CS:FLAT ,DS:FLAT,SS:FLAT
18
19 PUBLIC _BlitColorKey8
20
21; endpos = destbuf + blitlinesize;
22; while(destbuf < endpos) {
23; if(*srcbuf == colorkey) {
24; destbuf++;
25; }
26; else *destbuf++ = *srcbuf;
27; srcbuf++;
28; }
29; destbuf += (destscanlinesize-blitlinesize);
30; srcbuf += (srcscanlinesize-blitlinesize);
31;void BlitColorKey8(char *dest, char *src, ULONG key, ULONG linesize)
32_BlitColorKey8 PROC NEAR
33 push ebp
34 mov ebp, esp
35 push edi
36 push esi
37 push eax
38 push ebx
39 push ecx
40 push edx
41
42 mov edi, dword ptr [ebp+8] ;dest
43 mov esi, dword ptr [ebp+12] ;src
44 mov ecx, dword ptr [ebp+20] ;linesize
45 mov edx, dword ptr [ebp+16] ;colorkey
46
47 and ecx, 3
48 mov dh , dl
49 push ecx ;do the remaining bytes afterwards
50 mov eax, edx
51 shl edx, 16
52 and eax, 0000FFFFh
53 mov ecx, dword ptr [ebp+20] ;linesize
54 or edx, eax ; edx now contains the colorkey in each byte
55 shr ecx, 2 ;linesize in dwords
56 jz blitremain ; less then 4 bytes
57 jmp blitStart
58blitloop:
59 add esi, 4
60 add edi, 4
61blitStart:
62 mov ebx, dword ptr [esi]
63 mov eax, dword ptr [edi]
64 cmp ebx, edx ; All 4 bytes transparent?
65 jz TTTT
66 cmp bx, dx ; lower 2 bytes transparent ?
67 jz XXTT
68 cmp bl, dl ; lower byte trans
69 jz XXOT
70 mov al, bl
71 cmp bh, dh ; upper Byte transparent then skip copy
72 jz XXTT
73XXOT:
74 mov ah, bh
75XXTT: ; handle upper 2 pixel
76 ror eax, 16
77 ror ebx, 16
78 cmp bx,dx
79 jz skipbyte4
80 cmp bl, dl
81 je skipbyte3
82 mov al, bl
83skipbyte3:
84 cmp bh, dl
85 je skipbyte4
86 mov ah, bh
87skipbyte4:
88 ror eax, 16
89 mov dword ptr [edi], eax
90TTTT:
91 dec ecx
92 jz blitloop
93blitremain:
94 pop ecx
95 cmp ecx, 2
96 ja blit3
97 jz blit2
98 test ecx,ecx
99 jz endofblit
100 mov eax, dword ptr[esi]
101 mov ebx, dword ptr [edi]
102 rol eax, 8
103 rol ebx, 8
104 cmp al,dl
105 jz endofblit
106 mov bl,al
107 ror ebx, 8
108 mov dword ptr[edi],ebx
109 jmp endofblit
110
111blit3:
112 mov eax, dword ptr[esi]
113 mov ebx, dword ptr [edi]
114 ror eax, 16
115 ror ebx, 16
116 cmp ax, dx
117 jz TTX
118 cmp ah,dh
119 jz TXX
120 mov bh,ah
121TXX:
122 cmp al,dl
123 jz TTX
124 mov bl,al
125TTX:
126 ror eax, 16
127 ror ebx, 16
128 cmp ah,dh
129 jz Cpyback
130 mov dh,ah
131Cpyback:
132 mov dword ptr [edi], ebx
133 jmp endofblit
134
135blit2:
136 mov eax, dword ptr[esi]
137 mov ebx, dword ptr [edi]
138 ror eax, 16
139 ror ebx, 16
140 cmp ax, dx ; both bytes transparent ?
141 jz endofblit
142 cmp ah,dh
143 jz TX
144 mov bh,ah
145TX:
146 cmp al,dl
147 jz OT
148 mov bl,al
149OT:
150 ror ebx, 16
151 mov dword ptr[edi], ebx
152
153endofblit:
154 pop edx
155 pop ecx
156 pop ebx
157 pop eax
158 pop esi
159 pop edi
160 pop ebp
161 ret
162_BlitColorKey8 ENDP
163
164 PUBLIC _BlitColorKey16
165
166; endpos = destbuf + blitlinesize;
167; while(destbuf < endpos) {
168; if(*srcbuf == colorkey) {
169; destbuf++;
170; }
171; else *destbuf++ = *srcbuf;
172; srcbuf++;
173; }
174; destbuf += (destscanlinesize-blitlinesize);
175; srcbuf += (srcscanlinesize-blitlinesize);
176;void BlitColorKey16(char *dest, char *src, ULONG key, ULONG linesize)
177_BlitColorKey16 PROC NEAR
178 push ebp
179 mov ebp, esp
180 push edi
181 push esi
182 push eax
183 push ebx
184 push ecx
185 push edx
186
187 mov edi, dword ptr [ebp+8] ;dest
188 mov esi, dword ptr [ebp+12] ;src
189 mov ecx, dword ptr [ebp+20] ;linesize
190 mov edx, dword ptr [ebp+16] ;colorkey
191
192 mov eax, edx
193 shl edx, 16;
194 and eax, 0000FFFFh
195 or edx,eax ; create dwColorKey
196 shr ecx, 1 ; linesize in dwords
197 jz OnePixel
198
199blitloop16:
200 mov eax, dword ptr [esi]
201 mov ebx, dword ptr [edi]
202 add esi , 4
203 cmp eax, edx ; are both pixel transparent?
204 je LoopUp ; Yes, then Jump to loopend
205 cmp ax,dx ; Is lower pixel transparent
206 je DrawOT ; Yes So We got OT (OPAQUE/Transparent
207 mov bx,ax ; No so copy the lower pixel
208 ror eax,16 ;
209 cmp ax,dx ; Is higher pixel transparent
210 je CopyBack ;
211DrawOT:
212 ror ebx,16
213 mov bx,ax
214 ror ebx,16
215CopyBack:
216 mov dword ptr[edi], ebx ; copy back the result in ebx
217LoopUp:
218 mov ebx, dword ptr [ebp+20] ; V load this this in case we are done
219 add edi , 4 ; U
220OnePixel:
221 dec ecx
222 jnz blitloop16
223 test ebx, 1 ; Do we have an odd linesize
224 jz endofblit16
225 mov eax, dword ptr [esi]
226 mov ebx, dword ptr [edi]
227 ror eax,16
228 ror ebx,16
229 cmp ax, dx
230 je endofblit16 ; last pixel is transparent
231 mov bx,ax ; No so copy the lower pixel
232 ror ebx,16
233 mov dword ptr[edi], ebx ; copy back the result in ebx
234
235endofblit16:
236 pop edx
237 pop ecx
238 pop ebx
239 pop eax
240 pop esi
241 pop edi
242 pop ebp
243 ret
244_BlitColorKey16 ENDP
245
246
247 PUBLIC _BlitColorKey8MMX
248; Now the same as BlitColorKey8 now with MMX
249;void BlitColorKey8MMX(char *dest, char *src, ULONG key, ULONG linesize)
250
251_BlitColorKey8MMX PROC NEAR
252 push ebp
253 mov ebp, esp
254 push edi
255 push esi
256 push eax
257 push ebx
258 push ecx
259 push edx
260
261 mov edx, [ebp+16] ;colorkey
262 mov edi, [ebp+8] ;dest
263 mov dh,dl
264 mov esi, [ebp+12] ;src
265 mov ax,dx
266 mov ecx, dword ptr [ebp+20] ;linesize
267 shr edx,16
268 mov dx,ax
269 movd mm4,edx
270 movd mm5,edx
271 psllq mm4,32
272 por mm4,mm5
273 shr ecx,3
274 jz BltRemain8
275
276bltLoopMMX8:
277 movq mm0, [esi] ; get source qword
278 movq mm1, [edi] ; get dest qword
279 movq mm2,mm0 ; copy source
280 PCMPEQB mm0,mm4 ; create mask
281 pand mm1,mm0 ; mask dest
282 pandn mm2,mm0 ; mask source
283 por mm1,mm2 ; or them
284 movq qword ptr [edi], mm1 ; write back result
285 add esi, 8
286 add edi, 8
287 dec ecx
288 jnz bltLoopMMX8
289BltRemain8:
290 mov eax, dword ptr [ebp+20];
291 and eax, 7
292 jmp ds:JmpTable[eax*4]
293
294align 4
295
296JmpTable:
297 dd offset bltEndMMX8
298 dd offset blt1MMX8
299 dd offset blt2MMX8
300 dd offset blt3MMX8
301 dd offset blt4MMX8
302 dd offset blt5MMX8
303 dd offset blt6MMX8
304 dd offset blt7MMX8
305align 2
306;
307; Maybe it would be faster for 7-5 to load a qword into mm0/mm1
308; but we might cross a page and so I guess this is saver
309;
310blt7MMX8:
311 movd mm0, dword ptr[esi]
312 mov ax, word ptr[esi+4]
313 mov bx, word ptr[edi+4]
314 movd mm1, dword ptr[edi]
315 psllq mm0,32
316 shl eax,8
317 shl ebx,8
318 mov al, byte ptr[esi+6]
319 mov bl, byte ptr[edi+6]
320 movd mm0,eax
321 psllq mm1,32
322 movd mm1,ebx
323 movq mm2,mm0 ; copy source
324 PCMPEQB mm0,mm4 ; create mask
325 pand mm1,mm0 ; mask dest
326 pandn mm2,mm0 ; mask source
327 por mm1,mm2 ; or them
328 movd eax, mm1
329 psrlq mm1,32
330 mov byte ptr[edi+6], al
331 movd dword ptr[edi], mm1
332 shr eax,8
333 mov word ptr[edi+4],ax
334 jmp bltEndMMX8
335
336blt6MMX8:
337 movd mm0, dword ptr[esi]
338 mov ax, word ptr[esi+4]
339 mov bx, word ptr[edi+4]
340 movd mm1, dword ptr[edi]
341 psllq mm0,32
342 psllq mm1,32
343 movd mm0,eax
344 movd mm1,ebx
345 movq mm2,mm0 ; copy source
346 pcmpeqb mm0,mm4 ; create mask
347 pand mm1,mm0 ; mask dest
348 pandn mm2,mm0 ; mask source
349 por mm1,mm2 ; or them
350 movd eax, mm1
351 psrlq mm1,32
352 mov word ptr[edi+4],ax
353 movd dword ptr[edi], mm1
354 jmp bltEndMMX8
355
356blt5MMX8:
357 movd mm0, dword ptr[esi]
358 movd mm1, dword ptr[edi]
359 movq mm2,mm0 ; copy source
360 pcmpeqb mm0,mm4 ; create mask
361 pand mm1,mm0 ; mask dest
362 add esi, 4;
363 pandn mm2,mm0 ; mask source
364 por mm1,mm2 ; or them
365 movd dword ptr[edi], mm1
366 add edi,4
367 jmp blt1MMX8
368
369blt4MMX8:
370 movd mm0, dword ptr[esi]
371 movd mm1, dword ptr[edi]
372 movq mm2,mm0 ; copy source
373 pcmpeqb mm0,mm4 ; create mask
374 pand mm1,mm0 ; mask dest
375 pandn mm2,mm0 ; mask source
376 por mm1,mm2 ; or them
377 movd dword ptr [edi], mm1 ; write back result
378 jmp bltEndMMX8
379;
380; loading a dword into mm0/mm1 might be faster for 3-2...
381;
382blt3MMX8:
383 mov ax , word ptr [esi]
384 mov bx , word ptr [edi]
385 shl eax,8 ; 3 Pixel left to blit
386 shl ebx,8 ; so shift the buffers
387 mov al,byte ptr[esi+2]
388 mov bl,byte ptr[edi+2]
389 movd mm0,eax
390 movd mm1,ebx
391 movq mm2,mm0
392 pcmpeqb mm0,mm4 ; create mask
393 pand mm1,mm0 ; mask dest
394 pandn mm2,mm0 ; mask source
395 por mm1,mm2 ; or them
396 movd eax, mm1 ; write back result
397 mov byte ptr[edi+2],al
398 shr eax,8
399 mov word ptr[edi],ax
400 jmp bltEndMMX8
401
402blt2MMX8:
403 mov al, byte ptr [esi]
404 cmp al,dl
405 je blt1aMMX8
406 mov byte ptr [edi], al
407 mov bl , byte ptr [esi+1]
408 cmp bl,dl
409 je bltEndMMX8
410 mov byte ptr [edi+1], bl
411 jmp bltEndMMX8
412blt1aMMX8:
413 add esi,1
414 add edi,1
415blt1MMX8:
416 mov al, byte ptr [esi]
417 cmp al,dl
418 je bltEndMMX8
419 mov byte ptr [edi], al
420
421bltEndMMX8:
422 pop edx
423 pop ecx
424 pop ebx
425 pop eax
426 pop esi
427 pop edi
428 pop ebp
429 ret
430_BlitColorKey8MMX ENDP
431
432
433 PUBLIC _BlitColorKey16MMX
434; Now the same as BlitColorKey16 now with MMX
435;void BlitColorKey16MMX(char *dest, char *src, ULONG key, ULONG linesize)
436_BlitColorKey16MMX PROC NEAR
437 push ebp
438 mov ebp, esp
439 push edi
440 push esi
441 push ecx
442 push edx
443
444 mov edx, dword ptr [ebp+16] ; colorkey
445 mov edi, dword ptr [ebp+8] ; dest
446 mov eax, dword ptr [ebp+16] ; colorkey
447 shr edx,16;
448 mov ecx, dword ptr [ebp+20] ; linesize in pixel!
449 mov dx,ax ; extend colorkey to 32 Bit
450 mov esi, dword ptr [ebp+12] ; src
451 mov eax, ecx ; copy of linesize
452 shr ecx,2
453 movd mm4, edx
454 jz BltRemain16
455
456 movd mm5,edx ; Extend colorkey to 64 Bit
457 psllq mm4,32
458 por mm4,mm5
459
460bltLoopMMX16:
461 movq mm0,qword ptr [esi] ; get source dword
462 movq mm1,qword ptr [edi] ; get destination
463 movq mm2,mm0 ; copy source
464 pcmpeqw mm0,mm4 ; create mask
465 pand mm1,mm0 ; mask dest
466 add esi, 8
467 pandn mm2,mm0 ; mask source
468 por mm1,mm2 ; or them
469 movq qword ptr [edi], mm1 ; write back result
470 add edi, 8
471 dec ecx
472 jnz bltLoopMMX16
473
474BltRemain16:
475 and eax,3
476 jmp ds:JumpTable[eax*4]
477
478align 4
479
480JumpTable:
481 dd offset bltEndMMX16
482 dd offset blt1MMX16
483 dd offset blt2MMX16
484 dd offset blt3MMX16
485align 2
486
487blt3MMX16:
488 movd mm0, dword ptr[esi]
489 movd mm1, dword ptr[edi]
490 movq mm2,mm0 ; copy source
491 add esi,4
492 pcmpeqw mm0,mm4 ; create mask 16 bit
493 pand mm1,mm0 ; mask dest
494 pandn mm2,mm0 ; mask source
495 add edi,4
496 por mm1,mm2 ; or them
497 movd dword ptr[edi-4], mm1
498 jmp blt1MMX16
499
500blt2MMX16:
501 movd mm0, dword ptr[esi]
502 movd mm1, dword ptr[edi]
503 movq mm2,mm0 ; copy source
504 pcmpeqw mm0,mm4 ; create mask 16 bit
505 pand mm1,mm0 ; mask dest
506 pandn mm2,mm0 ; mask source
507 por mm1,mm2 ; or them
508 movd dword ptr [edi], mm1 ; write back result
509 jmp bltEndMMX16
510
511blt1MMX16:
512 mov ax, word ptr [esi] ; cmov ?
513 cmp ax,dx
514 je bltEndMMX16
515 mov word ptr [edi], ax
516
517
518bltEndMMX16:
519 pop edx
520 pop ecx
521 pop esi
522 pop edi
523 pop ebp
524 ret
525_BlitColorKey16MMX ENDP
526
;-----------------------------------------------------------------------
; extern void __cdecl BltTransSrcRecMMX(PBYTE dest, PBYTE src,
;                                       ULONG ulBltWidth, ULONG ulBltHeight,
;                                       ULONG ulDestPitch, ULONG ulSrcPitch,
;                                       ULONG ulTransCol);
;
; Placeholder: the procedure has no body yet.  It returns immediately
; without reading its arguments or touching memory; all registers and
; flags are left as the caller had them.
;-----------------------------------------------------------------------

        PUBLIC  _BltTransSrcRecMMX
_BltTransSrcRecMMX PROC NEAR
EndTSBlt:
        ret                                 ; nothing to do
_BltTransSrcRecMMX ENDP
553
554
555 PUBLIC _BltRec
556;
557; extern void __cdecl BltRec(PBYTE dest, PBYTE src, ULONG ulBltWidth,ULONG ulBltHeight
558; ULONG ulDestPitch, ULONG ulSrcPitch);
559_BltRec PROC NEAR
560 push ebp
561 mov ebp, esp
562 push edi
563 push esi
564 push eax
565 push ebx
566 push ecx
567 push edx
568
569 mov ecx, dword ptr [ebp+16] ; U ulBltWidth
570 mov esi, dword ptr [ebp+12] ; V src
571 mov ebx, ecx ; U
572 mov edx, dword ptr [ebp+20] ; V ulBltHeight
573 and ebx, 0Fh ; U ebx = # of bytes < 16
574 mov edi, dword ptr [ebp+8] ; V dest
575 cmp edx, 0
576 jz BltRecEnd ; height is zero so done
577 shr ecx, 4 ; U
578 jz SmallBlt ; Small (width < 16) rectangle done in special case
579 test ebx, ebx
580 jnz ComplexBlt ; ulBltWidth mod 16 is not 0
581
582;
583; Blitwidth is an multiple of 16
584;
585 mov ebx, dword ptr [ebp+24] ; ulDestPitch
586 mov eax, dword ptr [ebp+28] ; ulSrcPitch
587 sub ebx, dword ptr [ebp+16] ; adjust both widths
588 sub eax, dword ptr [ebp+16]
589 mov dword ptr [ebp+28], eax ; store adjusted SrcPitch
590 mov eax, ecx
591LineLoop:
592 FLD QWORD PTR [ESI]
593 FLD QWORD PTR [ESI+8]
594 FXCH
595 FSTP QWORD PTR [EDI]
596 FSTP QWORD PTR [EDI+8]
597 ADD ESI,16
598 ADD EDI,16
599 dec eax
600 jz LineLoop
601 dec edx
602 jz BltRecEnd
603 add ESI, dword ptr[ebp+28]
604 add EDI, ebx
605 mov eax, ecx
606 jmp LineLoop
607
608SmallBlt:
609 mov eax, dword ptr [ebp+28] ; ulSrcPitch
610 mov ecx, dword ptr [ebp+24] ; ulDestPitch
611 jmp ds:SmallJmpTable[ebx*4]
612SmallJmpTable:
613 dd offset BltRecEnd ; BlitWidth is 0 done
614 dd offset Rec1
615 dd offset Rec2
616 dd offset Rec3
617 dd offset Rec4
618 dd offset Rec5
619 dd offset Rec6
620 dd offset Rec7
621 dd offset Rec8
622 dd offset Rec9
623 dd offset Rec10
624 dd offset Rec11
625 dd offset Rec12
626 dd offset Rec13
627 dd offset Rec14
628 dd offset Rec15
629
630;One Pixel wide
631
632Rec1:
633 cmp edx,4
634 jb Rec1_0123
635 mov bl, byte ptr [esi]
636 add esi,eax
637 mov byte ptr [edi], bl
638 add edi,ecx
639 mov bl, byte ptr [esi]
640 add esi,eax
641 mov byte ptr [edi], bl
642 add edi,ecx
643 mov bl, byte ptr [esi]
644 add esi,eax
645 mov byte ptr [edi], bl
646 add edi,ecx
647 mov bl, byte ptr [esi]
648 add esi,eax
649 mov byte ptr [edi], bl
650 add edi,ecx
651 sub edx,4
652 jnz Rec1
653 jmp BltRecEnd
654Rec1_0123:
655 cmp edx,2
656 jz Rec1_2
657 jb Rec1_01
658; Must be 3 lines left
659 mov bl, byte ptr [esi]
660 add esi,eax
661 mov byte ptr [edi], bl
662 add edi,ecx
663 mov bl, byte ptr [esi]
664 add esi,eax
665 mov byte ptr [edi], bl
666 add edi,ecx
667 mov bl, byte ptr [esi]
668 mov byte ptr [edi], bl
669 jmp BltRecEnd
670Rec1_2:
671 mov bl, byte ptr [esi]
672 add esi,eax
673 mov byte ptr [edi], bl
674 add edi,ecx
675 mov bl, byte ptr [esi]
676 mov byte ptr [edi], bl
677 jmp BltRecEnd
678Rec1_01:
679 test edx,edx
680 jz BltRecEnd
681 mov bl, byte ptr [esi]
682 mov byte ptr [edi], bl
683 jmp BltRecEnd
684
685;2 Pixel Wide
686
687Rec2:
688 cmp edx,4
689 jb Rec2_0123
690 mov bx, word ptr [esi]
691 add esi,eax
692 mov word ptr [edi], bx
693 add edi,ecx
694 mov bx, word ptr [esi]
695 add esi,eax
696 mov word ptr [edi], bx
697 add edi,ecx
698 mov bx, word ptr [esi]
699 add esi,eax
700 mov word ptr [edi], bx
701 add edi,ecx
702 mov bx, word ptr [esi]
703 add esi,eax
704 mov word ptr [edi], bx
705 add edi,ecx
706 sub edx, 4
707 jnz Rec2
708 jmp BltRecEnd
709
710Rec2_0123:
711 cmp edx,2
712 jz Rec2_2
713 jb Rec2_01
714;3 lines left
715 mov bx, word ptr [esi]
716 add esi,eax
717 mov word ptr [edi], bx
718 add edi,ecx
719 mov bx, word ptr [esi]
720 add esi,eax
721 mov word ptr [edi], bx
722 add edi,ecx
723 mov bx, word ptr [esi]
724 mov word ptr [edi], bx
725 jmp BltRecEnd
726Rec2_2:
727 mov bx, word ptr [esi]
728 add esi,eax
729 mov word ptr [edi], bx
730 add edi,ecx
731 mov bx, word ptr [esi]
732 mov word ptr [edi], bx
733 jmp BltRecEnd
734Rec2_01:
735 test edx,edx
736 jz BltRecEnd
737 mov bx, word ptr [esi]
738 mov word ptr [edi], bx
739 jmp BltRecEnd
740
741; 3 Pixel Wide must check if it's better to read 4 bytes as
742; Intel might stall on reading 2 and 1 byte, but this takes more care as we
743; could create a pagefault on the last 3 pixel
744
745Rec3:
746 cmp edx,4
747 jb Rec3_0123
748 push edx
749 mov bx, word ptr [esi]
750 mov dl, byte ptr [esi+2]
751 add esi,eax
752 mov word ptr [edi], bx
753 mov byte ptr [edi+2], dl
754 add edi,ecx
755 mov bx, word ptr [esi]
756 mov dl, byte ptr [esi+2]
757 add esi,eax
758 mov word ptr [edi], bx
759 mov byte ptr [edi+2], dl
760 add edi,ecx
761 mov bx, word ptr [esi]
762 mov dl, byte ptr [esi+2]
763 add esi,eax
764 mov word ptr [edi], bx
765 mov byte ptr [edi+2], dl
766 add edi,ecx
767 mov bx, word ptr [esi]
768 mov dl, byte ptr [esi+2]
769 add esi,eax
770 mov word ptr [edi], bx
771 mov byte ptr [edi+2], dl
772 add edi,ecx
773 pop edx
774 sub edx,4
775 jnz Rec3
776 jmp BltRecEnd
777
778Rec3_0123:
779 cmp edx,2
780 jz Rec3_2
781 jb Rec3_01
782; Must be 3 lines left
783 mov bx, word ptr [esi]
784 mov dl, byte ptr [esi+2]
785 add esi,eax
786 mov word ptr [edi], bx
787 mov byte ptr [edi+2], dl
788 add edi,ecx
789 mov bx, word ptr [esi]
790 mov dl, byte ptr [esi+2]
791 add esi,eax
792 mov word ptr [edi], bx
793 mov byte ptr [edi+2], dl
794 add edi,ecx
795 mov bx, word ptr [esi]
796 mov dl, byte ptr [esi+2]
797 mov word ptr [edi], bx
798 mov byte ptr [edi+2], dl
799 jmp BltRecEnd
800Rec3_2:
801 mov bx, word ptr [esi]
802 mov dl, byte ptr [esi+2]
803 add esi,eax
804 mov word ptr [edi], bx
805 mov byte ptr [edi+2], dl
806 add edi,ecx
807 mov bx, word ptr [esi]
808 mov dl, byte ptr [esi+2]
809 mov word ptr [edi], bx
810 mov byte ptr [edi+2], dl
811 jmp BltRecEnd
812Rec3_01:
813 test edx,edx
814 jz BltRecEnd
815 mov bx, word ptr [esi]
816 mov dl, byte ptr [esi+2]
817 mov word ptr [edi], bx
818 mov byte ptr [edi+2], dl
819 jmp BltRecEnd
820
821; 4 Pixel Wide
822
823Rec4:
824 cmp edx,4
825 jb Rec4_0123
826 mov ebx, dword ptr [esi]
827 add esi,eax
828 mov dword ptr [edi], ebx
829 add edi,ecx
830 mov ebx, dword ptr [esi]
831 add esi,eax
832 mov dword ptr [edi], ebx
833 add edi,ecx
834 mov ebx, dword ptr [esi]
835 add esi,eax
836 mov dword ptr [edi], ebx
837 add edi,ecx
838 mov ebx, dword ptr [esi]
839 add esi,eax
840 mov dword ptr [edi], ebx
841 add edi,ecx
842 sub edx ,4
843 jnz Rec4
844 jmp BltRecEnd
845
846Rec4_0123:
847 cmp edx,2
848 jz Rec2_2
849 jb Rec2_01
850;3 lines left
851 mov ebx, dword ptr [esi]
852 add esi,eax
853 mov dword ptr [edi], ebx
854 add edi,ecx
855 mov ebx, dword ptr [esi]
856 add esi,eax
857 mov dword ptr [edi], ebx
858 add edi,ecx
859 mov ebx, dword ptr [esi]
860 mov dword ptr [edi], ebx
861 jmp BltRecEnd
862Rec4_2:
863 mov ebx, dword ptr [esi]
864 add esi,eax
865 mov dword ptr [edi], ebx
866 add edi,ecx
867 mov ebx, dword ptr [esi]
868 mov dword ptr [edi], ebx
869 jmp BltRecEnd
870Rec4_01:
871 test edx,edx
872 jz BltRecEnd
873 mov ebx, dword ptr [esi]
874 mov dword ptr [edi], ebx
875 jmp BltRecEnd
876
877; 5 Pixel Wide
878
879Rec5:
880 cmp edx,4
881 jb Rec5_0123
882 push edx
883 mov ebx, dword ptr [esi]
884 mov dl, byte ptr [esi+4]
885 add esi,eax
886 mov dword ptr [edi], ebx
887 mov byte ptr [edi+4], dl
888 add edi,ecx
889 mov ebx, dword ptr [esi]
890 mov dl, byte ptr [esi+4]
891 add esi,eax
892 mov dword ptr [edi], ebx
893 mov byte ptr [edi+4], dl
894 add edi,ecx
895 mov ebx, dword ptr [esi]
896 mov dl, byte ptr [esi+4]
897 add esi,eax
898 mov dword ptr [edi], ebx
899 mov byte ptr [edi+4], dl
900 add edi,ecx
901 mov ebx, dword ptr [esi]
902 mov dl, byte ptr [esi+4]
903 add esi,eax
904 mov dword ptr [edi], ebx
905 mov byte ptr [edi+4], dl
906 add edi,ecx
907 pop edx
908 sub edx ,4
909 jnz Rec5
910 jmp BltRecEnd
911Rec5_0123:
912 cmp edx,2
913 jz Rec5_2
914 jb Rec5_01
915; Must be 3 lines left
916 mov ebx, dword ptr [esi]
917 mov dl, byte ptr [esi+4]
918 add esi,eax
919 mov dword ptr [edi], ebx
920 mov byte ptr [edi+4], dl
921 add edi,ecx
922 mov ebx, dword ptr [esi]
923 mov dl, byte ptr [esi+4]
924 add esi,eax
925 mov dword ptr [edi], ebx
926 mov byte ptr [edi+4], dl
927 add edi,ecx
928 mov ebx, dword ptr [esi]
929 mov dl, byte ptr [esi+4]
930 mov dword ptr [edi], ebx
931 mov byte ptr [edi+4], dl
932 jmp BltRecEnd
933Rec5_2:
934 mov ebx, dword ptr [esi]
935 mov dl, byte ptr [esi+4]
936 add esi,eax
937 mov dword ptr [edi], ebx
938 mov byte ptr [edi+4], dl
939 add edi,ecx
940 mov ebx, dword ptr [esi]
941 mov dl, byte ptr [esi+4]
942 mov dword ptr [edi], ebx
943 mov byte ptr [edi+4], dl
944 jmp BltRecEnd
945Rec5_01:
946 test edx,edx
947 jz BltRecEnd
948 mov ebx, dword ptr [esi]
949 mov dl, byte ptr [esi+4]
950 mov dword ptr [edi], ebx
951 mov byte ptr [edi+4], dl
952 jmp BltRecEnd
953
954; 6 Pixel Wide
955
956Rec6:
957 cmp edx,4
958 jb Rec6_0123
959 push edx
960 mov ebx, dword ptr [esi]
961 mov dx, word ptr [esi+4]
962 add esi,eax
963 mov dword ptr [edi], ebx
964 mov word ptr [edi+4], dx
965 add edi,ecx
966 mov ebx, dword ptr [esi]
967 mov dx, word ptr [esi+4]
968 add esi,eax
969 mov dword ptr [edi], ebx
970 mov word ptr [edi+4], dx
971 add edi,ecx
972 mov ebx, dword ptr [esi]
973 mov dx, word ptr [esi+4]
974 add esi,eax
975 mov dword ptr [edi], ebx
976 mov word ptr [edi+4], dx
977 add edi,ecx
978 mov ebx, dword ptr [esi]
979 mov dx, word ptr [esi+4]
980 add esi,eax
981 mov dword ptr [edi], ebx
982 mov word ptr [edi+4], dx
983 add edi,ecx
984 pop edx
985 sub edx ,4
986 jnz Rec6
987 jmp BltRecEnd
988Rec6_0123:
989 cmp edx,2
990 jz Rec6_2
991 jb Rec6_01
992; Must be 3 lines left
993 mov ebx, dword ptr [esi]
994 mov dx, word ptr [esi+4]
995 add esi,eax
996 mov dword ptr [edi], ebx
997 mov word ptr [edi+4], dx
998 add edi,ecx
999 mov ebx, dword ptr [esi]
1000 mov dx, word ptr [esi+4]
1001 add esi,eax
1002 mov dword ptr [edi], ebx
1003 mov word ptr [edi+4], dx
1004 add edi,ecx
1005 mov ebx, dword ptr [esi]
1006 mov dx, word ptr [esi+4]
1007 mov dword ptr [edi], ebx
1008 mov word ptr [edi+4], dx
1009 jmp BltRecEnd
1010Rec6_2:
1011 mov ebx, dword ptr [esi]
1012 mov dx, word ptr [esi+4]
1013 add esi,eax
1014 mov dword ptr [edi], ebx
1015 mov word ptr [edi+4], dx
1016 add edi,ecx
1017 mov ebx, dword ptr [esi]
1018 mov dx, word ptr [esi+4]
1019 mov dword ptr [edi], ebx
1020 mov word ptr [edi+4], dx
1021 jmp BltRecEnd
1022Rec6_01:
1023 test edx,edx
1024 jz BltRecEnd
1025 mov ebx, dword ptr [esi]
1026 mov dx, word ptr [esi+4]
1027 mov dword ptr [edi], ebx
1028 mov word ptr [edi+4], dx
1029 jmp BltRecEnd
1030
1031; 7 Pixel Wide
1032
1033Rec7:
1034 cmp edx,4
1035 jb Rec6_0123
1036 push edx
1037 mov ebx, dword ptr [esi]
1038 mov dx, word ptr [esi+4]
1039 mov dword ptr [edi], ebx
1040 mov word ptr [edi+4], dx
1041 mov bl, byte ptr[esi+6]
1042 add esi,eax
1043 mov byte ptr[edi+6],bl
1044 add edi,ecx
1045 xor ebx,ebx ; clear ebx to avoid stalls
1046 mov ebx, dword ptr [esi]
1047 mov dx, word ptr [esi+4]
1048 mov dword ptr [edi], ebx
1049 mov word ptr [edi+4], dx
1050 mov bl, byte ptr[esi+6]
1051 add esi,eax
1052 mov byte ptr[edi+6],bl
1053 add edi,ecx
1054 xor ebx,ebx ; clear ebx to avoid stalls
1055 mov ebx, dword ptr [esi]
1056 mov dx, word ptr [esi+4]
1057 mov dword ptr [edi], ebx
1058 mov word ptr [edi+4], dx
1059 mov bl, byte ptr[esi+6]
1060 add esi,eax
1061 mov byte ptr[edi+6],bl
1062 add edi,ecx
1063 xor ebx,ebx ; clear ebx to avoid stalls
1064 mov ebx, dword ptr [esi]
1065 mov dx, word ptr [esi+4]
1066 mov dword ptr [edi], ebx
1067 mov word ptr [edi+4], dx
1068 mov bl, byte ptr[esi+6]
1069 add esi,eax
1070 mov byte ptr[edi+6],bl
1071 add edi,ecx
1072 xor ebx,ebx ; clear ebx to avoid stalls
1073 pop edx
1074 sub edx ,4
1075 jnz Rec7
1076 jmp BltRecEnd
1077Rec7_0123:
1078 cmp edx,2
1079 jz Rec7_2
1080 jb Rec7_01
1081; Must be 3 lines left
1082 mov ebx, dword ptr [esi]
1083 mov dx, word ptr [esi+4]
1084 mov dword ptr [edi], ebx
1085 mov word ptr [edi+4], dx
1086 mov bl, byte ptr[esi+6]
1087 add esi,eax
1088 mov byte ptr[edi+6],bl
1089 add edi,ecx
1090 xor ebx,ebx ; clear ebx to avoid stalls
1091 mov ebx, dword ptr [esi]
1092 mov dx, word ptr [esi+4]
1093 mov dword ptr [edi], ebx
1094 mov word ptr [edi+4], dx
1095 mov bl, byte ptr[esi+6]
1096 add esi,eax
1097 mov byte ptr[edi+6],bl
1098 add edi,ecx
1099 xor ebx,ebx ; clear ebx to avoid stalls
1100 mov ebx, dword ptr [esi]
1101 mov dx, word ptr [esi+4]
1102 mov dword ptr [edi], ebx
1103 mov word ptr [edi+4], dx
1104 mov bl, byte ptr[esi+6]
1105 mov byte ptr[edi+6],bl
1106 jmp BltRecEnd
1107Rec7_2:
1108 mov ebx, dword ptr [esi]
1109 mov dx, word ptr [esi+4]
1110 mov dword ptr [edi], ebx
1111 mov word ptr [edi+4], dx
1112 mov bl, byte ptr[esi+6]
1113 add esi,eax
1114 mov byte ptr[edi+6],bl
1115 add edi,ecx
1116 xor ebx,ebx ; clear ebx to avoid stalls
1117 mov ebx, dword ptr [esi]
1118 mov dx, word ptr [esi+4]
1119 mov dword ptr [edi], ebx
1120 mov word ptr [edi+4], dx
1121 mov bl, byte ptr[esi+6]
1122 mov byte ptr[edi+6],bl
1123 jmp BltRecEnd
1124Rec7_01:
1125 test edx,edx
1126 jz BltRecEnd
1127 mov ebx, dword ptr [esi]
1128 mov dx, word ptr [esi+4]
1129 mov dword ptr [edi], ebx
1130 mov word ptr [edi+4], dx
1131 mov bl, byte ptr[esi+6]
1132 mov byte ptr[edi+6],bl
1133 jmp BltRecEnd
1134
1135; 8 Pixel Wide
1136
1137Rec8:
1138 cmp edx,4
1139 jb Rec8_0123
1140 push edx
1141 mov ebx, dword ptr [esi]
1142 mov edx, dword ptr [esi+4]
1143 mov dword ptr [edi], ebx
1144 mov dword ptr [edi+4], edx
1145 add esi,eax
1146 add edi,ecx
1147 mov ebx, dword ptr [esi]
1148 mov edx, dword ptr [esi+4]
1149 mov dword ptr [edi], ebx
1150 mov dword ptr [edi+4], edx
1151 add esi,eax
1152 add edi,ecx
1153 mov ebx, dword ptr [esi]
1154 mov edx, dword ptr [esi+4]
1155 mov dword ptr [edi], ebx
1156 mov dword ptr [edi+4], edx
1157 add esi,eax
1158 add edi,ecx
1159 mov ebx, dword ptr [esi]
1160 mov edx, dword ptr [esi+4]
1161 mov dword ptr [edi], ebx
1162 mov dword ptr [edi+4], edx
1163 add esi,eax
1164 add edi,ecx
1165 pop edx
1166 sub edx ,4
1167 jnz Rec8
1168 jmp BltRecEnd
1169
1170Rec8_0123:
1171 cmp edx,2
1172 jz Rec8_2
1173 jb Rec8_01
1174;3 lines left
1175 mov ebx, dword ptr [esi]
1176 mov edx, dword ptr [esi+4]
1177 mov dword ptr [edi], ebx
1178 mov dword ptr [edi+4], edx
1179 add esi,eax
1180 add edi,ecx
1181 mov ebx, dword ptr [esi]
1182 mov edx, dword ptr [esi+4]
1183 mov dword ptr [edi], ebx
1184 mov dword ptr [edi+4], edx
1185 add esi,eax
1186 add edi,ecx
1187 mov ebx, dword ptr [esi]
1188 mov edx, dword ptr [esi+4]
1189 mov dword ptr [edi], ebx
1190 mov dword ptr [edi+4], edx
1191 jmp BltRecEnd
1192Rec8_2:
1193 mov ebx, dword ptr [esi]
1194 mov edx, dword ptr [esi+4]
1195 mov dword ptr [edi], ebx
1196 mov dword ptr [edi+4], edx
1197 add esi,eax
1198 add edi,ecx
1199 mov ebx, dword ptr [esi]
1200 mov edx, dword ptr [esi+4]
1201 mov dword ptr [edi], ebx
1202 mov dword ptr [edi+4], edx
1203 jmp BltRecEnd
1204Rec8_01:
1205 test edx,edx
1206 jz BltRecEnd
1207 mov ebx, dword ptr [esi]
1208 mov edx, dword ptr [esi+4]
1209 mov dword ptr [edi], ebx
1210 mov dword ptr [edi+4], edx
1211 jmp BltRecEnd
1212
1213; 9 Pixel Wide
1214
1215Rec9:
1216 cmp edx,4
1217 jb Rec9_0123
1218 push edx
1219 FLD QWORD PTR [ESI]
1220 mov bl, byte ptr [esi+8]
1221 FSTP QWORD PTR [EDI]
1222 mov byte ptr [edi+8], bl
1223 add esi,eax
1224 add edi,ecx
1225 FLD QWORD PTR [ESI]
1226 mov bl, byte ptr [esi+8]
1227 FSTP QWORD PTR [EDI]
1228 mov byte ptr [edi+8], bl
1229 add esi,eax
1230 add edi,ecx
1231 FLD QWORD PTR [ESI]
1232 mov bl, byte ptr [esi+8]
1233 FSTP QWORD PTR [EDI]
1234 mov byte ptr [edi+8], bl
1235 add esi,eax
1236 add edi,ecx
1237 FLD QWORD PTR [ESI]
1238 mov bl, byte ptr [esi+8]
1239 FSTP QWORD PTR [EDI]
1240 mov byte ptr [edi+8], bl
1241 add esi,eax
1242 add edi,ecx
1243 pop edx
1244 sub edx ,4
1245 jnz Rec9
1246 jmp BltRecEnd
1247
1248Rec9_0123:
1249 cmp edx,2
1250 jz Rec9_2
1251 jb Rec9_01
1252;3 lines left
1253 FLD QWORD PTR [ESI]
1254 mov bl, byte ptr [esi+8]
1255 FSTP QWORD PTR [EDI]
1256 mov byte ptr [edi+8], bl
1257 add esi,eax
1258 add edi,ecx
1259 FLD QWORD PTR [ESI]
1260 mov bl, byte ptr [esi+8]
1261 FSTP QWORD PTR [EDI]
1262 mov byte ptr [edi+8], bl
1263 add esi,eax
1264 add edi,ecx
1265 FLD QWORD PTR [ESI]
1266 mov bl, byte ptr [esi+8]
1267 FSTP QWORD PTR [EDI]
1268 mov byte ptr [edi+8], bl
1269 jmp BltRecEnd
1270Rec9_2:
1271 FLD QWORD PTR [ESI]
1272 mov bl, byte ptr [esi+8]
1273 FSTP QWORD PTR [EDI]
1274 mov byte ptr [edi+8], bl
1275 add esi,eax
1276 add edi,ecx
1277 FLD QWORD PTR [ESI]
1278 mov bl, byte ptr [esi+8]
1279 FSTP QWORD PTR [EDI]
1280 mov byte ptr [edi+8], bl
1281 jmp BltRecEnd
1282Rec9_01:
1283 test edx,edx
1284 jz BltRecEnd
1285 FLD QWORD PTR [ESI]
1286 mov bl, byte ptr [esi+8]
1287 FSTP QWORD PTR [EDI]
1288 mov byte ptr [edi+8], bl
1289 jmp BltRecEnd
1290
1291; 10 Pixel Wide
1292
1293Rec10:
1294 cmp edx,4
1295 jb Rec10_0123
1296 FLD QWORD PTR [ESI]
1297 mov bx, word ptr [esi+8]
1298 FSTP QWORD PTR [EDI]
1299 mov word ptr [edi+8], bx
1300 add esi,eax
1301 add edi,ecx
1302 FLD QWORD PTR [ESI]
1303 mov bx, word ptr [esi+8]
1304 FSTP QWORD PTR [EDI]
1305 mov word ptr [edi+8], bx
1306 add esi,eax
1307 add edi,ecx
1308 FLD QWORD PTR [ESI]
1309 mov bx, word ptr [esi+8]
1310 FSTP QWORD PTR [EDI]
1311 mov word ptr [edi+8], bx
1312 add esi,eax
1313 add edi,ecx
1314 FLD QWORD PTR [ESI]
1315 mov bx, word ptr [esi+8]
1316 FSTP QWORD PTR [EDI]
1317 mov word ptr [edi+8], bx
1318 add esi,eax
1319 add edi,ecx
1320 sub edx ,4
1321 jnz Rec10
1322 jmp BltRecEnd
1323
1324Rec10_0123:
1325 cmp edx,2
1326 jz Rec10_2
1327 jb Rec10_01
1328;3 lines left
1329 FLD QWORD PTR [ESI]
1330 mov bx, word ptr [esi+8]
1331 FSTP QWORD PTR [EDI]
1332 mov word ptr [edi+8], bx
1333 add esi,eax
1334 add edi,ecx
1335 FLD QWORD PTR [ESI]
1336 mov bx, word ptr [esi+8]
1337 FSTP QWORD PTR [EDI]
1338 mov word ptr [edi+8], bx
1339 add esi,eax
1340 add edi,ecx
1341 FLD QWORD PTR [ESI]
1342 mov bx, word ptr [esi+8]
1343 FSTP QWORD PTR [EDI]
1344 mov word ptr [edi+8], bx
1345 jmp BltRecEnd
1346Rec10_2:
1347 FLD QWORD PTR [ESI]
1348 mov bx, word ptr [esi+8]
1349 FSTP QWORD PTR [EDI]
1350 mov word ptr [edi+8], bx
1351 add esi,eax
1352 add edi,ecx
1353 FLD QWORD PTR [ESI]
1354 mov bx, word ptr [esi+8]
1355 FSTP QWORD PTR [EDI]
1356 mov word ptr [edi+8], bx
1357 jmp BltRecEnd
1358Rec10_01:
1359 test edx,edx
1360 jz BltRecEnd
1361 FLD QWORD PTR [ESI]
1362 mov bx, word ptr [esi+8]
1363 FSTP QWORD PTR [EDI]
1364 mov word ptr [edi+8], bx
1365 jmp BltRecEnd
1366
1367; 11 Pixel Wide
1368
1369Rec11:
1370 cmp edx,4
1371 jb Rec11_0123
1372 push edx
1373 FLD QWORD PTR [ESI]
1374 mov bx, word ptr [esi+8]
1375 mov dl, byte ptr [esi+10]
1376 FSTP QWORD PTR [EDI]
1377 mov word ptr [edi+8], bx
1378 mov byte ptr [edi+10], dl
1379 add esi,eax
1380 add edi,ecx
1381 FLD QWORD PTR [ESI]
1382 mov bx, word ptr [esi+8]
1383 mov dl, byte ptr [esi+10]
1384 FSTP QWORD PTR [EDI]
1385 mov word ptr [edi+8], bx
1386 mov byte ptr [edi+10], dl
1387 add esi,eax
1388 add edi,ecx
1389 FLD QWORD PTR [ESI]
1390 mov bx, word ptr [esi+8]
1391 mov dl, byte ptr [esi+10]
1392 FSTP QWORD PTR [EDI]
1393 mov word ptr [edi+8], bx
1394 mov byte ptr [edi+10], dl
1395 add esi,eax
1396 add edi,ecx
1397 FLD QWORD PTR [ESI]
1398 mov bx, word ptr [esi+8]
1399 mov dl, byte ptr [esi+10]
1400 FSTP QWORD PTR [EDI]
1401 mov word ptr [edi+8], bx
1402 mov byte ptr [edi+10], dl
1403 add esi,eax
1404 add edi,ecx
1405 pop edx
1406 sub edx ,4
1407 jnz Rec10
1408 jmp BltRecEnd
1409
1410Rec11_0123:
1411 cmp edx,2
1412 jz Rec11_2
1413 jb Rec11_01
1414;3 lines left
1415 FLD QWORD PTR [ESI]
1416 mov bx, word ptr [esi+8]
1417 mov dl, byte ptr [esi+10]
1418 FSTP QWORD PTR [EDI]
1419 mov word ptr [edi+8], bx
1420 mov byte ptr [edi+10], dl
1421 add esi,eax
1422 add edi,ecx
1423 FLD QWORD PTR [ESI]
1424 mov bx, word ptr [esi+8]
1425 mov dl, byte ptr [esi+10]
1426 FSTP QWORD PTR [EDI]
1427 mov word ptr [edi+8], bx
1428 mov byte ptr [edi+10], dl
1429 add esi,eax
1430 add edi,ecx
1431 FLD QWORD PTR [ESI]
1432 mov bx, word ptr [esi+8]
1433 mov dl, byte ptr [esi+10]
1434 FSTP QWORD PTR [EDI]
1435 mov word ptr [edi+8], bx
1436 mov byte ptr [edi+10], dl
1437 jmp BltRecEnd
1438Rec11_2:
1439 FLD QWORD PTR [ESI]
1440 mov bx, word ptr [esi+8]
1441 mov dl, byte ptr [esi+10]
1442 FSTP QWORD PTR [EDI]
1443 mov word ptr [edi+8], bx
1444 mov byte ptr [edi+10], dl
1445 add esi,eax
1446 add edi,ecx
1447 FLD QWORD PTR [ESI]
1448 mov bx, word ptr [esi+8]
1449 mov dl, byte ptr [esi+10]
1450 FSTP QWORD PTR [EDI]
1451 mov word ptr [edi+8], bx
1452 mov byte ptr [edi+10], dl
1453 jmp BltRecEnd
1454Rec11_01:
1455 test edx,edx
1456 jz BltRecEnd
1457 FLD QWORD PTR [ESI]
1458 mov bx, word ptr [esi+8]
1459 mov dl, byte ptr [esi+10]
1460 FSTP QWORD PTR [EDI]
1461 mov word ptr [edi+8], bx
1462 mov byte ptr [edi+10], dl
1463 jmp BltRecEnd
1464
1465; 12 Pixel Wide
1466
1467Rec12:
1468 cmp edx,4
1469 jb Rec12_0123
1470 FLD QWORD PTR [ESI]
1471 mov ebx, dword ptr [esi+8]
1472 FSTP QWORD PTR [EDI]
1473 mov dword ptr [edi+8], ebx
1474 add esi,eax
1475 add edi,ecx
1476 FLD QWORD PTR [ESI]
1477 mov ebx, dword ptr [esi+8]
1478 FSTP QWORD PTR [EDI]
1479 mov dword ptr [edi+8], ebx
1480 add esi,eax
1481 add edi,ecx
1482 FLD QWORD PTR [ESI]
1483 mov ebx, dword ptr [esi+8]
1484 FSTP QWORD PTR [EDI]
1485 mov dword ptr [edi+8], ebx
1486 add esi,eax
1487 add edi,ecx
1488 FLD QWORD PTR [ESI]
1489 mov ebx, dword ptr [esi+8]
1490 FSTP QWORD PTR [EDI]
1491 mov dword ptr [edi+8], ebx
1492 add esi,eax
1493 add edi,ecx
1494 sub edx ,4
1495 jnz Rec12
1496 jmp BltRecEnd
1497
1498Rec12_0123:
1499 cmp edx,2
1500 jz Rec12_2
1501 jb Rec12_01
1502;3 lines left
1503 FLD QWORD PTR [ESI]
1504 mov ebx, dword ptr [esi+8]
1505 FSTP QWORD PTR [EDI]
1506 mov dword ptr [edi+8], ebx
1507 add esi,eax
1508 add edi,ecx
1509 FLD QWORD PTR [ESI]
1510 mov ebx, dword ptr [esi+8]
1511 FSTP QWORD PTR [EDI]
1512 mov dword ptr [edi+8], ebx
1513 add esi,eax
1514 add edi,ecx
1515 FLD QWORD PTR [ESI]
1516 mov ebx, dword ptr [esi+8]
1517 FSTP QWORD PTR [EDI]
1518 mov dword ptr [edi+8], ebx
1519 jmp BltRecEnd
1520Rec12_2:
1521 FLD QWORD PTR [ESI]
1522 mov ebx, dword ptr [esi+8]
1523 FSTP QWORD PTR [EDI]
1524 mov dword ptr [edi+8], ebx
1525 add esi,eax
1526 add edi,ecx
1527 FLD QWORD PTR [ESI]
1528 mov ebx, dword ptr [esi+8]
1529 FSTP QWORD PTR [EDI]
1530 mov dword ptr [edi+8], ebx
1531 jmp BltRecEnd
1532Rec12_01:
1533 test edx,edx
1534 jz BltRecEnd
1535 FLD QWORD PTR [ESI]
1536 mov ebx, dword ptr [esi+8]
1537 FSTP QWORD PTR [EDI]
1538 mov dword ptr [edi+8], ebx
1539 jmp BltRecEnd
1540
1541; 13 Pixel Wide
1542
1543Rec13:
1544 cmp edx,4
1545 jb Rec13_0123
1546 push edx
1547 FLD QWORD PTR [ESI]
1548 mov ebx, dword ptr [esi+8]
1549 mov dl, byte ptr [esi+12]
1550 FSTP QWORD PTR [EDI]
1551 mov dword ptr [edi+8], ebx
1552 mov byte ptr [edi+12], dl
1553 add esi,eax
1554 add edi,ecx
1555 FLD QWORD PTR [ESI]
1556 mov ebx, dword ptr [esi+8]
1557 mov dl, byte ptr [esi+12]
1558 FSTP QWORD PTR [EDI]
1559 mov dword ptr [edi+8], ebx
1560 mov byte ptr [edi+12], dl
1561 add esi,eax
1562 add edi,ecx
1563 FLD QWORD PTR [ESI]
1564 mov ebx, dword ptr [esi+8]
1565 mov dl, byte ptr [esi+12]
1566 FSTP QWORD PTR [EDI]
1567 mov dword ptr [edi+8], ebx
1568 mov byte ptr [edi+12], dl
1569 add esi,eax
1570 add edi,ecx
1571 FLD QWORD PTR [ESI]
1572 mov ebx, dword ptr [esi+8]
1573 mov dl, byte ptr [esi+12]
1574 FSTP QWORD PTR [EDI]
1575 mov dword ptr [edi+8], ebx
1576 mov byte ptr [edi+12], dl
1577 add esi,eax
1578 add edi,ecx
1579 pop edx
1580 sub edx ,4
1581 jnz Rec13
1582 jmp BltRecEnd
1583
1584Rec13_0123:
1585 cmp edx,2
1586 jz Rec13_2
1587 jb Rec13_01
1588;3 lines left
1589 FLD QWORD PTR [ESI]
1590 mov ebx, dword ptr [esi+8]
1591 mov dl, byte ptr [esi+12]
1592 FSTP QWORD PTR [EDI]
1593 mov dword ptr [edi+8], ebx
1594 mov byte ptr [edi+12], dl
1595 add esi,eax
1596 add edi,ecx
1597 FLD QWORD PTR [ESI]
1598 mov ebx, dword ptr [esi+8]
1599 mov dl, byte ptr [esi+12]
1600 FSTP QWORD PTR [EDI]
1601 mov dword ptr [edi+8], ebx
1602 mov byte ptr [edi+12], dl
1603 add esi,eax
1604 add edi,ecx
1605 FLD QWORD PTR [ESI]
1606 mov ebx, dword ptr [esi+8]
1607 mov dl, byte ptr [esi+12]
1608 FSTP QWORD PTR [EDI]
1609 mov dword ptr [edi+8], ebx
1610 mov byte ptr [edi+12], dl
1611 jmp BltRecEnd
1612Rec13_2:
1613 FLD QWORD PTR [ESI]
1614 mov ebx, dword ptr [esi+8]
1615 mov dl, byte ptr [esi+12]
1616 FSTP QWORD PTR [EDI]
1617 mov dword ptr [edi+8], ebx
1618 mov byte ptr [edi+12], dl
1619 add esi,eax
1620 add edi,ecx
1621 FLD QWORD PTR [ESI]
1622 mov ebx, dword ptr [esi+8]
1623 mov dl, byte ptr [esi+12]
1624 FSTP QWORD PTR [EDI]
1625 mov dword ptr [edi+8], ebx
1626 mov byte ptr [edi+12], dl
1627 jmp BltRecEnd
1628Rec13_01:
1629 test edx,edx
1630 jz BltRecEnd
1631 FLD QWORD PTR [ESI]
1632 mov ebx, dword ptr [esi+8]
1633 mov dl, byte ptr [esi+12]
1634 FSTP QWORD PTR [EDI]
1635 mov dword ptr [edi+8], ebx
1636 mov byte ptr [edi+12], dl
1637 jmp BltRecEnd
1638
1639; 14 Pixel Wide
1640
1641Rec14:
1642 cmp edx,4
1643 jb Rec14_0123
1644 push edx
1645 FLD QWORD PTR [ESI]
1646 mov ebx, dword ptr [esi+8]
1647 mov dx, word ptr [esi+12]
1648 FSTP QWORD PTR [EDI]
1649 mov dword ptr [edi+8], ebx
1650 mov word ptr [edi+12], dx
1651 add esi,eax
1652 add edi,ecx
1653 FLD QWORD PTR [ESI]
1654 mov ebx, dword ptr [esi+8]
1655 mov dx, word ptr [esi+12]
1656 FSTP QWORD PTR [EDI]
1657 mov dword ptr [edi+8], ebx
1658 mov word ptr [edi+12], dx
1659 add esi,eax
1660 add edi,ecx
1661 FLD QWORD PTR [ESI]
1662 mov ebx, dword ptr [esi+8]
1663 mov dx, word ptr [esi+12]
1664 FSTP QWORD PTR [EDI]
1665 mov dword ptr [edi+8], ebx
1666 mov word ptr [edi+12], dx
1667 add esi,eax
1668 add edi,ecx
1669 FLD QWORD PTR [ESI]
1670 mov ebx, dword ptr [esi+8]
1671 mov dx, word ptr [esi+12]
1672 FSTP QWORD PTR [EDI]
1673 mov dword ptr [edi+8], ebx
1674 mov word ptr [edi+12], dx
1675 add esi,eax
1676 add edi,ecx
1677 pop edx
1678 sub edx ,4
1679 jnz Rec14
1680 jmp BltRecEnd
1681
1682Rec14_0123:
1683 cmp edx,2
1684 jz Rec14_2
1685 jb Rec14_01
1686;3 lines left
1687 FLD QWORD PTR [ESI]
1688 mov ebx, dword ptr [esi+8]
1689 mov dx, word ptr [esi+12]
1690 FSTP QWORD PTR [EDI]
1691 mov dword ptr [edi+8], ebx
1692 mov word ptr [edi+12], dx
1693 add esi,eax
1694 add edi,ecx
1695 FLD QWORD PTR [ESI]
1696 mov ebx, dword ptr [esi+8]
1697 mov dx, word ptr [esi+12]
1698 FSTP QWORD PTR [EDI]
1699 mov dword ptr [edi+8], ebx
1700 mov word ptr [edi+12], dx
1701 add esi,eax
1702 add edi,ecx
1703 FLD QWORD PTR [ESI]
1704 mov ebx, dword ptr [esi+8]
1705 mov dx, word ptr [esi+12]
1706 FSTP QWORD PTR [EDI]
1707 mov dword ptr [edi+8], ebx
1708 mov word ptr [edi+12], dx
1709 jmp BltRecEnd
1710Rec14_2:
1711 FLD QWORD PTR [ESI]
1712 mov ebx, dword ptr [esi+8]
1713 mov dx, word ptr [esi+12]
1714 FSTP QWORD PTR [EDI]
1715 mov dword ptr [edi+8], ebx
1716 mov word ptr [edi+12], dx
1717 add esi,eax
1718 add edi,ecx
1719 FLD QWORD PTR [ESI]
1720 mov ebx, dword ptr [esi+8]
1721 mov dx, word ptr [esi+12]
1722 FSTP QWORD PTR [EDI]
1723 mov dword ptr [edi+8], ebx
1724 mov word ptr [edi+12], dx
1725 jmp BltRecEnd
1726Rec14_01:
1727 test edx,edx
1728 jz BltRecEnd
1729 FLD QWORD PTR [ESI]
1730 mov ebx, dword ptr [esi+8]
1731 mov dx, word ptr [esi+12]
1732 FSTP QWORD PTR [EDI]
1733 mov dword ptr [edi+8], ebx
1734 mov word ptr [edi+12], dx
1735 jmp BltRecEnd
1736
1737; 15 Pixel Wide
1738
1739Rec15:
1740 cmp edx,4
1741 jb Rec15_0123
1742 push edx
1743 FLD QWORD PTR [ESI]
1744 mov ebx, dword ptr [esi+8]
1745 mov dx, word ptr [esi+12]
1746 FSTP QWORD PTR [EDI]
1747 mov dword ptr [edi+8], ebx
1748 mov bl, byte ptr[esi+14]
1749 mov word ptr [edi+12], dx
1750 add esi,eax
1751 mov byte ptr[edi+14], bl
1752 add edi,ecx
1753 FLD QWORD PTR [ESI]
1754 mov ebx, dword ptr [esi+8]
1755 mov dx, word ptr [esi+12]
1756 FSTP QWORD PTR [EDI]
1757 mov dword ptr [edi+8], ebx
1758 mov bl, byte ptr[esi+14]
1759 mov word ptr [edi+12], dx
1760 add esi,eax
1761 mov byte ptr[edi+14], bl
1762 add edi,ecx
1763 FLD QWORD PTR [ESI]
1764 mov ebx, dword ptr [esi+8]
1765 mov dx, word ptr [esi+12]
1766 FSTP QWORD PTR [EDI]
1767 mov dword ptr [edi+8], ebx
1768 mov bl, byte ptr[esi+14]
1769 mov word ptr [edi+12], dx
1770 add esi,eax
1771 mov byte ptr[edi+14], bl
1772 add edi,ecx
1773 FLD QWORD PTR [ESI]
1774 mov ebx, dword ptr [esi+8]
1775 mov dx, word ptr [esi+12]
1776 FSTP QWORD PTR [EDI]
1777 mov dword ptr [edi+8], ebx
1778 mov bl, byte ptr[esi+14]
1779 mov word ptr [edi+12], dx
1780 add esi,eax
1781 mov byte ptr[edi+14], bl
1782 add edi,ecx
1783 pop edx
1784 sub edx ,4
1785 jnz Rec15
1786 jmp BltRecEnd
1787
1788Rec15_0123:
1789 cmp edx,2
1790 jz Rec15_2
1791 jb Rec15_01
1792;3 lines left
1793 FLD QWORD PTR [ESI]
1794 mov ebx, dword ptr [esi+8]
1795 mov dx, word ptr [esi+12]
1796 FSTP QWORD PTR [EDI]
1797 mov dword ptr [edi+8], ebx
1798 mov bl, byte ptr[esi+14]
1799 mov word ptr [edi+12], dx
1800 add esi,eax
1801 mov byte ptr[edi+14], bl
1802 add edi,ecx
1803 FLD QWORD PTR [ESI]
1804 mov ebx, dword ptr [esi+8]
1805 mov dx, word ptr [esi+12]
1806 FSTP QWORD PTR [EDI]
1807 mov dword ptr [edi+8], ebx
1808 mov bl, byte ptr[esi+14]
1809 mov word ptr [edi+12], dx
1810 add esi,eax
1811 mov byte ptr[edi+14], bl
1812 add edi,ecx
1813 FLD QWORD PTR [ESI]
1814 mov ebx, dword ptr [esi+8]
1815 mov dx, word ptr [esi+12]
1816 FSTP QWORD PTR [EDI]
1817 mov dword ptr [edi+8], ebx
1818 mov bl, byte ptr[esi+14]
1819 mov word ptr [edi+12], dx
1820 mov byte ptr[edi+14], bl
1821 jmp BltRecEnd
1822Rec15_2:
1823 FLD QWORD PTR [ESI]
1824 mov ebx, dword ptr [esi+8]
1825 mov dx, word ptr [esi+12]
1826 FSTP QWORD PTR [EDI]
1827 mov dword ptr [edi+8], ebx
1828 mov bl, byte ptr[esi+14]
1829 mov word ptr [edi+12], dx
1830 add esi,eax
1831 mov byte ptr[edi+14], bl
1832 add edi,ecx
1833 FLD QWORD PTR [ESI]
1834 mov ebx, dword ptr [esi+8]
1835 mov dx, word ptr [esi+12]
1836 FSTP QWORD PTR [EDI]
1837 mov dword ptr [edi+8], ebx
1838 mov bl, byte ptr[esi+14]
1839 mov word ptr [edi+12], dx
1840 mov byte ptr[edi+14], bl
1841 jmp BltRecEnd
1842Rec15_01:
1843 test edx,edx
1844 jz BltRecEnd
1845 FLD QWORD PTR [ESI]
1846 mov ebx, dword ptr [esi+8]
1847 mov dx, word ptr [esi+12]
1848 FSTP QWORD PTR [EDI]
1849 mov dword ptr [edi+8], ebx
1850 mov bl, byte ptr[esi+14]
1851 mov word ptr [edi+12], dx
1852 mov byte ptr[edi+14], bl
1853 jmp BltRecEnd
1854
1855
1856ComplexBlt:
1857 ; Blit first the even rect then the rest
1858
1859 push dword ptr [ebp+28] ; ulSrcPitch
1860 push dword ptr [ebp+24] ; ulDestPitch
1861 push edx
1862 shl ecx,4
1863 push ecx
1864 push esi
1865 push edi
1866 call _BltRec
1867 sub esp,24
1868 add esi,ecx
1869 add edi,ecx
1870 push dword ptr [ebp+28] ; ulSrcPitch
1871 push dword ptr [ebp+24] ; ulDestPitch
1872 push edx
1873 push ebx
1874 push esi
1875 push edi
1876 call _BltRec
1877 sub esp,24
1878
1879BltRecEnd:
1880 pop edx
1881 pop ecx
1882 pop ebx
1883 pop eax
1884 pop esi
1885 pop edi
1886 pop ebp
1887 ret
1888_BltRec ENDP
1889
1890
1891 PUBLIC _CPUHasMMX
1892;
1893; int __cdecl CPUHasMMX()
1894; returns:
1895; 0 = NoMMX
1896; 1 = MMX
1897; 2 = MMX+CMov instuction
1898
1899_CPUHasMMX PROC NEAR
1900 push ebp
1901 mov ebp, esp
1902 push edi
1903 push esi
1904 push ebx
1905 push ecx
1906 push edx
1907
1908 pushfd
1909 pop eax
1910 mov ebx ,eax
1911 xor eax, 00200000h
1912 push eax
1913 popfd
1914 pushfd
1915 pop eax
1916 sub eax,ebx
1917 jz Return ; No CPUID => No MMX => return 0 in eax;
1918 mov eax, 1
1919 CPUID
1920 test edx,00800000h ; MMX Bit Set ?
1921 jz Return
1922 mov eax, 1
1923 test edx,00008000h ; Conditonal Mov Bit Set ?
1924 jz Return
1925 inc eax
1926Return:
1927 pop edx
1928 pop ecx
1929 pop ebx
1930 pop esi
1931 pop edi
1932 pop ebp
1933 ret
1934_CPUHasMMX ENDP
1935
1936 PUBLIC _MemFlip
1937
1938;
1939; memcpy via FLD / FSTP MMX might even be faster but
1940; not present on every system
1941; to maximize the speed we copy 64 bytes in each loop
1942; and after the loop the rest left
1943;
1944;
1945;void __cdecl MemFlip(PBYTE dest, PBYTE src, ULONG Size);
1946
1947_MemFlip PROC NEAR
1948 push ebp
1949 mov ebp, esp
1950 push edi
1951 push esi
1952 push eax
1953 push ebx
1954 push ecx
1955
1956 mov eax , dword ptr [ebp+16] ; Size of Buffer
1957 mov edi , dword ptr [ebp+8] ; Destination
1958 mov ebx , eax
1959 mov esi , dword ptr [ebp+12] ; SourcePointer
1960
1961 and ebx , 0000003Fh ; Calc leftover bytes
1962 shr eax , 5 ; Calc Loops
1963
1964 jz COPYREMAIN ; Less then 64 to copy
1965ALIGN 4
1966
1967Loop64:
1968 FLD QWORD PTR [ESI] ; 1
1969 FLD QWORD PTR [ESI+8] ; 2
1970 FXCH ; Doesn't take any clocks
1971 FSTP QWORD PTR [EDI] ; 3,4 Clocks
1972 FSTP QWORD PTR [EDI+8] ; 5,6
1973 ADD ESI,16 ; 7 U Integer instruction can be executed parallel
1974 ADD EDI,16 ; 7 V Total clocks for copying 16 byte 7 clocks Rep Movs needs 20! for each 16 byte + 13 setup
1975 FLD QWORD PTR [ESI]
1976 FLD QWORD PTR [ESI+8]
1977 FXCH
1978 FSTP QWORD PTR [EDI]
1979 FSTP QWORD PTR [EDI+8]
1980 ADD ESI,16
1981 ADD EDI,16
1982 FLD QWORD PTR [ESI]
1983 FLD QWORD PTR [ESI+8]
1984 FXCH
1985 FSTP QWORD PTR [EDI]
1986 FSTP QWORD PTR [EDI+8]
1987 ADD ESI,16
1988 ADD EDI,16
1989 FLD QWORD PTR [ESI]
1990 FLD QWORD PTR [ESI+8]
1991 FXCH
1992 FSTP QWORD PTR [EDI]
1993 FSTP QWORD PTR [EDI+8]
1994 ADD ESI,16
1995 ADD EDI,16
1996 inc eax
1997 jnz Loop64
1998
1999COPYREMAIN:
2000 test ebx, ebx ; something left ?
2001 jz EndOffFlip
2002
2003 test ebx, 00000020h; at least 32 bytes left ?
2004 jz Test16
2005
2006 FLD QWORD PTR [ESI]
2007 FLD QWORD PTR [ESI+8]
2008 FXCH
2009 FSTP QWORD PTR [EDI]
2010 FSTP QWORD PTR [EDI+8]
2011 ADD ESI,16
2012 ADD EDI,16
2013 FLD QWORD PTR [ESI]
2014 FLD QWORD PTR [ESI+8]
2015 FXCH
2016 FSTP QWORD PTR [EDI]
2017 FSTP QWORD PTR [EDI+8]
2018 ADD ESI,16
2019 ADD EDI,16
2020 sub ebx, 00000020h
2021 jz EndOffFlip
2022
2023Test16:
2024
2025 test ebx, 00000010h; at least 16 bytes left ?
2026 jb Test8
2027
2028 FLD QWORD PTR [ESI]
2029 FLD QWORD PTR [ESI+8]
2030 FXCH
2031 FSTP QWORD PTR [EDI]
2032 FSTP QWORD PTR [EDI+8]
2033 ADD ESI,16
2034 ADD EDI,16
2035 sub ebx, 00000010h
2036 jz EndOffFlip
2037Test8:
2038
2039 test ebx, 00000008h; at least 8 bytes left ?
2040 jb Test4
2041 mov eax,[esi]
2042 mov ecx,[esi+4]
2043 mov [edi],eax
2044 mov [edi+4],ecx
2045 add esi, 8
2046 add edi, 8
2047 sub ebx, 8
2048 jz EndOffFlip
2049
2050Test4:
2051 test ebx, 00000004h; at least 4 bytes left ?
2052 jb Test2
2053 mov eax,[esi]
2054 sub ebx, 4
2055 mov [edi],eax
2056 add esi, 4
2057 add edi, 4
2058 test ebx, ebx ; something left ?
2059 jz EndOffFlip
2060
2061Test2:
2062 test ebx, 00000002h
2063 jb Copy1
2064 mov ax,[esi]
2065 sub ebx,2
2066 mov [edi],ax
2067 add esi,2
2068 add edi,2
2069 test ebx,ebx
2070 jz EndOffFlip
2071
2072Copy1:
2073 mov al,[esi]
2074 mov [edi],al
2075
2076EndOffFlip:
2077 pop ecx
2078 pop ebx
2079 pop eax
2080 pop esi
2081 pop edi
2082 pop ebp
2083 ret
2084_MemFlip ENDP
2085
2086CODE32 ENDS
2087
2088 END
Note: See TracBrowser for help on using the repository browser.