source: trunk/src/ddraw/asmutil.asm@ 8818

Last change on this file since 8818 was 8818, checked in by sandervl, 23 years ago

fill, SurfGetDC, SurfReleaseDC fixes & changes (see ChangeLog for details)

File size: 44.1 KB
Line 
1; $Id: asmutil.asm,v 1.9 2002-07-01 19:15:25 sandervl Exp $
2
3;
4; asmutil.asm Color key bit blitting for DirectDraw
5;
6; Copyright 1998 Sander van Leeuwen
7; 1999 Markus Montkowski
8;
9; Project Odin Software License can be found in LICENSE.TXT
10;
11
12 NAME asmutil
13.586p
14.MMX
15
16CODE32 SEGMENT DWORD USE32 PUBLIC 'CODE'
17CODE32 ENDS
18DATA32 SEGMENT DWORD USE32 PUBLIC 'DATA'
19DATA32 ENDS
20CONST32 SEGMENT DWORD USE32 PUBLIC 'CONST'
21CONST32 ENDS
22BSS32 SEGMENT DWORD USE32 PUBLIC 'BSS'
23BSS32 ENDS
24DGROUP GROUP CONST32, BSS32, DATA32
25 ASSUME CS:FLAT, DS:FLAT, SS:FLAT, ES:FLAT
26 DATA32 SEGMENT
27 DATA32 ENDS
28 BSS32 SEGMENT
29 BSS32 ENDS
30 CONST32 SEGMENT
31 CONST32 ENDS
32
33DATA32 SEGMENT
34
35 align 4
36 and1mask dd 0001F001Fh
37 dd 0001F001Fh
38 and2mask dd 0FFC0FFC0h
39 dd 0FFC0FFC0h
40 and2mask565 dd 0FFE07FE0h
41 dd 07FE07FE0h
42
43DATA32 ENDS
44
45CODE32 SEGMENT
46
47 PUBLIC _BlitColorKey8
48
49; endpos = destbuf + blitlinesize;
50; while(destbuf < endpos) {
51; if(*srcbuf == colorkey) {
52; destbuf++;
53; }
54; else *destbuf++ = *srcbuf;
55; srcbuf++;
56; }
57; destbuf += (destscanlinesize-blitlinesize);
58; srcbuf += (srcscanlinesize-blitlinesize);
59;void BlitColorKey8(char *dest, char *src, ULONG key, ULONG linesize)
;-----------------------------------------------------------------------
; void BlitColorKey8(char *dest, char *src, ULONG key, ULONG linesize)
;
; Copies one line of 8-bit pixels from src to dest, skipping every
; source pixel that equals the colour key (low byte of 'key').
;
; In:    [ebp+8]  = dest
;        [ebp+12] = src
;        [ebp+16] = key       (only the low byte is used)
;        [ebp+20] = linesize  (in bytes / pixels)
; Out:   nothing
; Saves: all general registers used here are pushed and restored
;
; Fixes over the previous version:
;   * esi/edi are now advanced at the END of every dword iteration, so
;     they point at the tail bytes when the dword loop finishes.
;     Previously they still pointed at the last dword processed and the
;     remaining (linesize mod 4) pixels were never blitted.
;   * the tail is handled byte by byte, so nothing beyond 'linesize'
;     bytes is read from src or written to dest.
;-----------------------------------------------------------------------
_BlitColorKey8 PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

        mov     edi, dword ptr [ebp+8]      ; dest
        mov     esi, dword ptr [ebp+12]     ; src
        mov     edx, dword ptr [ebp+16]     ; colorkey (in dl)
        mov     ecx, dword ptr [ebp+20]     ; linesize

        mov     dh, dl                      ; dx  = key:key
        mov     eax, edx
        shl     edx, 16
        and     eax, 0000FFFFh
        or      edx, eax                    ; edx = colorkey in each byte
        shr     ecx, 2                      ; ecx = number of whole dwords
        jz      blitremain                  ; less than 4 bytes

blitloop:
        mov     ebx, dword ptr [esi]        ; ebx = 4 source pixels
        cmp     ebx, edx                    ; all 4 pixels transparent?
        jz      TTTT
        mov     eax, dword ptr [edi]        ; eax = 4 destination pixels
        cmp     bl, dl                      ; pixel 0
        je      ck8Byte1
        mov     al, bl
ck8Byte1:
        cmp     bh, dh                      ; pixel 1
        je      ck8Byte2
        mov     ah, bh
ck8Byte2:
        ror     eax, 16                     ; bring pixels 2/3 into al/ah
        ror     ebx, 16
        cmp     bl, dl                      ; pixel 2
        je      skipbyte3
        mov     al, bl
skipbyte3:
        cmp     bh, dh                      ; pixel 3
        je      skipbyte4
        mov     ah, bh
skipbyte4:
        ror     eax, 16                     ; restore byte order
        mov     dword ptr [edi], eax
TTTT:
        add     esi, 4                      ; advance AFTER the dword is done
        add     edi, 4
        dec     ecx
        jnz     blitloop

blitremain:
        mov     ecx, dword ptr [ebp+20]
        and     ecx, 3                      ; ecx = 0..3 tail pixels
        jz      endofblit
ck8Tail:
        mov     al, byte ptr [esi]
        cmp     al, dl
        je      ck8TailSkip                 ; transparent: leave dest alone
        mov     byte ptr [edi], al
ck8TailSkip:
        inc     esi
        inc     edi
        dec     ecx
        jnz     ck8Tail

endofblit:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey8 ENDP
184
185 PUBLIC _BlitColorKey16
186
187; endpos = destbuf + blitlinesize;
188; while(destbuf < endpos) {
189; if(*srcbuf == colorkey) {
190; destbuf++;
191; }
192; else *destbuf++ = *srcbuf;
193; srcbuf++;
194; }
195; destbuf += (destscanlinesize-blitlinesize);
196; srcbuf += (srcscanlinesize-blitlinesize);
197;void BlitColorKey16(char *dest, char *src, ULONG key, ULONG linesize)
;-----------------------------------------------------------------------
; void BlitColorKey16(char *dest, char *src, ULONG key, ULONG linesize)
;
; Copies one line of 16-bit pixels from src to dest, skipping every
; source pixel that equals the colour key (low word of 'key').
;
; In:    [ebp+8]  = dest
;        [ebp+12] = src
;        [ebp+16] = key       (only the low word is used)
;        [ebp+20] = linesize  (in pixels; two pixels per dword)
; Out:   nothing
; Saves: all general registers used here are pushed and restored
;
; Fixes over the previous version:
;   * linesize 0 or 1 no longer falls into "dec ecx / jnz" with ecx = 0
;     (which wrapped the counter and ran the loop ~4G times).
;   * the destination dword is rotated together with the source dword
;     before the high pixel is merged; previously the high source pixel
;     overwrote the LOW destination word and the halves were stored
;     swapped.
;   * the odd trailing pixel is read and written as a word, so nothing
;     beyond the line is touched.
;-----------------------------------------------------------------------
_BlitColorKey16 PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

        mov     edi, dword ptr [ebp+8]      ; dest
        mov     esi, dword ptr [ebp+12]     ; src
        mov     ecx, dword ptr [ebp+20]     ; linesize
        mov     edx, dword ptr [ebp+16]     ; colorkey

        mov     eax, edx
        shl     edx, 16
        and     eax, 0000FFFFh
        or      edx, eax                    ; edx = key:key (dwColorKey)
        shr     ecx, 1                      ; ecx = number of pixel pairs
        jz      OnePixel                    ; 0 or 1 pixel: only the tail

blitloop16:
        mov     eax, dword ptr [esi]        ; eax = 2 source pixels
        cmp     eax, edx                    ; are both pixels transparent?
        je      LoopUp                      ; yes: nothing to store
        mov     ebx, dword ptr [edi]        ; ebx = 2 destination pixels
        cmp     ax, dx                      ; is the lower pixel transparent?
        je      DrawOT
        mov     bx, ax                      ; no: copy the lower pixel
DrawOT:
        ror     eax, 16                     ; ax = higher source pixel
        ror     ebx, 16                     ; bx = higher destination pixel
        cmp     ax, dx                      ; is the higher pixel transparent?
        je      CopyBack
        mov     bx, ax                      ; no: copy the higher pixel
CopyBack:
        ror     ebx, 16                     ; restore pixel order
        mov     dword ptr [edi], ebx        ; write back the merged pair
LoopUp:
        add     esi, 4
        add     edi, 4
        dec     ecx
        jnz     blitloop16

OnePixel:
        test    dword ptr [ebp+20], 1       ; odd linesize?
        jz      endofblit16
        mov     ax, word ptr [esi]
        cmp     ax, dx
        je      endofblit16                 ; last pixel is transparent
        mov     word ptr [edi], ax

endofblit16:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey16 ENDP
262
263
264 PUBLIC _BlitColorKey8MMX
265; Now the same as BlitColorKey8 now with MMX
266;void BlitColorKey8MMX(char *dest, char *src, ULONG key, ULONG linesize)
267
;-----------------------------------------------------------------------
; void BlitColorKey8MMX(char *dest, char *src, ULONG key, ULONG linesize)
;
; MMX variant of BlitColorKey8: copies one line of 8-bit pixels from src
; to dest, skipping source pixels equal to the colour key (low byte of
; 'key'). Eight pixels are handled per iteration; the 0..7 remaining
; pixels are dispatched through JmpTable.
;
; In:    [ebp+8]  = dest
;        [ebp+12] = src
;        [ebp+16] = key       (only the low byte is used)
;        [ebp+20] = linesize  (in bytes / pixels)
; Out:   nothing
; Saves: all general registers used here are pushed and restored
; Clobb: mm0, mm1, mm2, mm4, mm5, mm6
;
; Change over the previous version: EMMS is executed before returning so
; that the x87 tag word is empty again and floating-point code can run
; safely after this routine.
;-----------------------------------------------------------------------
_BlitColorKey8MMX PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

        mov     edx, [ebp+16]               ; colorkey (in dl)
        mov     edi, [ebp+8]                ; dest
        mov     esi, [ebp+12]               ; src
        mov     ecx, dword ptr [ebp+20]     ; linesize
        mov     dh, dl
        mov     eax, edx
        shl     edx, 16
        mov     dx, ax                      ; edx = key in each byte
        movd    mm4, edx
        movd    mm5, edx
        psllq   mm4, 32
        por     mm4, mm5                    ; mm4 = key in all 8 bytes
        shr     ecx, 3                      ; ecx = number of whole qwords
        jz      BltRemain8

bltLoopMMX8:
        movq    mm0, [esi]                  ; get source qword
        movq    mm1, [edi]                  ; get dest qword
        movq    mm2, mm0                    ; copy source
        pcmpeqb mm0, mm4                    ; mask = FF where source == key
        pand    mm1, mm0                    ; keep dest where transparent
        pandn   mm0, mm2                    ; keep source where opaque
        por     mm1, mm0                    ; merge
        movq    qword ptr [edi], mm1        ; write back result
        add     esi, 8
        add     edi, 8
        dec     ecx
        jnz     bltLoopMMX8
BltRemain8:
        mov     eax, dword ptr [ebp+20]
        and     eax, 7                      ; eax = 0..7 tail pixels
        jmp     ds:JmpTable[eax*4]

align 4

JmpTable:
        dd      offset cs:bltEndMMX8
        dd      offset cs:blt1MMX8
        dd      offset cs:blt2MMX8
        dd      offset cs:blt3MMX8
        dd      offset cs:blt4MMX8
        dd      offset cs:blt5MMX8
        dd      offset cs:blt6MMX8
        dd      offset cs:blt7MMX8
align 2
;
; Maybe it would be faster for 7-5 to load a qword into mm0/mm1,
; but we might cross a page, so this is presumably safer.
;
; In the 7/6/3 cases the tail pixels are packed into a register in a
; shuffled byte order. That is harmless because pcmpeqb works per byte
; and the result is unpacked in the same order. The upper bits of ebx
; are not initialised, but the bytes they end up in are never stored.
;
blt7MMX8:
        movd    mm0, dword ptr[esi]         ; pixels 0-3
        mov     ax, word ptr[esi+4]         ; pixels 4-5
        mov     bx, word ptr[edi+4]
        movd    mm1, dword ptr[edi]
        psllq   mm0, 32                     ; pixels 0-3 to the high dword
        shl     eax, 8
        shl     ebx, 8
        mov     al, byte ptr[esi+6]         ; pixel 6
        mov     bl, byte ptr[edi+6]
        movd    mm5, eax
        por     mm0, mm5
        psllq   mm1, 32
        movd    mm6, ebx
        por     mm1, mm6
        movq    mm2, mm0                    ; copy source
        pcmpeqb mm0, mm4                    ; create mask
        pand    mm1, mm0                    ; mask dest
        pandn   mm0, mm2                    ; mask source
        por     mm1, mm0                    ; or them
        movd    eax, mm1                    ; eax = merged pixels 6,4,5
        psrlq   mm1, 32
        mov     byte ptr[edi+6], al
        movd    dword ptr[edi], mm1         ; merged pixels 0-3
        shr     eax, 8
        mov     word ptr[edi+4], ax
        jmp     bltEndMMX8

blt6MMX8:
        movd    mm0, dword ptr[esi]         ; pixels 0-3
        mov     ax, word ptr[esi+4]         ; pixels 4-5
        mov     bx, word ptr[edi+4]
        movd    mm1, dword ptr[edi]
        psllq   mm0, 32
        psllq   mm1, 32
        movd    mm5, eax
        por     mm0, mm5
        movd    mm6, ebx
        por     mm1, mm6
        movq    mm2, mm0                    ; copy source
        pcmpeqb mm0, mm4                    ; create mask
        pand    mm1, mm0                    ; mask dest
        pandn   mm0, mm2                    ; mask source
        por     mm1, mm0                    ; or them
        movd    eax, mm1
        psrlq   mm1, 32
        mov     word ptr[edi+4], ax
        movd    dword ptr[edi], mm1
        jmp     bltEndMMX8

blt5MMX8:
        movd    mm0, dword ptr[esi]
        movd    mm1, dword ptr[edi]
        movq    mm2, mm0                    ; copy source
        pcmpeqb mm0, mm4                    ; create mask
        pand    mm1, mm0                    ; mask dest
        add     esi, 4
        pandn   mm0, mm2                    ; mask source
        por     mm1, mm0                    ; or them
        movd    dword ptr[edi], mm1
        add     edi, 4
        jmp     blt1MMX8                    ; fifth pixel handled as a byte

blt4MMX8:
        movd    mm0, dword ptr[esi]
        movd    mm1, dword ptr[edi]
        movq    mm2, mm0                    ; copy source
        pcmpeqb mm0, mm4                    ; create mask
        pand    mm1, mm0                    ; mask dest
        pandn   mm0, mm2                    ; mask source
        por     mm1, mm0                    ; or them
        movd    dword ptr [edi], mm1        ; write back result
        jmp     bltEndMMX8
;
; loading a dword into mm0/mm1 might be faster for 3-2...
;
blt3MMX8:
        mov     ax, word ptr [esi]
        mov     bx, word ptr [edi]
        shl     eax, 8                      ; 3 pixels left to blit,
        shl     ebx, 8                      ; so shift the buffers
        mov     al, byte ptr[esi+2]
        mov     bl, byte ptr[edi+2]
        movd    mm0, eax
        movd    mm1, ebx
        movq    mm2, mm0
        pcmpeqb mm0, mm4                    ; create mask
        pand    mm1, mm0                    ; mask dest
        pandn   mm0, mm2                    ; mask source
        por     mm1, mm0                    ; or them
        movd    eax, mm1                    ; write back result
        mov     byte ptr[edi+2], al
        shr     eax, 8
        mov     word ptr[edi], ax
        jmp     bltEndMMX8

blt2MMX8:
        mov     al, byte ptr [esi]
        cmp     al, dl
        je      blt1aMMX8
        mov     byte ptr [edi], al
blt1aMMX8:
        add     esi, 1                      ; second pixel falls into blt1MMX8
        add     edi, 1
blt1MMX8:
        mov     al, byte ptr [esi]
        cmp     al, dl
        je      bltEndMMX8
        mov     byte ptr [edi], al

bltEndMMX8:
        emms                                ; leave MMX state so x87 code works
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey8MMX ENDP
452
453
454 PUBLIC _BlitColorKey16MMX
455; Now the same as BlitColorKey16 now with MMX
456;void BlitColorKey16MMX(char *dest, char *src, ULONG key, ULONG linesize)
;-----------------------------------------------------------------------
; void BlitColorKey16MMX(char *dest, char *src, ULONG key, ULONG linesize)
;
; MMX variant of BlitColorKey16: copies one line of 16-bit pixels from
; src to dest, skipping source pixels equal to the colour key (low word
; of 'key'). Four pixels are handled per iteration; the 0..3 remaining
; pixels are dispatched through JumpTable.
;
; In:    [ebp+8]  = dest
;        [ebp+12] = src
;        [ebp+16] = key       (only the low word is used)
;        [ebp+20] = linesize  (in pixels)
; Out:   nothing
; Saves: edi, esi, ecx, edx
; Clobb: eax (not saved), mm0, mm1, mm2, mm4, mm5
;
; Change over the previous version: EMMS is executed before returning so
; that the x87 tag word is empty again and floating-point code can run
; safely after this routine.
;-----------------------------------------------------------------------
_BlitColorKey16MMX PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    ecx
        push    edx

        mov     edx, dword ptr [ebp+16]     ; colorkey
        mov     edi, dword ptr [ebp+8]      ; dest
        mov     ecx, dword ptr [ebp+20]     ; linesize in pixels!

        mov     eax, edx
        shl     edx, 16
        mov     dx, ax                      ; extend colorkey to 32 bit

        mov     esi, dword ptr [ebp+12]     ; src
        mov     eax, ecx                    ; copy of linesize
        shr     ecx, 2                      ; ecx = number of whole qwords
        movd    mm4, edx                    ; (movd leaves the flags of shr intact)
        jz      BltRemain16                 ; tails only need the low dword of mm4

        movd    mm5, edx                    ; extend colorkey to 64 bit
        psllq   mm4, 32
        por     mm4, mm5

bltLoopMMX16:
        movq    mm0, qword ptr [esi]        ; get source qword
        movq    mm1, qword ptr [edi]        ; get destination
        movq    mm2, mm0                    ; copy source
        pcmpeqw mm0, mm4                    ; mask = FFFF where source == key
        pand    mm1, mm0                    ; keep dest where transparent
        add     esi, 8                      ; point to next source qword
        pandn   mm0, mm2                    ; keep source where opaque
        por     mm1, mm0                    ; merge
        movq    qword ptr [edi], mm1        ; write back result
        add     edi, 8
        dec     ecx
        jnz     bltLoopMMX16

BltRemain16:
        and     eax, 3                      ; eax = 0..3 tail pixels
        jmp     ds:JumpTable[eax*4]

align 4

JumpTable:
        dd      offset cs:bltEndMMX16
        dd      offset cs:blt1MMX16
        dd      offset cs:blt2MMX16
        dd      offset cs:blt3MMX16
align 2

blt3MMX16:
        movd    mm0, dword ptr[esi]
        movd    mm1, dword ptr[edi]
        movq    mm2, mm0                    ; copy source
        add     esi, 4
        pcmpeqw mm0, mm4                    ; create mask 16 bit
        pand    mm1, mm0                    ; mask dest
        pandn   mm0, mm2                    ; mask source
        add     edi, 4
        por     mm1, mm0                    ; or them
        movd    dword ptr[edi-4], mm1
        jmp     blt1MMX16                   ; third pixel handled as a word

blt2MMX16:
        movd    mm0, dword ptr[esi]
        movd    mm1, dword ptr[edi]
        movq    mm2, mm0                    ; copy source
        pcmpeqw mm0, mm4                    ; create mask 16 bit
        pand    mm1, mm0                    ; mask dest
        pandn   mm0, mm2                    ; mask source
        por     mm1, mm0                    ; or them
        movd    dword ptr [edi], mm1        ; write back result
        jmp     bltEndMMX16

blt1MMX16:
        mov     ax, word ptr [esi]
        cmp     ax, dx
        je      bltEndMMX16                 ; transparent: leave dest alone
        mov     word ptr [edi], ax

bltEndMMX16:
        emms                                ; leave MMX state so x87 code works
        pop     edx
        pop     ecx
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey16MMX ENDP
549
550;
551; extern void __cdecl BltTransSrcRecMMX(PBYTE dest, PBYTE src, ULONG ulBltWidth, ULONG ulBltHeight,
552; ULONG ulDestPitch, ULONG ulSrcPitch, ULONG ulTransCol);
553
554 PUBLIC _BltTransSrcRecMMX
;-----------------------------------------------------------------------
; _BltTransSrcRecMMX
;
; Placeholder: the routine has no body yet. It sets up and tears down a
; stack frame and returns without touching memory or any register the
; caller can observe.
;-----------------------------------------------------------------------
_BltTransSrcRecMMX PROC NEAR
        push    ebp
        mov     ebp, esp
        ; no work is performed; no register other than ebp/esp is modified,
        ; so nothing else needs to be saved.

EndTSBlt:
        pop     ebp
        ret

_BltTransSrcRecMMX ENDP
576
577
578 PUBLIC _BltRec
579;
580; extern void __cdecl BltRec(PBYTE dest, PBYTE src, ULONG ulBltWidth, ULONG ulBltHeight,
581; ULONG ulDestPitch, ULONG ulSrcPitch);
582_BltRec PROC NEAR
583 push ebp
584 mov ebp, esp
585 push edi
586 push esi
587 push eax
588 push ebx
589 push ecx
590 push edx
591
592 mov ecx, dword ptr [ebp+16] ; U ulBltWidth
593 mov esi, dword ptr [ebp+12] ; V src
594 mov ebx, ecx ; U
595 mov edx, dword ptr [ebp+20] ; V ulBltHeight
596 and ebx, 0Fh ; U ebx = # of bytes < 16
597 mov edi, dword ptr [ebp+8] ; V dest
598 cmp edx, 0
599 jz BltRecEnd ; height is zero so done
600 shr ecx, 4 ; U
601 jz SmallBlt ; Small (width < 16) rectangle done in special case
602 test ebx, ebx
603 jnz ComplexBlt ; ulBltWidth mod 16 is not 0
604
605;
606; Blitwidth is an multiple of 16
607;
608 mov ebx, dword ptr [ebp+24] ; ulDestPitch
609 mov eax, dword ptr [ebp+28] ; ulSrcPitch
610 sub ebx, dword ptr [ebp+16] ; adjust both widths
611 sub eax, dword ptr [ebp+16]
612 mov dword ptr [ebp+28], eax ; store adjusted SrcPitch
613 mov eax, ecx
614LineLoop:
615 FLD QWORD PTR [ESI]
616 FLD QWORD PTR [ESI+8]
617 FXCH
618 FSTP QWORD PTR [EDI]
619 FSTP QWORD PTR [EDI+8]
620 ADD ESI,16
621 ADD EDI,16
622 dec eax
623 jz LineLoop
624 dec edx
625 jz BltRecEnd
626 add ESI, dword ptr[ebp+28]
627 add EDI, ebx
628 mov eax, ecx
629 jmp LineLoop
630
631SmallBlt:
632 mov eax, dword ptr [ebp+28] ; ulSrcPitch
633 mov ecx, dword ptr [ebp+24] ; ulDestPitch
634 jmp ds:SmallJmpTable[ebx*4]
635SmallJmpTable:
636 dd cs:offset BltRecEnd ; BlitWidth is 0 done
637 dd cs:offset Rec1
638 dd cs:offset Rec2
639 dd cs:offset Rec3
640 dd cs:offset Rec4
641 dd cs:offset Rec5
642 dd cs:offset Rec6
643 dd cs:offset Rec7
644 dd cs:offset Rec8
645 dd cs:offset Rec9
646 dd cs:offset Rec10
647 dd cs:offset Rec11
648 dd cs:offset Rec12
649 dd cs:offset Rec13
650 dd cs:offset Rec14
651 dd cs:offset Rec15
652
653;One Pixel wide
654
655Rec1:
656 cmp edx,4
657 jb Rec1_0123
658 mov bl, byte ptr [esi]
659 add esi,eax
660 mov byte ptr [edi], bl
661 add edi,ecx
662 mov bl, byte ptr [esi]
663 add esi,eax
664 mov byte ptr [edi], bl
665 add edi,ecx
666 mov bl, byte ptr [esi]
667 add esi,eax
668 mov byte ptr [edi], bl
669 add edi,ecx
670 mov bl, byte ptr [esi]
671 add esi,eax
672 mov byte ptr [edi], bl
673 add edi,ecx
674 sub edx,4
675 jnz Rec1
676 jmp BltRecEnd
677Rec1_0123:
678 cmp edx,2
679 jz Rec1_2
680 jb Rec1_01
681; Must be 3 lines left
682 mov bl, byte ptr [esi]
683 add esi,eax
684 mov byte ptr [edi], bl
685 add edi,ecx
686 mov bl, byte ptr [esi]
687 add esi,eax
688 mov byte ptr [edi], bl
689 add edi,ecx
690 mov bl, byte ptr [esi]
691 mov byte ptr [edi], bl
692 jmp BltRecEnd
693Rec1_2:
694 mov bl, byte ptr [esi]
695 add esi,eax
696 mov byte ptr [edi], bl
697 add edi,ecx
698 mov bl, byte ptr [esi]
699 mov byte ptr [edi], bl
700 jmp BltRecEnd
701Rec1_01:
702 test edx,edx
703 jz BltRecEnd
704 mov bl, byte ptr [esi]
705 mov byte ptr [edi], bl
706 jmp BltRecEnd
707
708;2 Pixel Wide
709
710Rec2:
711 cmp edx,4
712 jb Rec2_0123
713 mov bx, word ptr [esi]
714 add esi,eax
715 mov word ptr [edi], bx
716 add edi,ecx
717 mov bx, word ptr [esi]
718 add esi,eax
719 mov word ptr [edi], bx
720 add edi,ecx
721 mov bx, word ptr [esi]
722 add esi,eax
723 mov word ptr [edi], bx
724 add edi,ecx
725 mov bx, word ptr [esi]
726 add esi,eax
727 mov word ptr [edi], bx
728 add edi,ecx
729 sub edx, 4
730 jnz Rec2
731 jmp BltRecEnd
732
733Rec2_0123:
734 cmp edx,2
735 jz Rec2_2
736 jb Rec2_01
737;3 lines left
738 mov bx, word ptr [esi]
739 add esi,eax
740 mov word ptr [edi], bx
741 add edi,ecx
742 mov bx, word ptr [esi]
743 add esi,eax
744 mov word ptr [edi], bx
745 add edi,ecx
746 mov bx, word ptr [esi]
747 mov word ptr [edi], bx
748 jmp BltRecEnd
749Rec2_2:
750 mov bx, word ptr [esi]
751 add esi,eax
752 mov word ptr [edi], bx
753 add edi,ecx
754 mov bx, word ptr [esi]
755 mov word ptr [edi], bx
756 jmp BltRecEnd
757Rec2_01:
758 test edx,edx
759 jz BltRecEnd
760 mov bx, word ptr [esi]
761 mov word ptr [edi], bx
762 jmp BltRecEnd
763
764; 3 Pixel Wide must check if it's better to read 4 bytes as
765; Intel might stall on reading 2 and 1 byte, but this takes more care as we
766; could create a pagefault on the last 3 pixel
767
768Rec3:
769 cmp edx,4
770 jb Rec3_0123
771 push edx
772 mov bx, word ptr [esi]
773 mov dl, byte ptr [esi+2]
774 add esi,eax
775 mov word ptr [edi], bx
776 mov byte ptr [edi+2], dl
777 add edi,ecx
778 mov bx, word ptr [esi]
779 mov dl, byte ptr [esi+2]
780 add esi,eax
781 mov word ptr [edi], bx
782 mov byte ptr [edi+2], dl
783 add edi,ecx
784 mov bx, word ptr [esi]
785 mov dl, byte ptr [esi+2]
786 add esi,eax
787 mov word ptr [edi], bx
788 mov byte ptr [edi+2], dl
789 add edi,ecx
790 mov bx, word ptr [esi]
791 mov dl, byte ptr [esi+2]
792 add esi,eax
793 mov word ptr [edi], bx
794 mov byte ptr [edi+2], dl
795 add edi,ecx
796 pop edx
797 sub edx,4
798 jnz Rec3
799 jmp BltRecEnd
800
801Rec3_0123:
802 cmp edx,2
803 jz Rec3_2
804 jb Rec3_01
805; Must be 3 lines left
806 mov bx, word ptr [esi]
807 mov dl, byte ptr [esi+2]
808 add esi,eax
809 mov word ptr [edi], bx
810 mov byte ptr [edi+2], dl
811 add edi,ecx
812 mov bx, word ptr [esi]
813 mov dl, byte ptr [esi+2]
814 add esi,eax
815 mov word ptr [edi], bx
816 mov byte ptr [edi+2], dl
817 add edi,ecx
818 mov bx, word ptr [esi]
819 mov dl, byte ptr [esi+2]
820 mov word ptr [edi], bx
821 mov byte ptr [edi+2], dl
822 jmp BltRecEnd
823Rec3_2:
824 mov bx, word ptr [esi]
825 mov dl, byte ptr [esi+2]
826 add esi,eax
827 mov word ptr [edi], bx
828 mov byte ptr [edi+2], dl
829 add edi,ecx
830 mov bx, word ptr [esi]
831 mov dl, byte ptr [esi+2]
832 mov word ptr [edi], bx
833 mov byte ptr [edi+2], dl
834 jmp BltRecEnd
835Rec3_01:
836 test edx,edx
837 jz BltRecEnd
838 mov bx, word ptr [esi]
839 mov dl, byte ptr [esi+2]
840 mov word ptr [edi], bx
841 mov byte ptr [edi+2], dl
842 jmp BltRecEnd
843
844; 4 Pixel Wide
845
846Rec4:
847 cmp edx,4
848 jb Rec4_0123
849 mov ebx, dword ptr [esi]
850 add esi,eax
851 mov dword ptr [edi], ebx
852 add edi,ecx
853 mov ebx, dword ptr [esi]
854 add esi,eax
855 mov dword ptr [edi], ebx
856 add edi,ecx
857 mov ebx, dword ptr [esi]
858 add esi,eax
859 mov dword ptr [edi], ebx
860 add edi,ecx
861 mov ebx, dword ptr [esi]
862 add esi,eax
863 mov dword ptr [edi], ebx
864 add edi,ecx
865 sub edx ,4
866 jnz Rec4
867 jmp BltRecEnd
868
869Rec4_0123:
870 cmp edx,2
871 jz Rec2_2
872 jb Rec2_01
873;3 lines left
874 mov ebx, dword ptr [esi]
875 add esi,eax
876 mov dword ptr [edi], ebx
877 add edi,ecx
878 mov ebx, dword ptr [esi]
879 add esi,eax
880 mov dword ptr [edi], ebx
881 add edi,ecx
882 mov ebx, dword ptr [esi]
883 mov dword ptr [edi], ebx
884 jmp BltRecEnd
885Rec4_2:
886 mov ebx, dword ptr [esi]
887 add esi,eax
888 mov dword ptr [edi], ebx
889 add edi,ecx
890 mov ebx, dword ptr [esi]
891 mov dword ptr [edi], ebx
892 jmp BltRecEnd
893Rec4_01:
894 test edx,edx
895 jz BltRecEnd
896 mov ebx, dword ptr [esi]
897 mov dword ptr [edi], ebx
898 jmp BltRecEnd
899
900; 5 Pixel Wide
901
902Rec5:
903 cmp edx,4
904 jb Rec5_0123
905 push edx
906 mov ebx, dword ptr [esi]
907 mov dl, byte ptr [esi+4]
908 add esi,eax
909 mov dword ptr [edi], ebx
910 mov byte ptr [edi+4], dl
911 add edi,ecx
912 mov ebx, dword ptr [esi]
913 mov dl, byte ptr [esi+4]
914 add esi,eax
915 mov dword ptr [edi], ebx
916 mov byte ptr [edi+4], dl
917 add edi,ecx
918 mov ebx, dword ptr [esi]
919 mov dl, byte ptr [esi+4]
920 add esi,eax
921 mov dword ptr [edi], ebx
922 mov byte ptr [edi+4], dl
923 add edi,ecx
924 mov ebx, dword ptr [esi]
925 mov dl, byte ptr [esi+4]
926 add esi,eax
927 mov dword ptr [edi], ebx
928 mov byte ptr [edi+4], dl
929 add edi,ecx
930 pop edx
931 sub edx ,4
932 jnz Rec5
933 jmp BltRecEnd
934Rec5_0123:
935 cmp edx,2
936 jz Rec5_2
937 jb Rec5_01
938; Must be 3 lines left
939 mov ebx, dword ptr [esi]
940 mov dl, byte ptr [esi+4]
941 add esi,eax
942 mov dword ptr [edi], ebx
943 mov byte ptr [edi+4], dl
944 add edi,ecx
945 mov ebx, dword ptr [esi]
946 mov dl, byte ptr [esi+4]
947 add esi,eax
948 mov dword ptr [edi], ebx
949 mov byte ptr [edi+4], dl
950 add edi,ecx
951 mov ebx, dword ptr [esi]
952 mov dl, byte ptr [esi+4]
953 mov dword ptr [edi], ebx
954 mov byte ptr [edi+4], dl
955 jmp BltRecEnd
956Rec5_2:
957 mov ebx, dword ptr [esi]
958 mov dl, byte ptr [esi+4]
959 add esi,eax
960 mov dword ptr [edi], ebx
961 mov byte ptr [edi+4], dl
962 add edi,ecx
963 mov ebx, dword ptr [esi]
964 mov dl, byte ptr [esi+4]
965 mov dword ptr [edi], ebx
966 mov byte ptr [edi+4], dl
967 jmp BltRecEnd
968Rec5_01:
969 test edx,edx
970 jz BltRecEnd
971 mov ebx, dword ptr [esi]
972 mov dl, byte ptr [esi+4]
973 mov dword ptr [edi], ebx
974 mov byte ptr [edi+4], dl
975 jmp BltRecEnd
976
977; 6 Pixel Wide
978
979Rec6:
980 cmp edx,4
981 jb Rec6_0123
982 push edx
983 mov ebx, dword ptr [esi]
984 mov dx, word ptr [esi+4]
985 add esi,eax
986 mov dword ptr [edi], ebx
987 mov word ptr [edi+4], dx
988 add edi,ecx
989 mov ebx, dword ptr [esi]
990 mov dx, word ptr [esi+4]
991 add esi,eax
992 mov dword ptr [edi], ebx
993 mov word ptr [edi+4], dx
994 add edi,ecx
995 mov ebx, dword ptr [esi]
996 mov dx, word ptr [esi+4]
997 add esi,eax
998 mov dword ptr [edi], ebx
999 mov word ptr [edi+4], dx
1000 add edi,ecx
1001 mov ebx, dword ptr [esi]
1002 mov dx, word ptr [esi+4]
1003 add esi,eax
1004 mov dword ptr [edi], ebx
1005 mov word ptr [edi+4], dx
1006 add edi,ecx
1007 pop edx
1008 sub edx ,4
1009 jnz Rec6
1010 jmp BltRecEnd
1011Rec6_0123:
1012 cmp edx,2
1013 jz Rec6_2
1014 jb Rec6_01
1015; Must be 3 lines left
1016 mov ebx, dword ptr [esi]
1017 mov dx, word ptr [esi+4]
1018 add esi,eax
1019 mov dword ptr [edi], ebx
1020 mov word ptr [edi+4], dx
1021 add edi,ecx
1022 mov ebx, dword ptr [esi]
1023 mov dx, word ptr [esi+4]
1024 add esi,eax
1025 mov dword ptr [edi], ebx
1026 mov word ptr [edi+4], dx
1027 add edi,ecx
1028 mov ebx, dword ptr [esi]
1029 mov dx, word ptr [esi+4]
1030 mov dword ptr [edi], ebx
1031 mov word ptr [edi+4], dx
1032 jmp BltRecEnd
1033Rec6_2:
1034 mov ebx, dword ptr [esi]
1035 mov dx, word ptr [esi+4]
1036 add esi,eax
1037 mov dword ptr [edi], ebx
1038 mov word ptr [edi+4], dx
1039 add edi,ecx
1040 mov ebx, dword ptr [esi]
1041 mov dx, word ptr [esi+4]
1042 mov dword ptr [edi], ebx
1043 mov word ptr [edi+4], dx
1044 jmp BltRecEnd
1045Rec6_01:
1046 test edx,edx
1047 jz BltRecEnd
1048 mov ebx, dword ptr [esi]
1049 mov dx, word ptr [esi+4]
1050 mov dword ptr [edi], ebx
1051 mov word ptr [edi+4], dx
1052 jmp BltRecEnd
1053
1054; 7 Pixel Wide
1055
1056Rec7:
1057 cmp edx,4
1058 jb Rec6_0123
1059 push edx
1060 mov ebx, dword ptr [esi]
1061 mov dx, word ptr [esi+4]
1062 mov dword ptr [edi], ebx
1063 mov word ptr [edi+4], dx
1064 mov bl, byte ptr[esi+6]
1065 add esi,eax
1066 mov byte ptr[edi+6],bl
1067 add edi,ecx
1068 xor ebx,ebx ; clear ebx to avoid stalls
1069 mov ebx, dword ptr [esi]
1070 mov dx, word ptr [esi+4]
1071 mov dword ptr [edi], ebx
1072 mov word ptr [edi+4], dx
1073 mov bl, byte ptr[esi+6]
1074 add esi,eax
1075 mov byte ptr[edi+6],bl
1076 add edi,ecx
1077 xor ebx,ebx ; clear ebx to avoid stalls
1078 mov ebx, dword ptr [esi]
1079 mov dx, word ptr [esi+4]
1080 mov dword ptr [edi], ebx
1081 mov word ptr [edi+4], dx
1082 mov bl, byte ptr[esi+6]
1083 add esi,eax
1084 mov byte ptr[edi+6],bl
1085 add edi,ecx
1086 xor ebx,ebx ; clear ebx to avoid stalls
1087 mov ebx, dword ptr [esi]
1088 mov dx, word ptr [esi+4]
1089 mov dword ptr [edi], ebx
1090 mov word ptr [edi+4], dx
1091 mov bl, byte ptr[esi+6]
1092 add esi,eax
1093 mov byte ptr[edi+6],bl
1094 add edi,ecx
1095 xor ebx,ebx ; clear ebx to avoid stalls
1096 pop edx
1097 sub edx ,4
1098 jnz Rec7
1099 jmp BltRecEnd
1100Rec7_0123:
1101 cmp edx,2
1102 jz Rec7_2
1103 jb Rec7_01
1104; Must be 3 lines left
1105 mov ebx, dword ptr [esi]
1106 mov dx, word ptr [esi+4]
1107 mov dword ptr [edi], ebx
1108 mov word ptr [edi+4], dx
1109 mov bl, byte ptr[esi+6]
1110 add esi,eax
1111 mov byte ptr[edi+6],bl
1112 add edi,ecx
1113 xor ebx,ebx ; clear ebx to avoid stalls
1114 mov ebx, dword ptr [esi]
1115 mov dx, word ptr [esi+4]
1116 mov dword ptr [edi], ebx
1117 mov word ptr [edi+4], dx
1118 mov bl, byte ptr[esi+6]
1119 add esi,eax
1120 mov byte ptr[edi+6],bl
1121 add edi,ecx
1122 xor ebx,ebx ; clear ebx to avoid stalls
1123 mov ebx, dword ptr [esi]
1124 mov dx, word ptr [esi+4]
1125 mov dword ptr [edi], ebx
1126 mov word ptr [edi+4], dx
1127 mov bl, byte ptr[esi+6]
1128 mov byte ptr[edi+6],bl
1129 jmp BltRecEnd
1130Rec7_2:
1131 mov ebx, dword ptr [esi]
1132 mov dx, word ptr [esi+4]
1133 mov dword ptr [edi], ebx
1134 mov word ptr [edi+4], dx
1135 mov bl, byte ptr[esi+6]
1136 add esi,eax
1137 mov byte ptr[edi+6],bl
1138 add edi,ecx
1139 xor ebx,ebx ; clear ebx to avoid stalls
1140 mov ebx, dword ptr [esi]
1141 mov dx, word ptr [esi+4]
1142 mov dword ptr [edi], ebx
1143 mov word ptr [edi+4], dx
1144 mov bl, byte ptr[esi+6]
1145 mov byte ptr[edi+6],bl
1146 jmp BltRecEnd
1147Rec7_01:
1148 test edx,edx
1149 jz BltRecEnd
1150 mov ebx, dword ptr [esi]
1151 mov dx, word ptr [esi+4]
1152 mov dword ptr [edi], ebx
1153 mov word ptr [edi+4], dx
1154 mov bl, byte ptr[esi+6]
1155 mov byte ptr[edi+6],bl
1156 jmp BltRecEnd
1157
1158; 8 Pixel Wide
1159
1160Rec8:
1161 cmp edx,4
1162 jb Rec8_0123
1163 push edx
1164 mov ebx, dword ptr [esi]
1165 mov edx, dword ptr [esi+4]
1166 mov dword ptr [edi], ebx
1167 mov dword ptr [edi+4], edx
1168 add esi,eax
1169 add edi,ecx
1170 mov ebx, dword ptr [esi]
1171 mov edx, dword ptr [esi+4]
1172 mov dword ptr [edi], ebx
1173 mov dword ptr [edi+4], edx
1174 add esi,eax
1175 add edi,ecx
1176 mov ebx, dword ptr [esi]
1177 mov edx, dword ptr [esi+4]
1178 mov dword ptr [edi], ebx
1179 mov dword ptr [edi+4], edx
1180 add esi,eax
1181 add edi,ecx
1182 mov ebx, dword ptr [esi]
1183 mov edx, dword ptr [esi+4]
1184 mov dword ptr [edi], ebx
1185 mov dword ptr [edi+4], edx
1186 add esi,eax
1187 add edi,ecx
1188 pop edx
1189 sub edx ,4
1190 jnz Rec8
1191 jmp BltRecEnd
1192
1193Rec8_0123:
1194 cmp edx,2
1195 jz Rec8_2
1196 jb Rec8_01
1197;3 lines left
1198 mov ebx, dword ptr [esi]
1199 mov edx, dword ptr [esi+4]
1200 mov dword ptr [edi], ebx
1201 mov dword ptr [edi+4], edx
1202 add esi,eax
1203 add edi,ecx
1204 mov ebx, dword ptr [esi]
1205 mov edx, dword ptr [esi+4]
1206 mov dword ptr [edi], ebx
1207 mov dword ptr [edi+4], edx
1208 add esi,eax
1209 add edi,ecx
1210 mov ebx, dword ptr [esi]
1211 mov edx, dword ptr [esi+4]
1212 mov dword ptr [edi], ebx
1213 mov dword ptr [edi+4], edx
1214 jmp BltRecEnd
1215Rec8_2:
1216 mov ebx, dword ptr [esi]
1217 mov edx, dword ptr [esi+4]
1218 mov dword ptr [edi], ebx
1219 mov dword ptr [edi+4], edx
1220 add esi,eax
1221 add edi,ecx
1222 mov ebx, dword ptr [esi]
1223 mov edx, dword ptr [esi+4]
1224 mov dword ptr [edi], ebx
1225 mov dword ptr [edi+4], edx
1226 jmp BltRecEnd
1227Rec8_01:
1228 test edx,edx
1229 jz BltRecEnd
1230 mov ebx, dword ptr [esi]
1231 mov edx, dword ptr [esi+4]
1232 mov dword ptr [edi], ebx
1233 mov dword ptr [edi+4], edx
1234 jmp BltRecEnd
1235
1236; 9 Pixel Wide
1237
1238Rec9:
1239 cmp edx,4
1240 jb Rec9_0123
1241 push edx
1242 FLD QWORD PTR [ESI]
1243 mov bl, byte ptr [esi+8]
1244 FSTP QWORD PTR [EDI]
1245 mov byte ptr [edi+8], bl
1246 add esi,eax
1247 add edi,ecx
1248 FLD QWORD PTR [ESI]
1249 mov bl, byte ptr [esi+8]
1250 FSTP QWORD PTR [EDI]
1251 mov byte ptr [edi+8], bl
1252 add esi,eax
1253 add edi,ecx
1254 FLD QWORD PTR [ESI]
1255 mov bl, byte ptr [esi+8]
1256 FSTP QWORD PTR [EDI]
1257 mov byte ptr [edi+8], bl
1258 add esi,eax
1259 add edi,ecx
1260 FLD QWORD PTR [ESI]
1261 mov bl, byte ptr [esi+8]
1262 FSTP QWORD PTR [EDI]
1263 mov byte ptr [edi+8], bl
1264 add esi,eax
1265 add edi,ecx
1266 pop edx
1267 sub edx ,4
1268 jnz Rec9
1269 jmp BltRecEnd
1270
1271Rec9_0123:
1272 cmp edx,2
1273 jz Rec9_2
1274 jb Rec9_01
1275;3 lines left
1276 FLD QWORD PTR [ESI]
1277 mov bl, byte ptr [esi+8]
1278 FSTP QWORD PTR [EDI]
1279 mov byte ptr [edi+8], bl
1280 add esi,eax
1281 add edi,ecx
1282 FLD QWORD PTR [ESI]
1283 mov bl, byte ptr [esi+8]
1284 FSTP QWORD PTR [EDI]
1285 mov byte ptr [edi+8], bl
1286 add esi,eax
1287 add edi,ecx
1288 FLD QWORD PTR [ESI]
1289 mov bl, byte ptr [esi+8]
1290 FSTP QWORD PTR [EDI]
1291 mov byte ptr [edi+8], bl
1292 jmp BltRecEnd
1293Rec9_2:
1294 FLD QWORD PTR [ESI]
1295 mov bl, byte ptr [esi+8]
1296 FSTP QWORD PTR [EDI]
1297 mov byte ptr [edi+8], bl
1298 add esi,eax
1299 add edi,ecx
1300 FLD QWORD PTR [ESI]
1301 mov bl, byte ptr [esi+8]
1302 FSTP QWORD PTR [EDI]
1303 mov byte ptr [edi+8], bl
1304 jmp BltRecEnd
1305Rec9_01:
1306 test edx,edx
1307 jz BltRecEnd
1308 FLD QWORD PTR [ESI]
1309 mov bl, byte ptr [esi+8]
1310 FSTP QWORD PTR [EDI]
1311 mov byte ptr [edi+8], bl
1312 jmp BltRecEnd
1313
1314; 10 Pixel Wide
1315
1316Rec10:
1317 cmp edx,4
1318 jb Rec10_0123
1319 FLD QWORD PTR [ESI]
1320 mov bx, word ptr [esi+8]
1321 FSTP QWORD PTR [EDI]
1322 mov word ptr [edi+8], bx
1323 add esi,eax
1324 add edi,ecx
1325 FLD QWORD PTR [ESI]
1326 mov bx, word ptr [esi+8]
1327 FSTP QWORD PTR [EDI]
1328 mov word ptr [edi+8], bx
1329 add esi,eax
1330 add edi,ecx
1331 FLD QWORD PTR [ESI]
1332 mov bx, word ptr [esi+8]
1333 FSTP QWORD PTR [EDI]
1334 mov word ptr [edi+8], bx
1335 add esi,eax
1336 add edi,ecx
1337 FLD QWORD PTR [ESI]
1338 mov bx, word ptr [esi+8]
1339 FSTP QWORD PTR [EDI]
1340 mov word ptr [edi+8], bx
1341 add esi,eax
1342 add edi,ecx
1343 sub edx ,4
1344 jnz Rec10
1345 jmp BltRecEnd
1346
1347Rec10_0123:
1348 cmp edx,2
1349 jz Rec10_2
1350 jb Rec10_01
1351;3 lines left
1352 FLD QWORD PTR [ESI]
1353 mov bx, word ptr [esi+8]
1354 FSTP QWORD PTR [EDI]
1355 mov word ptr [edi+8], bx
1356 add esi,eax
1357 add edi,ecx
1358 FLD QWORD PTR [ESI]
1359 mov bx, word ptr [esi+8]
1360 FSTP QWORD PTR [EDI]
1361 mov word ptr [edi+8], bx
1362 add esi,eax
1363 add edi,ecx
1364 FLD QWORD PTR [ESI]
1365 mov bx, word ptr [esi+8]
1366 FSTP QWORD PTR [EDI]
1367 mov word ptr [edi+8], bx
1368 jmp BltRecEnd
1369Rec10_2:
1370 FLD QWORD PTR [ESI]
1371 mov bx, word ptr [esi+8]
1372 FSTP QWORD PTR [EDI]
1373 mov word ptr [edi+8], bx
1374 add esi,eax
1375 add edi,ecx
1376 FLD QWORD PTR [ESI]
1377 mov bx, word ptr [esi+8]
1378 FSTP QWORD PTR [EDI]
1379 mov word ptr [edi+8], bx
1380 jmp BltRecEnd
1381Rec10_01:
1382 test edx,edx
1383 jz BltRecEnd
1384 FLD QWORD PTR [ESI]
1385 mov bx, word ptr [esi+8]
1386 FSTP QWORD PTR [EDI]
1387 mov word ptr [edi+8], bx
1388 jmp BltRecEnd
1389
1390; 11 Pixel Wide
1391
1392Rec11:
1393 cmp edx,4
1394 jb Rec11_0123
1395 push edx
1396 FLD QWORD PTR [ESI]
1397 mov bx, word ptr [esi+8]
1398 mov dl, byte ptr [esi+10]
1399 FSTP QWORD PTR [EDI]
1400 mov word ptr [edi+8], bx
1401 mov byte ptr [edi+10], dl
1402 add esi,eax
1403 add edi,ecx
1404 FLD QWORD PTR [ESI]
1405 mov bx, word ptr [esi+8]
1406 mov dl, byte ptr [esi+10]
1407 FSTP QWORD PTR [EDI]
1408 mov word ptr [edi+8], bx
1409 mov byte ptr [edi+10], dl
1410 add esi,eax
1411 add edi,ecx
1412 FLD QWORD PTR [ESI]
1413 mov bx, word ptr [esi+8]
1414 mov dl, byte ptr [esi+10]
1415 FSTP QWORD PTR [EDI]
1416 mov word ptr [edi+8], bx
1417 mov byte ptr [edi+10], dl
1418 add esi,eax
1419 add edi,ecx
1420 FLD QWORD PTR [ESI]
1421 mov bx, word ptr [esi+8]
1422 mov dl, byte ptr [esi+10]
1423 FSTP QWORD PTR [EDI]
1424 mov word ptr [edi+8], bx
1425 mov byte ptr [edi+10], dl
1426 add esi,eax
1427 add edi,ecx
1428 pop edx
1429 sub edx ,4
1430 jnz Rec10
1431 jmp BltRecEnd
1432
1433Rec11_0123:
1434 cmp edx,2
1435 jz Rec11_2
1436 jb Rec11_01
1437;3 lines left
1438 FLD QWORD PTR [ESI]
1439 mov bx, word ptr [esi+8]
1440 mov dl, byte ptr [esi+10]
1441 FSTP QWORD PTR [EDI]
1442 mov word ptr [edi+8], bx
1443 mov byte ptr [edi+10], dl
1444 add esi,eax
1445 add edi,ecx
1446 FLD QWORD PTR [ESI]
1447 mov bx, word ptr [esi+8]
1448 mov dl, byte ptr [esi+10]
1449 FSTP QWORD PTR [EDI]
1450 mov word ptr [edi+8], bx
1451 mov byte ptr [edi+10], dl
1452 add esi,eax
1453 add edi,ecx
1454 FLD QWORD PTR [ESI]
1455 mov bx, word ptr [esi+8]
1456 mov dl, byte ptr [esi+10]
1457 FSTP QWORD PTR [EDI]
1458 mov word ptr [edi+8], bx
1459 mov byte ptr [edi+10], dl
1460 jmp BltRecEnd
1461Rec11_2:
1462 FLD QWORD PTR [ESI]
1463 mov bx, word ptr [esi+8]
1464 mov dl, byte ptr [esi+10]
1465 FSTP QWORD PTR [EDI]
1466 mov word ptr [edi+8], bx
1467 mov byte ptr [edi+10], dl
1468 add esi,eax
1469 add edi,ecx
1470 FLD QWORD PTR [ESI]
1471 mov bx, word ptr [esi+8]
1472 mov dl, byte ptr [esi+10]
1473 FSTP QWORD PTR [EDI]
1474 mov word ptr [edi+8], bx
1475 mov byte ptr [edi+10], dl
1476 jmp BltRecEnd
1477Rec11_01:
1478 test edx,edx
1479 jz BltRecEnd
1480 FLD QWORD PTR [ESI]
1481 mov bx, word ptr [esi+8]
1482 mov dl, byte ptr [esi+10]
1483 FSTP QWORD PTR [EDI]
1484 mov word ptr [edi+8], bx
1485 mov byte ptr [edi+10], dl
1486 jmp BltRecEnd
1487
1488; 12 Pixel Wide
1489
1490Rec12:
1491 cmp edx,4
1492 jb Rec12_0123
1493 FLD QWORD PTR [ESI]
1494 mov ebx, dword ptr [esi+8]
1495 FSTP QWORD PTR [EDI]
1496 mov dword ptr [edi+8], ebx
1497 add esi,eax
1498 add edi,ecx
1499 FLD QWORD PTR [ESI]
1500 mov ebx, dword ptr [esi+8]
1501 FSTP QWORD PTR [EDI]
1502 mov dword ptr [edi+8], ebx
1503 add esi,eax
1504 add edi,ecx
1505 FLD QWORD PTR [ESI]
1506 mov ebx, dword ptr [esi+8]
1507 FSTP QWORD PTR [EDI]
1508 mov dword ptr [edi+8], ebx
1509 add esi,eax
1510 add edi,ecx
1511 FLD QWORD PTR [ESI]
1512 mov ebx, dword ptr [esi+8]
1513 FSTP QWORD PTR [EDI]
1514 mov dword ptr [edi+8], ebx
1515 add esi,eax
1516 add edi,ecx
1517 sub edx ,4
1518 jnz Rec12
1519 jmp BltRecEnd
1520
1521Rec12_0123:
1522 cmp edx,2
1523 jz Rec12_2
1524 jb Rec12_01
1525;3 lines left
1526 FLD QWORD PTR [ESI]
1527 mov ebx, dword ptr [esi+8]
1528 FSTP QWORD PTR [EDI]
1529 mov dword ptr [edi+8], ebx
1530 add esi,eax
1531 add edi,ecx
1532 FLD QWORD PTR [ESI]
1533 mov ebx, dword ptr [esi+8]
1534 FSTP QWORD PTR [EDI]
1535 mov dword ptr [edi+8], ebx
1536 add esi,eax
1537 add edi,ecx
1538 FLD QWORD PTR [ESI]
1539 mov ebx, dword ptr [esi+8]
1540 FSTP QWORD PTR [EDI]
1541 mov dword ptr [edi+8], ebx
1542 jmp BltRecEnd
1543Rec12_2:
1544 FLD QWORD PTR [ESI]
1545 mov ebx, dword ptr [esi+8]
1546 FSTP QWORD PTR [EDI]
1547 mov dword ptr [edi+8], ebx
1548 add esi,eax
1549 add edi,ecx
1550 FLD QWORD PTR [ESI]
1551 mov ebx, dword ptr [esi+8]
1552 FSTP QWORD PTR [EDI]
1553 mov dword ptr [edi+8], ebx
1554 jmp BltRecEnd
1555Rec12_01:
1556 test edx,edx
1557 jz BltRecEnd
1558 FLD QWORD PTR [ESI]
1559 mov ebx, dword ptr [esi+8]
1560 FSTP QWORD PTR [EDI]
1561 mov dword ptr [edi+8], ebx
1562 jmp BltRecEnd
1563
1564; 13 Pixel Wide
1565
1566Rec13:
1567 cmp edx,4
1568 jb Rec13_0123
1569 push edx
1570 FLD QWORD PTR [ESI]
1571 mov ebx, dword ptr [esi+8]
1572 mov dl, byte ptr [esi+12]
1573 FSTP QWORD PTR [EDI]
1574 mov dword ptr [edi+8], ebx
1575 mov byte ptr [edi+12], dl
1576 add esi,eax
1577 add edi,ecx
1578 FLD QWORD PTR [ESI]
1579 mov ebx, dword ptr [esi+8]
1580 mov dl, byte ptr [esi+12]
1581 FSTP QWORD PTR [EDI]
1582 mov dword ptr [edi+8], ebx
1583 mov byte ptr [edi+12], dl
1584 add esi,eax
1585 add edi,ecx
1586 FLD QWORD PTR [ESI]
1587 mov ebx, dword ptr [esi+8]
1588 mov dl, byte ptr [esi+12]
1589 FSTP QWORD PTR [EDI]
1590 mov dword ptr [edi+8], ebx
1591 mov byte ptr [edi+12], dl
1592 add esi,eax
1593 add edi,ecx
1594 FLD QWORD PTR [ESI]
1595 mov ebx, dword ptr [esi+8]
1596 mov dl, byte ptr [esi+12]
1597 FSTP QWORD PTR [EDI]
1598 mov dword ptr [edi+8], ebx
1599 mov byte ptr [edi+12], dl
1600 add esi,eax
1601 add edi,ecx
1602 pop edx
1603 sub edx ,4
1604 jnz Rec13
1605 jmp BltRecEnd
1606
1607Rec13_0123:
1608 cmp edx,2
1609 jz Rec13_2
1610 jb Rec13_01
1611;3 lines left
1612 FLD QWORD PTR [ESI]
1613 mov ebx, dword ptr [esi+8]
1614 mov dl, byte ptr [esi+12]
1615 FSTP QWORD PTR [EDI]
1616 mov dword ptr [edi+8], ebx
1617 mov byte ptr [edi+12], dl
1618 add esi,eax
1619 add edi,ecx
1620 FLD QWORD PTR [ESI]
1621 mov ebx, dword ptr [esi+8]
1622 mov dl, byte ptr [esi+12]
1623 FSTP QWORD PTR [EDI]
1624 mov dword ptr [edi+8], ebx
1625 mov byte ptr [edi+12], dl
1626 add esi,eax
1627 add edi,ecx
1628 FLD QWORD PTR [ESI]
1629 mov ebx, dword ptr [esi+8]
1630 mov dl, byte ptr [esi+12]
1631 FSTP QWORD PTR [EDI]
1632 mov dword ptr [edi+8], ebx
1633 mov byte ptr [edi+12], dl
1634 jmp BltRecEnd
1635Rec13_2:
1636 FLD QWORD PTR [ESI]
1637 mov ebx, dword ptr [esi+8]
1638 mov dl, byte ptr [esi+12]
1639 FSTP QWORD PTR [EDI]
1640 mov dword ptr [edi+8], ebx
1641 mov byte ptr [edi+12], dl
1642 add esi,eax
1643 add edi,ecx
1644 FLD QWORD PTR [ESI]
1645 mov ebx, dword ptr [esi+8]
1646 mov dl, byte ptr [esi+12]
1647 FSTP QWORD PTR [EDI]
1648 mov dword ptr [edi+8], ebx
1649 mov byte ptr [edi+12], dl
1650 jmp BltRecEnd
1651Rec13_01:
1652 test edx,edx
1653 jz BltRecEnd
1654 FLD QWORD PTR [ESI]
1655 mov ebx, dword ptr [esi+8]
1656 mov dl, byte ptr [esi+12]
1657 FSTP QWORD PTR [EDI]
1658 mov dword ptr [edi+8], ebx
1659 mov byte ptr [edi+12], dl
1660 jmp BltRecEnd
1661
1662; 14 Pixel Wide
1663
1664Rec14:
1665 cmp edx,4
1666 jb Rec14_0123
1667 push edx
1668 FLD QWORD PTR [ESI]
1669 mov ebx, dword ptr [esi+8]
1670 mov dx, word ptr [esi+12]
1671 FSTP QWORD PTR [EDI]
1672 mov dword ptr [edi+8], ebx
1673 mov word ptr [edi+12], dx
1674 add esi,eax
1675 add edi,ecx
1676 FLD QWORD PTR [ESI]
1677 mov ebx, dword ptr [esi+8]
1678 mov dx, word ptr [esi+12]
1679 FSTP QWORD PTR [EDI]
1680 mov dword ptr [edi+8], ebx
1681 mov word ptr [edi+12], dx
1682 add esi,eax
1683 add edi,ecx
1684 FLD QWORD PTR [ESI]
1685 mov ebx, dword ptr [esi+8]
1686 mov dx, word ptr [esi+12]
1687 FSTP QWORD PTR [EDI]
1688 mov dword ptr [edi+8], ebx
1689 mov word ptr [edi+12], dx
1690 add esi,eax
1691 add edi,ecx
1692 FLD QWORD PTR [ESI]
1693 mov ebx, dword ptr [esi+8]
1694 mov dx, word ptr [esi+12]
1695 FSTP QWORD PTR [EDI]
1696 mov dword ptr [edi+8], ebx
1697 mov word ptr [edi+12], dx
1698 add esi,eax
1699 add edi,ecx
1700 pop edx
1701 sub edx ,4
1702 jnz Rec14
1703 jmp BltRecEnd
1704
1705Rec14_0123:
1706 cmp edx,2
1707 jz Rec14_2
1708 jb Rec14_01
1709;3 lines left
1710 FLD QWORD PTR [ESI]
1711 mov ebx, dword ptr [esi+8]
1712 mov dx, word ptr [esi+12]
1713 FSTP QWORD PTR [EDI]
1714 mov dword ptr [edi+8], ebx
1715 mov word ptr [edi+12], dx
1716 add esi,eax
1717 add edi,ecx
1718 FLD QWORD PTR [ESI]
1719 mov ebx, dword ptr [esi+8]
1720 mov dx, word ptr [esi+12]
1721 FSTP QWORD PTR [EDI]
1722 mov dword ptr [edi+8], ebx
1723 mov word ptr [edi+12], dx
1724 add esi,eax
1725 add edi,ecx
1726 FLD QWORD PTR [ESI]
1727 mov ebx, dword ptr [esi+8]
1728 mov dx, word ptr [esi+12]
1729 FSTP QWORD PTR [EDI]
1730 mov dword ptr [edi+8], ebx
1731 mov word ptr [edi+12], dx
1732 jmp BltRecEnd
1733Rec14_2:
1734 FLD QWORD PTR [ESI]
1735 mov ebx, dword ptr [esi+8]
1736 mov dx, word ptr [esi+12]
1737 FSTP QWORD PTR [EDI]
1738 mov dword ptr [edi+8], ebx
1739 mov word ptr [edi+12], dx
1740 add esi,eax
1741 add edi,ecx
1742 FLD QWORD PTR [ESI]
1743 mov ebx, dword ptr [esi+8]
1744 mov dx, word ptr [esi+12]
1745 FSTP QWORD PTR [EDI]
1746 mov dword ptr [edi+8], ebx
1747 mov word ptr [edi+12], dx
1748 jmp BltRecEnd
1749Rec14_01:
1750 test edx,edx
1751 jz BltRecEnd
1752 FLD QWORD PTR [ESI]
1753 mov ebx, dword ptr [esi+8]
1754 mov dx, word ptr [esi+12]
1755 FSTP QWORD PTR [EDI]
1756 mov dword ptr [edi+8], ebx
1757 mov word ptr [edi+12], dx
1758 jmp BltRecEnd
1759
1760; 15 Pixel Wide
1761
1762Rec15:
1763 cmp edx,4
1764 jb Rec15_0123
1765 push edx
1766 FLD QWORD PTR [ESI]
1767 mov ebx, dword ptr [esi+8]
1768 mov dx, word ptr [esi+12]
1769 FSTP QWORD PTR [EDI]
1770 mov dword ptr [edi+8], ebx
1771 mov bl, byte ptr[esi+14]
1772 mov word ptr [edi+12], dx
1773 add esi,eax
1774 mov byte ptr[edi+14], bl
1775 add edi,ecx
1776 FLD QWORD PTR [ESI]
1777 mov ebx, dword ptr [esi+8]
1778 mov dx, word ptr [esi+12]
1779 FSTP QWORD PTR [EDI]
1780 mov dword ptr [edi+8], ebx
1781 mov bl, byte ptr[esi+14]
1782 mov word ptr [edi+12], dx
1783 add esi,eax
1784 mov byte ptr[edi+14], bl
1785 add edi,ecx
1786 FLD QWORD PTR [ESI]
1787 mov ebx, dword ptr [esi+8]
1788 mov dx, word ptr [esi+12]
1789 FSTP QWORD PTR [EDI]
1790 mov dword ptr [edi+8], ebx
1791 mov bl, byte ptr[esi+14]
1792 mov word ptr [edi+12], dx
1793 add esi,eax
1794 mov byte ptr[edi+14], bl
1795 add edi,ecx
1796 FLD QWORD PTR [ESI]
1797 mov ebx, dword ptr [esi+8]
1798 mov dx, word ptr [esi+12]
1799 FSTP QWORD PTR [EDI]
1800 mov dword ptr [edi+8], ebx
1801 mov bl, byte ptr[esi+14]
1802 mov word ptr [edi+12], dx
1803 add esi,eax
1804 mov byte ptr[edi+14], bl
1805 add edi,ecx
1806 pop edx
1807 sub edx ,4
1808 jnz Rec15
1809 jmp BltRecEnd
1810
1811Rec15_0123:
1812 cmp edx,2
1813 jz Rec15_2
1814 jb Rec15_01
1815;3 lines left
1816 FLD QWORD PTR [ESI]
1817 mov ebx, dword ptr [esi+8]
1818 mov dx, word ptr [esi+12]
1819 FSTP QWORD PTR [EDI]
1820 mov dword ptr [edi+8], ebx
1821 mov bl, byte ptr[esi+14]
1822 mov word ptr [edi+12], dx
1823 add esi,eax
1824 mov byte ptr[edi+14], bl
1825 add edi,ecx
1826 FLD QWORD PTR [ESI]
1827 mov ebx, dword ptr [esi+8]
1828 mov dx, word ptr [esi+12]
1829 FSTP QWORD PTR [EDI]
1830 mov dword ptr [edi+8], ebx
1831 mov bl, byte ptr[esi+14]
1832 mov word ptr [edi+12], dx
1833 add esi,eax
1834 mov byte ptr[edi+14], bl
1835 add edi,ecx
1836 FLD QWORD PTR [ESI]
1837 mov ebx, dword ptr [esi+8]
1838 mov dx, word ptr [esi+12]
1839 FSTP QWORD PTR [EDI]
1840 mov dword ptr [edi+8], ebx
1841 mov bl, byte ptr[esi+14]
1842 mov word ptr [edi+12], dx
1843 mov byte ptr[edi+14], bl
1844 jmp BltRecEnd
1845Rec15_2:
1846 FLD QWORD PTR [ESI]
1847 mov ebx, dword ptr [esi+8]
1848 mov dx, word ptr [esi+12]
1849 FSTP QWORD PTR [EDI]
1850 mov dword ptr [edi+8], ebx
1851 mov bl, byte ptr[esi+14]
1852 mov word ptr [edi+12], dx
1853 add esi,eax
1854 mov byte ptr[edi+14], bl
1855 add edi,ecx
1856 FLD QWORD PTR [ESI]
1857 mov ebx, dword ptr [esi+8]
1858 mov dx, word ptr [esi+12]
1859 FSTP QWORD PTR [EDI]
1860 mov dword ptr [edi+8], ebx
1861 mov bl, byte ptr[esi+14]
1862 mov word ptr [edi+12], dx
1863 mov byte ptr[edi+14], bl
1864 jmp BltRecEnd
1865Rec15_01:
1866 test edx,edx
1867 jz BltRecEnd
1868 FLD QWORD PTR [ESI]
1869 mov ebx, dword ptr [esi+8]
1870 mov dx, word ptr [esi+12]
1871 FSTP QWORD PTR [EDI]
1872 mov dword ptr [edi+8], ebx
1873 mov bl, byte ptr[esi+14]
1874 mov word ptr [edi+12], dx
1875 mov byte ptr[edi+14], bl
1876 jmp BltRecEnd
1877
1878
1879ComplexBlt:
1880 ; Blit first the even rect then the rest
1881
1882 push dword ptr [ebp+28] ; ulSrcPitch
1883 push dword ptr [ebp+24] ; ulDestPitch
1884 push edx
1885 shl ecx,4
1886 push ecx
1887 push esi
1888 push edi
1889 call _BltRec
1890 sub esp,24
1891 add esi,ecx
1892 add edi,ecx
1893 push dword ptr [ebp+28] ; ulSrcPitch
1894 push dword ptr [ebp+24] ; ulDestPitch
1895 push edx
1896 push ebx
1897 push esi
1898 push edi
1899 call _BltRec
1900 sub esp,24
1901
1902BltRecEnd:
1903 pop edx
1904 pop ecx
1905 pop ebx
1906 pop eax
1907 pop esi
1908 pop edi
1909 pop ebp
1910 ret
1911_BltRec ENDP
1912
1913
;
; int __cdecl CPUHasMMX()
;
; Detects MMX and CMOV support through CPUID.
;
; returns (in EAX):
;   0 = no MMX (also returned when the CPUID instruction is unavailable)
;   1 = MMX
;   2 = MMX + CMOV instruction
;
; All registers other than EAX are preserved, and EFLAGS.ID is put back
; to the value it had on entry.
;
        PUBLIC  _CPUHasMMX

_CPUHasMMX PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    ebx
        push    ecx
        push    edx

        xor     esi, esi                ; esi = result; survives CPUID, default 0

        ; CPUID exists only if software can toggle the ID flag (EFLAGS bit 21).
        pushfd
        pop     eax
        mov     ebx, eax                ; ebx = EFLAGS as found on entry
        xor     eax, 00200000h          ; flip the ID bit
        push    eax
        popfd
        pushfd
        pop     eax                     ; eax = EFLAGS after the attempted flip
        push    ebx
        popfd                           ; restore the caller's EFLAGS
        cmp     eax, ebx
        je      Return                  ; bit did not change => no CPUID => 0

        mov     eax, 1                  ; CPUID leaf 1: feature flags in EDX
        CPUID                           ; clobbers eax, ebx, ecx, edx
        test    edx, 00800000h          ; MMX bit (23) set ?
        jz      Return                  ; no: result stays 0 (EAX now holds the
                                        ; CPU signature and must not be returned)
        inc     esi                     ; result = 1
        test    edx, 00008000h          ; conditional move bit (15) set ?
        jz      Return
        inc     esi                     ; result = 2
Return:
        mov     eax, esi
;       xor     eax, eax                ; uncomment to pretend no MMX is available
        pop     edx
        pop     ecx
        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        ret
_CPUHasMMX ENDP
1959
;
; void __cdecl MemFlip(PBYTE dest, PBYTE src, ULONG Size);
;
; Forward memory copy of Size bytes from src to dest using the FPU
; (FLD / FSTP on qwords).  MMX might even be faster but is not present
; on every system.  To maximize the speed 64 bytes are copied per loop
; pass; the remaining 0..63 bytes are copied afterwards in blocks of
; 32, 16, 8, 4, 2 and 1 bytes.
;
; In:    [ebp+8]  = dest
;        [ebp+12] = src
;        [ebp+16] = Size in bytes
; Out:   nothing; every integer register is preserved
; Uses:  two x87 stack slots
;
; NOTE(review): FLD/FSTP round-trips each qword through the 80 bit x87
; format.  Bit patterns that decode as signalling NaNs may not be written
; back unchanged; FILD/FISTP would be the bit-exact alternative.  Confirm
; before using this routine on arbitrary data.
;
        PUBLIC  _MemFlip

_MemFlip PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx

        mov     eax, dword ptr [ebp+16] ; Size of buffer
        mov     edi, dword ptr [ebp+8]  ; Destination
        mov     ebx, eax
        mov     esi, dword ptr [ebp+12] ; Source pointer

        and     ebx, 0000003Fh          ; ebx = leftover bytes (Size mod 64)
        shr     eax, 6                  ; eax = number of 64 byte blocks

        jz      COPYREMAIN              ; less than 64 bytes to copy
ALIGN 4

Loop64:
        FLD     QWORD PTR [ESI]         ; 1
        FLD     QWORD PTR [ESI+8]       ; 2
        FXCH                            ; bring the first qword back to ST(0)
        FSTP    QWORD PTR [EDI]         ; 3,4 clocks
        FSTP    QWORD PTR [EDI+8]       ; 5,6
        ADD     ESI, 16                 ; 7 U integer ops pair with the FPU ops
        ADD     EDI, 16                 ; 7 V => 7 clocks per 16 bytes
        FLD     QWORD PTR [ESI]
        FLD     QWORD PTR [ESI+8]
        FXCH
        FSTP    QWORD PTR [EDI]
        FSTP    QWORD PTR [EDI+8]
        ADD     ESI, 16
        ADD     EDI, 16
        FLD     QWORD PTR [ESI]
        FLD     QWORD PTR [ESI+8]
        FXCH
        FSTP    QWORD PTR [EDI]
        FSTP    QWORD PTR [EDI+8]
        ADD     ESI, 16
        ADD     EDI, 16
        FLD     QWORD PTR [ESI]
        FLD     QWORD PTR [ESI+8]
        FXCH
        FSTP    QWORD PTR [EDI]
        FSTP    QWORD PTR [EDI+8]
        ADD     ESI, 16
        ADD     EDI, 16
        dec     eax                     ; one 64 byte block done
        jnz     Loop64

COPYREMAIN:
        test    ebx, ebx                ; something left ?
        jz      EndOffFlip

        test    ebx, 00000020h          ; at least 32 bytes left ?
        jz      Test16

        FLD     QWORD PTR [ESI]
        FLD     QWORD PTR [ESI+8]
        FXCH
        FSTP    QWORD PTR [EDI]
        FSTP    QWORD PTR [EDI+8]
        ADD     ESI, 16
        ADD     EDI, 16
        FLD     QWORD PTR [ESI]
        FLD     QWORD PTR [ESI+8]
        FXCH
        FSTP    QWORD PTR [EDI]
        FSTP    QWORD PTR [EDI+8]
        ADD     ESI, 16
        ADD     EDI, 16
        sub     ebx, 00000020h
        jz      EndOffFlip

        ; From here on ebx is non-zero and only holds bits below the one
        ; being tested, so each step either copies its block or skips it.
        ; TEST always clears CF, hence the branch must be JZ (not JB).
Test16:
        test    ebx, 00000010h          ; 16 byte block left ?
        jz      Test8

        FLD     QWORD PTR [ESI]
        FLD     QWORD PTR [ESI+8]
        FXCH
        FSTP    QWORD PTR [EDI]
        FSTP    QWORD PTR [EDI+8]
        ADD     ESI, 16
        ADD     EDI, 16
        sub     ebx, 00000010h
        jz      EndOffFlip

Test8:
        test    ebx, 00000008h          ; 8 byte block left ?
        jz      Test4
        mov     eax, [esi]
        mov     ecx, [esi+4]
        mov     [edi], eax
        mov     [edi+4], ecx
        add     esi, 8
        add     edi, 8
        sub     ebx, 8
        jz      EndOffFlip

Test4:
        test    ebx, 00000004h          ; 4 byte block left ?
        jz      Test2
        mov     eax, [esi]
        mov     [edi], eax
        add     esi, 4
        add     edi, 4
        sub     ebx, 4
        jz      EndOffFlip

Test2:
        test    ebx, 00000002h          ; 2 byte block left ?
        jz      Copy1
        mov     ax, [esi]
        mov     [edi], ax
        add     esi, 2
        add     edi, 2
        sub     ebx, 2
        jz      EndOffFlip

Copy1:                                  ; exactly one byte remains here
        mov     al, [esi]
        mov     [edi], al

EndOffFlip:
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_MemFlip ENDP
2109
; void _Optlink DDrawRGB555to565 (WORD *dest, WORD *src, ULONG num);
;
; Converts num 16 bit pixels from RGB555 to RGB565: red and green move up
; one bit (the new green LSB is 0), blue stays in bits 0..4.
;
; In:    eax = dest, edx = src, ecx = num (pixel count, may be 0)
; Out:   nothing
; Clobb: eax, ecx, edx, flags
;
; Pixels are converted two at a time as dwords; an odd last pixel is
; converted on its own.  Counts of 0 and 1 skip the dword loop entirely.

        PUBLIC  DDrawRGB555to565

DDrawRGB555to565 PROC NEAR

        push    esi
        push    edi
        cld                             ; no string ops are used; kept so the
                                        ; direction flag on return is unchanged

        mov     edi, eax                ; _Optlink arg1 = EAX
        mov     esi, edx                ; _Optlink arg2 = EDX
        push    ecx                     ; keep num to test for an odd pixel later
        shr     ecx, 1                  ; _Optlink arg3 = ECX -> pixel pairs
        jz      SHORT oddpixel          ; fewer than two pixels: no dword pass
cvt:
        mov     eax, [esi]              ; two source pixels
        add     esi, 4
        mov     edx, eax

        shl     eax, 1                  ; move red + green up by one bit
        and     edx, 0001F001Fh         ; blue of both pixels
        and     eax, 0FFC0FFC0h         ; drop shifted blue and the bit that
                                        ; crossed from the low into the high pixel
        or      eax, edx

        mov     [edi], eax
        add     edi, 4
        dec     ecx
        jnz     cvt

oddpixel:
        pop     ecx                     ; original num
        test    ecx, 1
        jz      SHORT done              ; even count: finished

        mov     ax, [esi]
        mov     dx, ax
        and     dx, 0001Fh
        shl     ax, 1
        and     ax, 0FFC0h
        or      ax, dx
        mov     [edi], ax

done:
        pop     edi
        pop     esi
        ret

DDrawRGB555to565 ENDP
2157
; void _Optlink DDrawRGB565to555 (WORD *dest, WORD *src, ULONG num);
;
; Converts num 16 bit pixels from RGB565 to RGB555: red and green move down
; one bit (the green LSB is dropped), blue stays in bits 0..4, bit 15 is 0.
;
; In:    eax = dest, edx = src, ecx = num (pixel count, may be 0)
; Out:   nothing
; Clobb: eax, ecx, edx, flags
;
; Pixels are converted two at a time as dwords; an odd last pixel is
; converted on its own.  Counts of 0 and 1 skip the dword loop entirely.

        PUBLIC  DDrawRGB565to555

DDrawRGB565to555 PROC NEAR

        push    esi
        push    edi
        cld                             ; no string ops are used; kept so the
                                        ; direction flag on return is unchanged

        mov     edi, eax                ; _Optlink arg1 = EAX
        mov     esi, edx                ; _Optlink arg2 = EDX
        push    ecx                     ; keep num to test for an odd pixel later
        shr     ecx, 1                  ; _Optlink arg3 = ECX -> pixel pairs
        jz      SHORT oddpixel          ; fewer than two pixels: no dword pass
cvt:
        mov     eax, [esi]              ; two source pixels
        add     esi, 4
        mov     edx, eax

        shr     eax, 1                  ; move red + green down by one bit
        and     edx, 0001F001Fh         ; blue of both pixels
        and     eax, 07FE07FE0h         ; drop shifted blue and the bit that
                                        ; crossed from the high into the low pixel
        or      eax, edx

        mov     [edi], eax
        add     edi, 4
        dec     ecx
        jnz     cvt

oddpixel:
        pop     ecx                     ; original num
        test    ecx, 1
        jz      SHORT done              ; even count: finished

        mov     ax, [esi]
        mov     dx, ax
        and     dx, 0001Fh
        shr     ax, 1
        and     ax, 0FFE0h
        or      ax, dx
        mov     [edi], ax

done:
        pop     edi
        pop     esi
        ret

DDrawRGB565to555 ENDP
2206
; void _Optlink DDrawRGB555to565MMX(WORD *dest, WORD *src, ULONG num);
;
; MMX variant of the RGB555 -> RGB565 conversion.  Eight pixels (16 bytes)
; are converted per pass of the MMX loop; the remaining num mod 8 pixels
; are converted one at a time with integer code.
;
; In:    eax = dest, edx = src, ecx = num (pixel count, may be 0)
; Out:   nothing
; Clobb: eax, ecx, edx, flags
;
; The FPU/MMX register state is saved on the stack (108 bytes) before the
; MMX loop and restored after it, so it is unchanged on return.

        PUBLIC  DDrawRGB555to565MMX

DDrawRGB555to565MMX PROC NEAR
        push    esi
        push    edi
        cld                             ; no string ops are used; kept so the
                                        ; direction flag on return is unchanged

        test    ecx, ecx
        jz      done                    ; nothing to convert

        push    ecx                     ; keep num for the remainder below

        mov     edi, eax                ; _Optlink arg1 = EAX
        mov     esi, edx                ; _Optlink arg2 = EDX
        shr     ecx, 3                  ; _Optlink arg3 = ECX -> blocks of 8 pixels
        jz      lastpixels              ; fewer than 8 pixels: skip the MMX part

        sub     esp, 108                ; room for the FPU state image
        fsaved  dword ptr [esp]

        movq    mm2, qword ptr and1mask ; 0001F001F001F001Fh  (blue)
        movq    mm3, qword ptr and2mask ; 0FFC0FFC0FFC0FFC0h  (red + green)

cvt:
        movq    mm0, qword ptr [esi]    ; pixels 0..3
        add     edi, 16

        movq    mm4, qword ptr [esi+8]  ; pixels 4..7
        movq    mm1, mm0

        movq    mm5, mm4
        psllq   mm0, 1                  ; bits crossing into the next word land
                                        ; in its bit 0, which and2mask clears
        psllq   mm4, 1
        pand    mm1, mm2

        pand    mm0, mm3
        pand    mm5, mm2

        pand    mm4, mm3
        por     mm0, mm1

        por     mm4, mm5
        add     esi, 16

        movq    qword ptr [edi-16], mm0
        dec     ecx

        movq    qword ptr [edi-8], mm4
        jnz     cvt

        nop                             ; NOTE(review): purpose of these two
        nop                             ; NOPs is not evident; retained as found

        frstord dword ptr [esp]
        add     esp, 108

lastpixels:
        pop     ecx
        and     ecx, 7                  ; pixels not covered by the 8 pixel loop
        jz      short done

cvt2loop:
        mov     ax, [esi]
        mov     dx, ax

        add     esi, 2
        and     dx, 001Fh

        shl     ax, 1
        add     edi, 2

        and     ax, 0FFC0h
        or      ax, dx

        mov     [edi-2], ax
        dec     ecx
        jnz     cvt2loop

done:
        pop     edi
        pop     esi
        ret

DDrawRGB555to565MMX ENDP
2293
2294
; void _Optlink DDrawRGB565to555MMX(WORD *dest, WORD *src, ULONG num);
;
; MMX variant of the RGB565 -> RGB555 conversion.  Eight pixels (16 bytes)
; are converted per pass of the MMX loop; the remaining num mod 8 pixels
; are converted one at a time with integer code.
;
; In:    eax = dest, edx = src, ecx = num (pixel count, may be 0)
; Out:   nothing
; Clobb: eax, ecx, edx, flags
;
; The FPU/MMX register state is saved on the stack (108 bytes) before the
; MMX loop and restored after it, so it is unchanged on return.

        PUBLIC  DDrawRGB565to555MMX

DDrawRGB565to555MMX PROC NEAR
        push    esi
        push    edi
        cld                             ; no string ops are used; kept so the
                                        ; direction flag on return is unchanged

        test    ecx, ecx
        jz      done                    ; nothing to convert

        push    ecx                     ; keep num for the remainder below

        mov     edi, eax                ; _Optlink arg1 = EAX
        mov     esi, edx                ; _Optlink arg2 = EDX
        shr     ecx, 3                  ; _Optlink arg3 = ECX -> blocks of 8 pixels
        jz      lastpixels              ; fewer than 8 pixels: skip the MMX part

        sub     esp, 108                ; room for the FPU state image
        fsaved  dword ptr [esp]

        movq    mm2, qword ptr and1mask    ; blue mask, 001Fh in every word
        movq    mm3, qword ptr and2mask565 ; red + green mask for 555 pixels

cvt:
        movq    mm0, qword ptr [esi]    ; pixels 0..3
        add     edi, 16

        movq    mm4, qword ptr [esi+8]  ; pixels 4..7
        movq    mm1, mm0

        movq    mm5, mm4
        psrlw   mm0, 1                  ; per-word shift: bit 0 of a pixel must
                                        ; not leak into bit 15 of its neighbour
        psrlw   mm4, 1
        pand    mm1, mm2

        pand    mm0, mm3
        pand    mm5, mm2

        pand    mm4, mm3
        por     mm0, mm1

        por     mm4, mm5
        add     esi, 16

        movq    qword ptr [edi-16], mm0
        dec     ecx

        movq    qword ptr [edi-8], mm4
        jnz     cvt

        nop                             ; NOTE(review): purpose of these two
        nop                             ; NOPs is not evident; retained as found

        frstord dword ptr [esp]
        add     esp, 108

lastpixels:
        pop     ecx
        and     ecx, 7                  ; pixels not covered by the 8 pixel loop
        jz      short done

cvt2loop:
        mov     ax, [esi]
        mov     dx, ax

        add     esi, 2
        and     dx, 001Fh

        shr     ax, 1
        add     edi, 2

        and     ax, 0FFE0h
        or      ax, dx

        mov     [edi-2], ax
        dec     ecx
        jnz     cvt2loop

done:
        pop     edi
        pop     esi
        ret

DDrawRGB565to555MMX ENDP
2381
2382CODE32 ENDS
2383
2384 END
Note: See TracBrowser for help on using the repository browser.