source: trunk/src/ddraw/asmutil.asm@ 8819

Last change on this file since 8819 was 8819, checked in by sandervl, 23 years ago

* empty log message *

File size: 44.2 KB
Line 
1; $Id: asmutil.asm,v 1.10 2002-07-01 19:16:20 sandervl Exp $
2
3;
4; asmutil.asm Color key bit blitting for DirectDraw
5;
6; Copyright 1998 Sander van Leeuwen
7; 1999 Markus Montkowski
8; Copyright 2000 Daniela Engert (dani@ngrt.de)
9;
10; Project Odin Software License can be found in LICENSE.TXT
11;
12
13 NAME asmutil
14.586p
15.MMX
16
17CODE32 SEGMENT DWORD USE32 PUBLIC 'CODE'
18CODE32 ENDS
19DATA32 SEGMENT DWORD USE32 PUBLIC 'DATA'
20DATA32 ENDS
21CONST32 SEGMENT DWORD USE32 PUBLIC 'CONST'
22CONST32 ENDS
23BSS32 SEGMENT DWORD USE32 PUBLIC 'BSS'
24BSS32 ENDS
25DGROUP GROUP CONST32, BSS32, DATA32
26 ASSUME CS:FLAT, DS:FLAT, SS:FLAT, ES:FLAT
27 DATA32 SEGMENT
28 DATA32 ENDS
29 BSS32 SEGMENT
30 BSS32 ENDS
31 CONST32 SEGMENT
32 CONST32 ENDS
33
DATA32  SEGMENT

; 64-bit (two-dword) bit masks.  They are not referenced by the routines
; visible in this part of the file; presumably they serve 15/16-bit colour
; format conversion code further down - TODO confirm against the users.
        align 4
and1mask        dd      0001F001Fh, 0001F001Fh  ; low 5 bits of each 16-bit word
and2mask        dd      0FFC0FFC0h, 0FFC0FFC0h  ; upper 10 bits of each 16-bit word
and2mask565     dd      0FFE07FE0h, 07FE07FE0h  ; NOTE(review): the two dwords differ
                                                ; in their top word (FFE0 vs 7FE0) -
                                                ; verify this is intentional

DATA32  ENDS
45
46CODE32 SEGMENT
47
48 PUBLIC _BlitColorKey8
49
50; endpos = destbuf + blitlinesize;
51; while(destbuf < endpos) {
52; if(*srcbuf == colorkey) {
53; destbuf++;
54; }
55; else *destbuf++ = *srcbuf;
56; srcbuf++;
57; }
58; destbuf += (destscanlinesize-blitlinesize);
59; srcbuf += (srcscanlinesize-blitlinesize);
60;void BlitColorKey8(char *dest, char *src, ULONG key, ULONG linesize)
;-----------------------------------------------------------------------
; void BlitColorKey8(char *dest, char *src, ULONG key, ULONG linesize)
;
; Copies one run of 8-bit pixels from src to dest.  Every source byte
; equal to the low byte of key is treated as transparent: the matching
; destination byte is left untouched.
;
; In:    [ebp+8]  = dest
;        [ebp+12] = src
;        [ebp+16] = key (only the low byte is used)
;        [ebp+20] = linesize in bytes
; Out:   nothing; every general register is restored before returning
; Notes: whole dwords are merged four pixels at a time, the 0..3
;        trailing bytes are handled one byte at a time so that nothing
;        beyond linesize bytes is read or written.
;-----------------------------------------------------------------------
_BlitColorKey8 PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

        mov     edi, dword ptr [ebp+8]          ; edi = dest
        mov     esi, dword ptr [ebp+12]         ; esi = src
        mov     edx, dword ptr [ebp+16]         ; dl  = colorkey
        mov     ecx, dword ptr [ebp+20]         ; ecx = linesize in bytes

        mov     dh, dl
        mov     eax, edx
        shl     edx, 16
        and     eax, 0000FFFFh
        or      edx, eax                        ; edx = colorkey in each of its 4 bytes
        shr     ecx, 2                          ; ecx = number of whole dwords
        jz      blitremain                      ; less than 4 bytes: tail only

blitloop:
        mov     ebx, dword ptr [esi]            ; ebx = 4 source pixels
        mov     eax, dword ptr [edi]            ; eax = 4 dest pixels (merge target)
        cmp     ebx, edx                        ; all 4 bytes transparent?
        jz      TTTT
        cmp     bx, dx                          ; lower 2 bytes transparent?
        jz      XXTT
        cmp     bl, dl                          ; lowest byte transparent?
        jz      XXOT                            ; yes: byte 1 must be opaque (bx != dx)
        mov     al, bl
        cmp     bh, dh                          ; byte 1 transparent: skip its copy
        jz      XXTT
XXOT:
        mov     ah, bh
XXTT:                                           ; handle the upper 2 pixels
        ror     eax, 16                         ; bring bytes 2/3 into al/ah and bl/bh
        ror     ebx, 16
        cmp     bx, dx
        jz      skipbyte4
        cmp     bl, dl
        je      skipbyte3
        mov     al, bl
skipbyte3:
        cmp     bh, dh
        je      skipbyte4
        mov     ah, bh
skipbyte4:
        ror     eax, 16                         ; restore original byte order
        mov     dword ptr [edi], eax
TTTT:
        add     esi, 4                          ; advance here, not at the loop top, so the
        add     edi, 4                          ; tail code starts behind the last dword
        dec     ecx
        jnz     blitloop

blitremain:
        mov     ecx, dword ptr [ebp+20]
        and     ecx, 3                          ; ecx = 0..3 trailing bytes
        jz      endofblit
blitTailByte8:
        mov     al, byte ptr [esi]
        cmp     al, dl                          ; transparent?
        je      blitTailNext8
        mov     byte ptr [edi], al
blitTailNext8:
        inc     esi
        inc     edi
        dec     ecx
        jnz     blitTailByte8

endofblit:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey8 ENDP
185
186 PUBLIC _BlitColorKey16
187
188; endpos = destbuf + blitlinesize;
189; while(destbuf < endpos) {
190; if(*srcbuf == colorkey) {
191; destbuf++;
192; }
193; else *destbuf++ = *srcbuf;
194; srcbuf++;
195; }
196; destbuf += (destscanlinesize-blitlinesize);
197; srcbuf += (srcscanlinesize-blitlinesize);
198;void BlitColorKey16(char *dest, char *src, ULONG key, ULONG linesize)
;-----------------------------------------------------------------------
; void BlitColorKey16(char *dest, char *src, ULONG key, ULONG linesize)
;
; Copies one run of 16-bit pixels from src to dest.  Every source pixel
; equal to the low word of key is treated as transparent: the matching
; destination pixel is left untouched.
;
; In:    [ebp+8]  = dest
;        [ebp+12] = src
;        [ebp+16] = key (only the low word is used)
;        [ebp+20] = linesize, counted in 16-bit pixels (it is halved to
;                   obtain the dword count)
; Out:   nothing; every general register is restored before returning
; Notes: pixel pairs are merged one dword at a time; an odd trailing
;        pixel is handled with word accesses only.  A linesize of 0 or 1
;        skips the dword loop entirely.
;-----------------------------------------------------------------------
_BlitColorKey16 PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

        mov     edi, dword ptr [ebp+8]          ; edi = dest
        mov     esi, dword ptr [ebp+12]         ; esi = src
        mov     ecx, dword ptr [ebp+20]         ; ecx = linesize in pixels
        mov     edx, dword ptr [ebp+16]         ; dx  = colorkey

        mov     eax, edx
        shl     edx, 16
        and     eax, 0000FFFFh
        or      edx, eax                        ; edx = colorkey in both words
        shr     ecx, 1                          ; ecx = number of whole dwords
        jz      OnePixel                        ; 0 or 1 pixel: no dword loop at all

blitloop16:
        mov     eax, dword ptr [esi]            ; eax = 2 source pixels
        mov     ebx, dword ptr [edi]            ; ebx = 2 dest pixels (merge target)
        cmp     eax, edx                        ; both pixels transparent?
        je      LoopUp
        cmp     ax, dx                          ; lower pixel transparent?
        je      DrawOT
        mov     bx, ax                          ; no: copy the lower pixel
DrawOT:
        ror     eax, 16                         ; bring the upper pixels into ax / bx;
        ror     ebx, 16                         ; both must rotate to keep the lower result
        cmp     ax, dx                          ; upper pixel transparent?
        je      CopyBack
        mov     bx, ax                          ; no: copy the upper pixel
CopyBack:
        ror     ebx, 16                         ; restore pixel order
        mov     dword ptr [edi], ebx            ; write back the merged pair
LoopUp:
        add     esi, 4
        add     edi, 4
        dec     ecx
        jnz     blitloop16

OnePixel:
        test    dword ptr [ebp+20], 1           ; odd linesize?
        jz      endofblit16
        mov     ax, word ptr [esi]
        cmp     ax, dx
        je      endofblit16                     ; last pixel is transparent
        mov     word ptr [edi], ax

endofblit16:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey16 ENDP
263
264
265 PUBLIC _BlitColorKey8MMX
; Same operation as BlitColorKey8, implemented with MMX
267;void BlitColorKey8MMX(char *dest, char *src, ULONG key, ULONG linesize)
268
;-----------------------------------------------------------------------
; void BlitColorKey8MMX(char *dest, char *src, ULONG key, ULONG linesize)
;
; MMX variant of the 8-bit colour-key blit: source bytes equal to the
; low byte of key leave the destination byte untouched, all other bytes
; are copied.
;
; In:    [ebp+8]  = dest
;        [ebp+12] = src
;        [ebp+16] = key (only the low byte is used)
;        [ebp+20] = linesize in bytes
; Out:   nothing; every general register is restored before returning
; Clobb: mm0, mm1, mm2, mm4, mm5, mm6
; Notes: EMMS is executed before returning so that x87 code may run
;        afterwards.
;-----------------------------------------------------------------------
_BlitColorKey8MMX PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx
        push    edx

        mov     edx, [ebp+16]                   ; dl  = colorkey
        mov     edi, [ebp+8]                    ; edi = dest
        mov     esi, [ebp+12]                   ; esi = src
        mov     ecx, dword ptr [ebp+20]         ; ecx = linesize in bytes
        mov     dh, dl
        mov     eax, edx
        shl     edx, 16
        mov     dx, ax                          ; edx = colorkey in each of its 4 bytes
        movd    mm4, edx
        movd    mm5, edx
        psllq   mm4, 32
        por     mm4, mm5                        ; mm4 = colorkey in all 8 bytes
        shr     ecx, 3                          ; ecx = number of whole qwords
        jz      BltRemain8

bltLoopMMX8:
        movq    mm0, [esi]                      ; get source qword
        movq    mm1, [edi]                      ; get dest qword
        movq    mm2, mm0                        ; copy source
        pcmpeqb mm0, mm4                        ; mask = FFh where source == key
        pand    mm1, mm0                        ; keep dest where transparent
        pandn   mm0, mm2                        ; keep source where opaque
        por     mm1, mm0                        ; combine
        movq    qword ptr [edi], mm1            ; write back result
        add     esi, 8
        add     edi, 8
        dec     ecx
        jnz     bltLoopMMX8
BltRemain8:
        mov     eax, dword ptr [ebp+20]
        and     eax, 7                          ; eax = 0..7 trailing bytes
        jmp     ds:JmpTable[eax*4]

align 4

JmpTable:
        dd offset cs:bltEndMMX8
        dd offset cs:blt1MMX8
        dd offset cs:blt2MMX8
        dd offset cs:blt3MMX8
        dd offset cs:blt4MMX8
        dd offset cs:blt5MMX8
        dd offset cs:blt6MMX8
        dd offset cs:blt7MMX8
align 2
;
; Maybe it would be faster for 7-5 to load a qword into mm0/mm1,
; but we might cross a page, so loading the exact bytes is safer.
;
blt7MMX8:
        ; Pack the 7 bytes as: high dword = bytes 0-3, low dword = [x, b5, b4, b6].
        ; The top byte of the low dword is never written back.
        movd    mm0, dword ptr [esi]
        mov     ax, word ptr [esi+4]            ; upper half of eax is 0 (eax was 7)
        mov     bx, word ptr [edi+4]
        movd    mm1, dword ptr [edi]
        psllq   mm0, 32
        shl     eax, 8
        shl     ebx, 8
        mov     al, byte ptr [esi+6]
        mov     bl, byte ptr [edi+6]
        movd    mm5, eax
        por     mm0, mm5
        psllq   mm1, 32
        movd    mm6, ebx
        por     mm1, mm6
        movq    mm2, mm0                        ; copy source
        pcmpeqb mm0, mm4                        ; create mask
        pand    mm1, mm0                        ; mask dest
        pandn   mm0, mm2                        ; mask source
        por     mm1, mm0                        ; or them
        movd    eax, mm1
        psrlq   mm1, 32
        mov     byte ptr [edi+6], al
        movd    dword ptr [edi], mm1
        shr     eax, 8
        mov     word ptr [edi+4], ax
        jmp     bltEndMMX8

blt6MMX8:
        ; high dword = bytes 0-3, low word of the low dword = bytes 4-5
        movd    mm0, dword ptr [esi]
        mov     ax, word ptr [esi+4]
        mov     bx, word ptr [edi+4]
        movd    mm1, dword ptr [edi]
        psllq   mm0, 32
        psllq   mm1, 32
        movd    mm5, eax
        por     mm0, mm5
        movd    mm6, ebx
        por     mm1, mm6
        movq    mm2, mm0                        ; copy source
        pcmpeqb mm0, mm4                        ; create mask
        pand    mm1, mm0                        ; mask dest
        pandn   mm0, mm2                        ; mask source
        por     mm1, mm0                        ; or them
        movd    eax, mm1
        psrlq   mm1, 32
        mov     word ptr [edi+4], ax
        movd    dword ptr [edi], mm1
        jmp     bltEndMMX8

blt5MMX8:
        ; 4 bytes through MMX, then fall into the single byte case
        movd    mm0, dword ptr [esi]
        movd    mm1, dword ptr [edi]
        movq    mm2, mm0                        ; copy source
        pcmpeqb mm0, mm4                        ; create mask
        pand    mm1, mm0                        ; mask dest
        add     esi, 4
        pandn   mm0, mm2                        ; mask source
        por     mm1, mm0                        ; or them
        movd    dword ptr [edi], mm1
        add     edi, 4
        jmp     blt1MMX8

blt4MMX8:
        movd    mm0, dword ptr [esi]
        movd    mm1, dword ptr [edi]
        movq    mm2, mm0                        ; copy source
        pcmpeqb mm0, mm4                        ; create mask
        pand    mm1, mm0                        ; mask dest
        pandn   mm0, mm2                        ; mask source
        por     mm1, mm0                        ; or them
        movd    dword ptr [edi], mm1            ; write back result
        jmp     bltEndMMX8
;
; loading a dword into mm0/mm1 might be faster for 3-2...
;
blt3MMX8:
        ; pack the 3 bytes as [0, b1, b0, b2] in one dword
        mov     ax, word ptr [esi]
        mov     bx, word ptr [edi]
        shl     eax, 8                          ; 3 pixels left to blit,
        shl     ebx, 8                          ; so shift the buffers
        mov     al, byte ptr [esi+2]
        mov     bl, byte ptr [edi+2]
        movd    mm0, eax
        movd    mm1, ebx
        movq    mm2, mm0
        pcmpeqb mm0, mm4                        ; create mask
        pand    mm1, mm0                        ; mask dest
        pandn   mm0, mm2                        ; mask source
        por     mm1, mm0                        ; or them
        movd    eax, mm1                        ; write back result
        mov     byte ptr [edi+2], al
        shr     eax, 8
        mov     word ptr [edi], ax
        jmp     bltEndMMX8

blt2MMX8:
        mov     al, byte ptr [esi]
        cmp     al, dl
        je      blt1aMMX8
        mov     byte ptr [edi], al
blt1aMMX8:
        add     esi, 1
        add     edi, 1
blt1MMX8:
        mov     al, byte ptr [esi]
        cmp     al, dl
        je      bltEndMMX8
        mov     byte ptr [edi], al

bltEndMMX8:
        emms                                    ; leave MMX state so x87 code can follow
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey8MMX ENDP
453
454
455 PUBLIC _BlitColorKey16MMX
; Same operation as BlitColorKey16, implemented with MMX
457;void BlitColorKey16MMX(char *dest, char *src, ULONG key, ULONG linesize)
;-----------------------------------------------------------------------
; void BlitColorKey16MMX(char *dest, char *src, ULONG key, ULONG linesize)
;
; MMX variant of the 16-bit colour-key blit: source pixels equal to the
; low word of key leave the destination pixel untouched, all other
; pixels are copied.
;
; In:    [ebp+8]  = dest
;        [ebp+12] = src
;        [ebp+16] = key (only the low word is used)
;        [ebp+20] = linesize in pixels
; Out:   nothing
; Clobb: eax (not saved), mm0, mm1, mm2, mm4, mm5; edi, esi, ecx and edx
;        are restored
; Notes: EMMS is executed before returning so that x87 code may run
;        afterwards.
;-----------------------------------------------------------------------
_BlitColorKey16MMX PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    ecx
        push    edx

        mov     edx, dword ptr [ebp+16]         ; dx  = colorkey
        mov     edi, dword ptr [ebp+8]          ; edi = dest
        mov     ecx, dword ptr [ebp+20]         ; ecx = linesize in pixels

        mov     eax, edx
        shl     edx, 16
        mov     dx, ax                          ; edx = colorkey in both words

        mov     esi, dword ptr [ebp+12]         ; esi = src
        mov     eax, ecx                        ; eax = copy of linesize for the tail
        shr     ecx, 2                          ; ecx = number of whole qwords
        movd    mm4, edx                        ; (movd leaves the flags of shr intact)
        jz      BltRemain16                     ; tails only need the low 32 bits of mm4

        movd    mm5, edx                        ; extend colorkey to 64 bits
        psllq   mm4, 32
        por     mm4, mm5

bltLoopMMX16:
        movq    mm0, qword ptr [esi]            ; get source qword
        movq    mm1, qword ptr [edi]            ; get destination qword
        movq    mm2, mm0                        ; copy source
        pcmpeqw mm0, mm4                        ; mask = FFFFh where source == key
        pand    mm1, mm0                        ; keep dest where transparent
        add     esi, 8                          ; point to next source qword
        pandn   mm0, mm2                        ; keep source where opaque
        por     mm1, mm0                        ; combine
        movq    qword ptr [edi], mm1            ; write back result
        add     edi, 8
        dec     ecx
        jnz     bltLoopMMX16

BltRemain16:
        and     eax, 3                          ; eax = 0..3 trailing pixels
        jmp     ds:JumpTable[eax*4]

align 4

JumpTable:
        dd offset cs:bltEndMMX16
        dd offset cs:blt1MMX16
        dd offset cs:blt2MMX16
        dd offset cs:blt3MMX16
align 2

blt3MMX16:
        ; two pixels through MMX, then fall into the single pixel case
        movd    mm0, dword ptr [esi]
        movd    mm1, dword ptr [edi]
        movq    mm2, mm0                        ; copy source
        add     esi, 4
        pcmpeqw mm0, mm4                        ; create 16-bit mask
        pand    mm1, mm0                        ; mask dest
        pandn   mm0, mm2                        ; mask source
        add     edi, 4
        por     mm1, mm0                        ; or them
        movd    dword ptr [edi-4], mm1
        jmp     blt1MMX16

blt2MMX16:
        movd    mm0, dword ptr [esi]
        movd    mm1, dword ptr [edi]
        movq    mm2, mm0                        ; copy source
        pcmpeqw mm0, mm4                        ; create 16-bit mask
        pand    mm1, mm0                        ; mask dest
        pandn   mm0, mm2                        ; mask source
        por     mm1, mm0                        ; or them
        movd    dword ptr [edi], mm1            ; write back result
        jmp     bltEndMMX16

blt1MMX16:
        mov     ax, word ptr [esi]
        cmp     ax, dx
        je      bltEndMMX16                     ; transparent: leave dest alone
        mov     word ptr [edi], ax

bltEndMMX16:
        emms                                    ; leave MMX state so x87 code can follow
        pop     edx
        pop     ecx
        pop     esi
        pop     edi
        pop     ebp
        ret
_BlitColorKey16MMX ENDP
550
551;
; extern void __cdecl BltTransSrcRecMMX(PBYTE dest, PBYTE src, ULONG ulBltWidth, ULONG ulBltHeight,
;                                       ULONG ulDestPitch, ULONG ulSrcPitch, ULONG ulTransCol);
554
555 PUBLIC _BltTransSrcRecMMX
;-----------------------------------------------------------------------
; _BltTransSrcRecMMX - placeholder.
; The routine sets up a stack frame and returns immediately: none of its
; arguments are read, no memory is written and every register is left
; unchanged.
;-----------------------------------------------------------------------
_BltTransSrcRecMMX PROC NEAR
        push    ebp
        mov     ebp, esp
        ; no body yet - nothing is modified, so nothing else needs saving

EndTSBlt:
        pop     ebp
        ret

_BltTransSrcRecMMX ENDP
577
578
579 PUBLIC _BltRec
580;
; extern void __cdecl BltRec(PBYTE dest, PBYTE src, ULONG ulBltWidth, ULONG ulBltHeight,
;                            ULONG ulDestPitch, ULONG ulSrcPitch);
583_BltRec PROC NEAR
584 push ebp
585 mov ebp, esp
586 push edi
587 push esi
588 push eax
589 push ebx
590 push ecx
591 push edx
592
593 mov ecx, dword ptr [ebp+16] ; U ulBltWidth
594 mov esi, dword ptr [ebp+12] ; V src
595 mov ebx, ecx ; U
596 mov edx, dword ptr [ebp+20] ; V ulBltHeight
597 and ebx, 0Fh ; U ebx = # of bytes < 16
598 mov edi, dword ptr [ebp+8] ; V dest
599 cmp edx, 0
600 jz BltRecEnd ; height is zero so done
601 shr ecx, 4 ; U
602 jz SmallBlt ; Small (width < 16) rectangle done in special case
603 test ebx, ebx
604 jnz ComplexBlt ; ulBltWidth mod 16 is not 0
605
606;
607; Blitwidth is an multiple of 16
608;
609 mov ebx, dword ptr [ebp+24] ; ulDestPitch
610 mov eax, dword ptr [ebp+28] ; ulSrcPitch
611 sub ebx, dword ptr [ebp+16] ; adjust both widths
612 sub eax, dword ptr [ebp+16]
613 mov dword ptr [ebp+28], eax ; store adjusted SrcPitch
614 mov eax, ecx
; Copy loop for widths that are a multiple of 16.
; On entry: eax = 16-byte chunks left in this line, ecx = chunks per line,
;           edx = lines left, ebx = dest pitch - width,
;           [ebp+28] = src pitch - width.
LineLoop:
        FLD     QWORD PTR [ESI]                 ; move 16 bytes through the FPU stack
        FLD     QWORD PTR [ESI+8]
        FXCH
        FSTP    QWORD PTR [EDI]
        FSTP    QWORD PTR [EDI+8]
        ADD     ESI, 16
        ADD     EDI, 16
        dec     eax
        jnz     LineLoop                        ; more chunks in this line
        dec     edx
        jz      BltRecEnd                       ; that was the last line
        add     ESI, dword ptr [ebp+28]         ; step to the start of the next line
        add     EDI, ebx
        mov     eax, ecx                        ; reload the per-line chunk count
        jmp     LineLoop
631
632SmallBlt:
633 mov eax, dword ptr [ebp+28] ; ulSrcPitch
634 mov ecx, dword ptr [ebp+24] ; ulDestPitch
635 jmp ds:SmallJmpTable[ebx*4]
636SmallJmpTable:
637 dd cs:offset BltRecEnd ; BlitWidth is 0 done
638 dd cs:offset Rec1
639 dd cs:offset Rec2
640 dd cs:offset Rec3
641 dd cs:offset Rec4
642 dd cs:offset Rec5
643 dd cs:offset Rec6
644 dd cs:offset Rec7
645 dd cs:offset Rec8
646 dd cs:offset Rec9
647 dd cs:offset Rec10
648 dd cs:offset Rec11
649 dd cs:offset Rec12
650 dd cs:offset Rec13
651 dd cs:offset Rec14
652 dd cs:offset Rec15
653
654;One Pixel wide
655
656Rec1:
657 cmp edx,4
658 jb Rec1_0123
659 mov bl, byte ptr [esi]
660 add esi,eax
661 mov byte ptr [edi], bl
662 add edi,ecx
663 mov bl, byte ptr [esi]
664 add esi,eax
665 mov byte ptr [edi], bl
666 add edi,ecx
667 mov bl, byte ptr [esi]
668 add esi,eax
669 mov byte ptr [edi], bl
670 add edi,ecx
671 mov bl, byte ptr [esi]
672 add esi,eax
673 mov byte ptr [edi], bl
674 add edi,ecx
675 sub edx,4
676 jnz Rec1
677 jmp BltRecEnd
678Rec1_0123:
679 cmp edx,2
680 jz Rec1_2
681 jb Rec1_01
682; Must be 3 lines left
683 mov bl, byte ptr [esi]
684 add esi,eax
685 mov byte ptr [edi], bl
686 add edi,ecx
687 mov bl, byte ptr [esi]
688 add esi,eax
689 mov byte ptr [edi], bl
690 add edi,ecx
691 mov bl, byte ptr [esi]
692 mov byte ptr [edi], bl
693 jmp BltRecEnd
694Rec1_2:
695 mov bl, byte ptr [esi]
696 add esi,eax
697 mov byte ptr [edi], bl
698 add edi,ecx
699 mov bl, byte ptr [esi]
700 mov byte ptr [edi], bl
701 jmp BltRecEnd
702Rec1_01:
703 test edx,edx
704 jz BltRecEnd
705 mov bl, byte ptr [esi]
706 mov byte ptr [edi], bl
707 jmp BltRecEnd
708
709;2 Pixel Wide
710
711Rec2:
712 cmp edx,4
713 jb Rec2_0123
714 mov bx, word ptr [esi]
715 add esi,eax
716 mov word ptr [edi], bx
717 add edi,ecx
718 mov bx, word ptr [esi]
719 add esi,eax
720 mov word ptr [edi], bx
721 add edi,ecx
722 mov bx, word ptr [esi]
723 add esi,eax
724 mov word ptr [edi], bx
725 add edi,ecx
726 mov bx, word ptr [esi]
727 add esi,eax
728 mov word ptr [edi], bx
729 add edi,ecx
730 sub edx, 4
731 jnz Rec2
732 jmp BltRecEnd
733
734Rec2_0123:
735 cmp edx,2
736 jz Rec2_2
737 jb Rec2_01
738;3 lines left
739 mov bx, word ptr [esi]
740 add esi,eax
741 mov word ptr [edi], bx
742 add edi,ecx
743 mov bx, word ptr [esi]
744 add esi,eax
745 mov word ptr [edi], bx
746 add edi,ecx
747 mov bx, word ptr [esi]
748 mov word ptr [edi], bx
749 jmp BltRecEnd
750Rec2_2:
751 mov bx, word ptr [esi]
752 add esi,eax
753 mov word ptr [edi], bx
754 add edi,ecx
755 mov bx, word ptr [esi]
756 mov word ptr [edi], bx
757 jmp BltRecEnd
758Rec2_01:
759 test edx,edx
760 jz BltRecEnd
761 mov bx, word ptr [esi]
762 mov word ptr [edi], bx
763 jmp BltRecEnd
764
765; 3 Pixel Wide must check if it's better to read 4 bytes as
766; Intel might stall on reading 2 and 1 byte, but this takes more care as we
767; could create a pagefault on the last 3 pixel
768
769Rec3:
770 cmp edx,4
771 jb Rec3_0123
772 push edx
773 mov bx, word ptr [esi]
774 mov dl, byte ptr [esi+2]
775 add esi,eax
776 mov word ptr [edi], bx
777 mov byte ptr [edi+2], dl
778 add edi,ecx
779 mov bx, word ptr [esi]
780 mov dl, byte ptr [esi+2]
781 add esi,eax
782 mov word ptr [edi], bx
783 mov byte ptr [edi+2], dl
784 add edi,ecx
785 mov bx, word ptr [esi]
786 mov dl, byte ptr [esi+2]
787 add esi,eax
788 mov word ptr [edi], bx
789 mov byte ptr [edi+2], dl
790 add edi,ecx
791 mov bx, word ptr [esi]
792 mov dl, byte ptr [esi+2]
793 add esi,eax
794 mov word ptr [edi], bx
795 mov byte ptr [edi+2], dl
796 add edi,ecx
797 pop edx
798 sub edx,4
799 jnz Rec3
800 jmp BltRecEnd
801
802Rec3_0123:
803 cmp edx,2
804 jz Rec3_2
805 jb Rec3_01
806; Must be 3 lines left
807 mov bx, word ptr [esi]
808 mov dl, byte ptr [esi+2]
809 add esi,eax
810 mov word ptr [edi], bx
811 mov byte ptr [edi+2], dl
812 add edi,ecx
813 mov bx, word ptr [esi]
814 mov dl, byte ptr [esi+2]
815 add esi,eax
816 mov word ptr [edi], bx
817 mov byte ptr [edi+2], dl
818 add edi,ecx
819 mov bx, word ptr [esi]
820 mov dl, byte ptr [esi+2]
821 mov word ptr [edi], bx
822 mov byte ptr [edi+2], dl
823 jmp BltRecEnd
824Rec3_2:
825 mov bx, word ptr [esi]
826 mov dl, byte ptr [esi+2]
827 add esi,eax
828 mov word ptr [edi], bx
829 mov byte ptr [edi+2], dl
830 add edi,ecx
831 mov bx, word ptr [esi]
832 mov dl, byte ptr [esi+2]
833 mov word ptr [edi], bx
834 mov byte ptr [edi+2], dl
835 jmp BltRecEnd
836Rec3_01:
837 test edx,edx
838 jz BltRecEnd
839 mov bx, word ptr [esi]
840 mov dl, byte ptr [esi+2]
841 mov word ptr [edi], bx
842 mov byte ptr [edi+2], dl
843 jmp BltRecEnd
844
845; 4 Pixel Wide
846
847Rec4:
848 cmp edx,4
849 jb Rec4_0123
850 mov ebx, dword ptr [esi]
851 add esi,eax
852 mov dword ptr [edi], ebx
853 add edi,ecx
854 mov ebx, dword ptr [esi]
855 add esi,eax
856 mov dword ptr [edi], ebx
857 add edi,ecx
858 mov ebx, dword ptr [esi]
859 add esi,eax
860 mov dword ptr [edi], ebx
861 add edi,ecx
862 mov ebx, dword ptr [esi]
863 add esi,eax
864 mov dword ptr [edi], ebx
865 add edi,ecx
866 sub edx ,4
867 jnz Rec4
868 jmp BltRecEnd
869
; 4 bytes wide, fewer than 4 lines left (edx = 0..3).
; eax = source pitch, ecx = dest pitch.
Rec4_0123:
        cmp     edx, 2
        jz      Rec4_2                          ; dispatch to the 4-byte-wide tails
        jb      Rec4_01
;3 lines left
        mov     ebx, dword ptr [esi]
        add     esi, eax
        mov     dword ptr [edi], ebx
        add     edi, ecx
        mov     ebx, dword ptr [esi]
        add     esi, eax
        mov     dword ptr [edi], ebx
        add     edi, ecx
        mov     ebx, dword ptr [esi]
        mov     dword ptr [edi], ebx
        jmp     BltRecEnd
886Rec4_2:
887 mov ebx, dword ptr [esi]
888 add esi,eax
889 mov dword ptr [edi], ebx
890 add edi,ecx
891 mov ebx, dword ptr [esi]
892 mov dword ptr [edi], ebx
893 jmp BltRecEnd
894Rec4_01:
895 test edx,edx
896 jz BltRecEnd
897 mov ebx, dword ptr [esi]
898 mov dword ptr [edi], ebx
899 jmp BltRecEnd
900
901; 5 Pixel Wide
902
903Rec5:
904 cmp edx,4
905 jb Rec5_0123
906 push edx
907 mov ebx, dword ptr [esi]
908 mov dl, byte ptr [esi+4]
909 add esi,eax
910 mov dword ptr [edi], ebx
911 mov byte ptr [edi+4], dl
912 add edi,ecx
913 mov ebx, dword ptr [esi]
914 mov dl, byte ptr [esi+4]
915 add esi,eax
916 mov dword ptr [edi], ebx
917 mov byte ptr [edi+4], dl
918 add edi,ecx
919 mov ebx, dword ptr [esi]
920 mov dl, byte ptr [esi+4]
921 add esi,eax
922 mov dword ptr [edi], ebx
923 mov byte ptr [edi+4], dl
924 add edi,ecx
925 mov ebx, dword ptr [esi]
926 mov dl, byte ptr [esi+4]
927 add esi,eax
928 mov dword ptr [edi], ebx
929 mov byte ptr [edi+4], dl
930 add edi,ecx
931 pop edx
932 sub edx ,4
933 jnz Rec5
934 jmp BltRecEnd
935Rec5_0123:
936 cmp edx,2
937 jz Rec5_2
938 jb Rec5_01
939; Must be 3 lines left
940 mov ebx, dword ptr [esi]
941 mov dl, byte ptr [esi+4]
942 add esi,eax
943 mov dword ptr [edi], ebx
944 mov byte ptr [edi+4], dl
945 add edi,ecx
946 mov ebx, dword ptr [esi]
947 mov dl, byte ptr [esi+4]
948 add esi,eax
949 mov dword ptr [edi], ebx
950 mov byte ptr [edi+4], dl
951 add edi,ecx
952 mov ebx, dword ptr [esi]
953 mov dl, byte ptr [esi+4]
954 mov dword ptr [edi], ebx
955 mov byte ptr [edi+4], dl
956 jmp BltRecEnd
957Rec5_2:
958 mov ebx, dword ptr [esi]
959 mov dl, byte ptr [esi+4]
960 add esi,eax
961 mov dword ptr [edi], ebx
962 mov byte ptr [edi+4], dl
963 add edi,ecx
964 mov ebx, dword ptr [esi]
965 mov dl, byte ptr [esi+4]
966 mov dword ptr [edi], ebx
967 mov byte ptr [edi+4], dl
968 jmp BltRecEnd
969Rec5_01:
970 test edx,edx
971 jz BltRecEnd
972 mov ebx, dword ptr [esi]
973 mov dl, byte ptr [esi+4]
974 mov dword ptr [edi], ebx
975 mov byte ptr [edi+4], dl
976 jmp BltRecEnd
977
978; 6 Pixel Wide
979
980Rec6:
981 cmp edx,4
982 jb Rec6_0123
983 push edx
984 mov ebx, dword ptr [esi]
985 mov dx, word ptr [esi+4]
986 add esi,eax
987 mov dword ptr [edi], ebx
988 mov word ptr [edi+4], dx
989 add edi,ecx
990 mov ebx, dword ptr [esi]
991 mov dx, word ptr [esi+4]
992 add esi,eax
993 mov dword ptr [edi], ebx
994 mov word ptr [edi+4], dx
995 add edi,ecx
996 mov ebx, dword ptr [esi]
997 mov dx, word ptr [esi+4]
998 add esi,eax
999 mov dword ptr [edi], ebx
1000 mov word ptr [edi+4], dx
1001 add edi,ecx
1002 mov ebx, dword ptr [esi]
1003 mov dx, word ptr [esi+4]
1004 add esi,eax
1005 mov dword ptr [edi], ebx
1006 mov word ptr [edi+4], dx
1007 add edi,ecx
1008 pop edx
1009 sub edx ,4
1010 jnz Rec6
1011 jmp BltRecEnd
1012Rec6_0123:
1013 cmp edx,2
1014 jz Rec6_2
1015 jb Rec6_01
1016; Must be 3 lines left
1017 mov ebx, dword ptr [esi]
1018 mov dx, word ptr [esi+4]
1019 add esi,eax
1020 mov dword ptr [edi], ebx
1021 mov word ptr [edi+4], dx
1022 add edi,ecx
1023 mov ebx, dword ptr [esi]
1024 mov dx, word ptr [esi+4]
1025 add esi,eax
1026 mov dword ptr [edi], ebx
1027 mov word ptr [edi+4], dx
1028 add edi,ecx
1029 mov ebx, dword ptr [esi]
1030 mov dx, word ptr [esi+4]
1031 mov dword ptr [edi], ebx
1032 mov word ptr [edi+4], dx
1033 jmp BltRecEnd
1034Rec6_2:
1035 mov ebx, dword ptr [esi]
1036 mov dx, word ptr [esi+4]
1037 add esi,eax
1038 mov dword ptr [edi], ebx
1039 mov word ptr [edi+4], dx
1040 add edi,ecx
1041 mov ebx, dword ptr [esi]
1042 mov dx, word ptr [esi+4]
1043 mov dword ptr [edi], ebx
1044 mov word ptr [edi+4], dx
1045 jmp BltRecEnd
1046Rec6_01:
1047 test edx,edx
1048 jz BltRecEnd
1049 mov ebx, dword ptr [esi]
1050 mov dx, word ptr [esi+4]
1051 mov dword ptr [edi], ebx
1052 mov word ptr [edi+4], dx
1053 jmp BltRecEnd
1054
1055; 7 Pixel Wide
1056
; 7 bytes wide: copies 4 lines per pass while at least 4 lines remain.
; edx = lines left, eax = source pitch, ecx = dest pitch.
; Each line is moved as dword + word + byte; edx is saved on the stack
; because dx carries the word part.
Rec7:
        cmp     edx, 4
        jb      Rec7_0123                       ; fewer than 4 lines: 7-byte-wide tail
        push    edx
        mov     ebx, dword ptr [esi]
        mov     dx, word ptr [esi+4]
        mov     dword ptr [edi], ebx
        mov     word ptr [edi+4], dx
        mov     bl, byte ptr [esi+6]
        add     esi, eax
        mov     byte ptr [edi+6], bl
        add     edi, ecx
        xor     ebx, ebx                        ; clear ebx to avoid stalls
        mov     ebx, dword ptr [esi]
        mov     dx, word ptr [esi+4]
        mov     dword ptr [edi], ebx
        mov     word ptr [edi+4], dx
        mov     bl, byte ptr [esi+6]
        add     esi, eax
        mov     byte ptr [edi+6], bl
        add     edi, ecx
        xor     ebx, ebx                        ; clear ebx to avoid stalls
        mov     ebx, dword ptr [esi]
        mov     dx, word ptr [esi+4]
        mov     dword ptr [edi], ebx
        mov     word ptr [edi+4], dx
        mov     bl, byte ptr [esi+6]
        add     esi, eax
        mov     byte ptr [edi+6], bl
        add     edi, ecx
        xor     ebx, ebx                        ; clear ebx to avoid stalls
        mov     ebx, dword ptr [esi]
        mov     dx, word ptr [esi+4]
        mov     dword ptr [edi], ebx
        mov     word ptr [edi+4], dx
        mov     bl, byte ptr [esi+6]
        add     esi, eax
        mov     byte ptr [edi+6], bl
        add     edi, ecx
        xor     ebx, ebx                        ; clear ebx to avoid stalls
        pop     edx
        sub     edx, 4
        jnz     Rec7
        jmp     BltRecEnd
1101Rec7_0123:
1102 cmp edx,2
1103 jz Rec7_2
1104 jb Rec7_01
1105; Must be 3 lines left
1106 mov ebx, dword ptr [esi]
1107 mov dx, word ptr [esi+4]
1108 mov dword ptr [edi], ebx
1109 mov word ptr [edi+4], dx
1110 mov bl, byte ptr[esi+6]
1111 add esi,eax
1112 mov byte ptr[edi+6],bl
1113 add edi,ecx
1114 xor ebx,ebx ; clear ebx to avoid stalls
1115 mov ebx, dword ptr [esi]
1116 mov dx, word ptr [esi+4]
1117 mov dword ptr [edi], ebx
1118 mov word ptr [edi+4], dx
1119 mov bl, byte ptr[esi+6]
1120 add esi,eax
1121 mov byte ptr[edi+6],bl
1122 add edi,ecx
1123 xor ebx,ebx ; clear ebx to avoid stalls
1124 mov ebx, dword ptr [esi]
1125 mov dx, word ptr [esi+4]
1126 mov dword ptr [edi], ebx
1127 mov word ptr [edi+4], dx
1128 mov bl, byte ptr[esi+6]
1129 mov byte ptr[edi+6],bl
1130 jmp BltRecEnd
1131Rec7_2:
1132 mov ebx, dword ptr [esi]
1133 mov dx, word ptr [esi+4]
1134 mov dword ptr [edi], ebx
1135 mov word ptr [edi+4], dx
1136 mov bl, byte ptr[esi+6]
1137 add esi,eax
1138 mov byte ptr[edi+6],bl
1139 add edi,ecx
1140 xor ebx,ebx ; clear ebx to avoid stalls
1141 mov ebx, dword ptr [esi]
1142 mov dx, word ptr [esi+4]
1143 mov dword ptr [edi], ebx
1144 mov word ptr [edi+4], dx
1145 mov bl, byte ptr[esi+6]
1146 mov byte ptr[edi+6],bl
1147 jmp BltRecEnd
1148Rec7_01:
1149 test edx,edx
1150 jz BltRecEnd
1151 mov ebx, dword ptr [esi]
1152 mov dx, word ptr [esi+4]
1153 mov dword ptr [edi], ebx
1154 mov word ptr [edi+4], dx
1155 mov bl, byte ptr[esi+6]
1156 mov byte ptr[edi+6],bl
1157 jmp BltRecEnd
1158
1159; 8 Pixel Wide
1160
1161Rec8:
1162 cmp edx,4
1163 jb Rec8_0123
1164 push edx
1165 mov ebx, dword ptr [esi]
1166 mov edx, dword ptr [esi+4]
1167 mov dword ptr [edi], ebx
1168 mov dword ptr [edi+4], edx
1169 add esi,eax
1170 add edi,ecx
1171 mov ebx, dword ptr [esi]
1172 mov edx, dword ptr [esi+4]
1173 mov dword ptr [edi], ebx
1174 mov dword ptr [edi+4], edx
1175 add esi,eax
1176 add edi,ecx
1177 mov ebx, dword ptr [esi]
1178 mov edx, dword ptr [esi+4]
1179 mov dword ptr [edi], ebx
1180 mov dword ptr [edi+4], edx
1181 add esi,eax
1182 add edi,ecx
1183 mov ebx, dword ptr [esi]
1184 mov edx, dword ptr [esi+4]
1185 mov dword ptr [edi], ebx
1186 mov dword ptr [edi+4], edx
1187 add esi,eax
1188 add edi,ecx
1189 pop edx
1190 sub edx ,4
1191 jnz Rec8
1192 jmp BltRecEnd
1193
1194Rec8_0123:
1195 cmp edx,2
1196 jz Rec8_2
1197 jb Rec8_01
1198;3 lines left
1199 mov ebx, dword ptr [esi]
1200 mov edx, dword ptr [esi+4]
1201 mov dword ptr [edi], ebx
1202 mov dword ptr [edi+4], edx
1203 add esi,eax
1204 add edi,ecx
1205 mov ebx, dword ptr [esi]
1206 mov edx, dword ptr [esi+4]
1207 mov dword ptr [edi], ebx
1208 mov dword ptr [edi+4], edx
1209 add esi,eax
1210 add edi,ecx
1211 mov ebx, dword ptr [esi]
1212 mov edx, dword ptr [esi+4]
1213 mov dword ptr [edi], ebx
1214 mov dword ptr [edi+4], edx
1215 jmp BltRecEnd
1216Rec8_2:
1217 mov ebx, dword ptr [esi]
1218 mov edx, dword ptr [esi+4]
1219 mov dword ptr [edi], ebx
1220 mov dword ptr [edi+4], edx
1221 add esi,eax
1222 add edi,ecx
1223 mov ebx, dword ptr [esi]
1224 mov edx, dword ptr [esi+4]
1225 mov dword ptr [edi], ebx
1226 mov dword ptr [edi+4], edx
1227 jmp BltRecEnd
1228Rec8_01:
1229 test edx,edx
1230 jz BltRecEnd
1231 mov ebx, dword ptr [esi]
1232 mov edx, dword ptr [esi+4]
1233 mov dword ptr [edi], ebx
1234 mov dword ptr [edi+4], edx
1235 jmp BltRecEnd
1236
1237; 9 Pixel Wide
1238
1239Rec9:
1240 cmp edx,4
1241 jb Rec9_0123
1242 push edx
1243 FLD QWORD PTR [ESI]
1244 mov bl, byte ptr [esi+8]
1245 FSTP QWORD PTR [EDI]
1246 mov byte ptr [edi+8], bl
1247 add esi,eax
1248 add edi,ecx
1249 FLD QWORD PTR [ESI]
1250 mov bl, byte ptr [esi+8]
1251 FSTP QWORD PTR [EDI]
1252 mov byte ptr [edi+8], bl
1253 add esi,eax
1254 add edi,ecx
1255 FLD QWORD PTR [ESI]
1256 mov bl, byte ptr [esi+8]
1257 FSTP QWORD PTR [EDI]
1258 mov byte ptr [edi+8], bl
1259 add esi,eax
1260 add edi,ecx
1261 FLD QWORD PTR [ESI]
1262 mov bl, byte ptr [esi+8]
1263 FSTP QWORD PTR [EDI]
1264 mov byte ptr [edi+8], bl
1265 add esi,eax
1266 add edi,ecx
1267 pop edx
1268 sub edx ,4
1269 jnz Rec9
1270 jmp BltRecEnd
1271
1272Rec9_0123:
1273 cmp edx,2
1274 jz Rec9_2
1275 jb Rec9_01
1276;3 lines left
1277 FLD QWORD PTR [ESI]
1278 mov bl, byte ptr [esi+8]
1279 FSTP QWORD PTR [EDI]
1280 mov byte ptr [edi+8], bl
1281 add esi,eax
1282 add edi,ecx
1283 FLD QWORD PTR [ESI]
1284 mov bl, byte ptr [esi+8]
1285 FSTP QWORD PTR [EDI]
1286 mov byte ptr [edi+8], bl
1287 add esi,eax
1288 add edi,ecx
1289 FLD QWORD PTR [ESI]
1290 mov bl, byte ptr [esi+8]
1291 FSTP QWORD PTR [EDI]
1292 mov byte ptr [edi+8], bl
1293 jmp BltRecEnd
1294Rec9_2:
1295 FLD QWORD PTR [ESI]
1296 mov bl, byte ptr [esi+8]
1297 FSTP QWORD PTR [EDI]
1298 mov byte ptr [edi+8], bl
1299 add esi,eax
1300 add edi,ecx
1301 FLD QWORD PTR [ESI]
1302 mov bl, byte ptr [esi+8]
1303 FSTP QWORD PTR [EDI]
1304 mov byte ptr [edi+8], bl
1305 jmp BltRecEnd
1306Rec9_01:
1307 test edx,edx
1308 jz BltRecEnd
1309 FLD QWORD PTR [ESI]
1310 mov bl, byte ptr [esi+8]
1311 FSTP QWORD PTR [EDI]
1312 mov byte ptr [edi+8], bl
1313 jmp BltRecEnd
1314
1315; 10 Pixel Wide
1316
1317Rec10:
1318 cmp edx,4
1319 jb Rec10_0123
1320 FLD QWORD PTR [ESI]
1321 mov bx, word ptr [esi+8]
1322 FSTP QWORD PTR [EDI]
1323 mov word ptr [edi+8], bx
1324 add esi,eax
1325 add edi,ecx
1326 FLD QWORD PTR [ESI]
1327 mov bx, word ptr [esi+8]
1328 FSTP QWORD PTR [EDI]
1329 mov word ptr [edi+8], bx
1330 add esi,eax
1331 add edi,ecx
1332 FLD QWORD PTR [ESI]
1333 mov bx, word ptr [esi+8]
1334 FSTP QWORD PTR [EDI]
1335 mov word ptr [edi+8], bx
1336 add esi,eax
1337 add edi,ecx
1338 FLD QWORD PTR [ESI]
1339 mov bx, word ptr [esi+8]
1340 FSTP QWORD PTR [EDI]
1341 mov word ptr [edi+8], bx
1342 add esi,eax
1343 add edi,ecx
1344 sub edx ,4
1345 jnz Rec10
1346 jmp BltRecEnd
1347
1348Rec10_0123:
1349 cmp edx,2
1350 jz Rec10_2
1351 jb Rec10_01
1352;3 lines left
1353 FLD QWORD PTR [ESI]
1354 mov bx, word ptr [esi+8]
1355 FSTP QWORD PTR [EDI]
1356 mov word ptr [edi+8], bx
1357 add esi,eax
1358 add edi,ecx
1359 FLD QWORD PTR [ESI]
1360 mov bx, word ptr [esi+8]
1361 FSTP QWORD PTR [EDI]
1362 mov word ptr [edi+8], bx
1363 add esi,eax
1364 add edi,ecx
1365 FLD QWORD PTR [ESI]
1366 mov bx, word ptr [esi+8]
1367 FSTP QWORD PTR [EDI]
1368 mov word ptr [edi+8], bx
1369 jmp BltRecEnd
1370Rec10_2:
1371 FLD QWORD PTR [ESI]
1372 mov bx, word ptr [esi+8]
1373 FSTP QWORD PTR [EDI]
1374 mov word ptr [edi+8], bx
1375 add esi,eax
1376 add edi,ecx
1377 FLD QWORD PTR [ESI]
1378 mov bx, word ptr [esi+8]
1379 FSTP QWORD PTR [EDI]
1380 mov word ptr [edi+8], bx
1381 jmp BltRecEnd
1382Rec10_01:
1383 test edx,edx
1384 jz BltRecEnd
1385 FLD QWORD PTR [ESI]
1386 mov bx, word ptr [esi+8]
1387 FSTP QWORD PTR [EDI]
1388 mov word ptr [edi+8], bx
1389 jmp BltRecEnd
1390
1391; 11 Pixel Wide
1392
; 11 bytes wide: copies 4 lines per pass while at least 4 lines remain.
; edx = lines left, eax = source pitch, ecx = dest pitch.
; Each line is moved as qword (through the FPU) + word + byte; edx is
; saved on the stack because dl carries the byte part.
Rec11:
        cmp     edx, 4
        jb      Rec11_0123
        push    edx
        FLD     QWORD PTR [ESI]
        mov     bx, word ptr [esi+8]
        mov     dl, byte ptr [esi+10]
        FSTP    QWORD PTR [EDI]
        mov     word ptr [edi+8], bx
        mov     byte ptr [edi+10], dl
        add     esi, eax
        add     edi, ecx
        FLD     QWORD PTR [ESI]
        mov     bx, word ptr [esi+8]
        mov     dl, byte ptr [esi+10]
        FSTP    QWORD PTR [EDI]
        mov     word ptr [edi+8], bx
        mov     byte ptr [edi+10], dl
        add     esi, eax
        add     edi, ecx
        FLD     QWORD PTR [ESI]
        mov     bx, word ptr [esi+8]
        mov     dl, byte ptr [esi+10]
        FSTP    QWORD PTR [EDI]
        mov     word ptr [edi+8], bx
        mov     byte ptr [edi+10], dl
        add     esi, eax
        add     edi, ecx
        FLD     QWORD PTR [ESI]
        mov     bx, word ptr [esi+8]
        mov     dl, byte ptr [esi+10]
        FSTP    QWORD PTR [EDI]
        mov     word ptr [edi+8], bx
        mov     byte ptr [edi+10], dl
        add     esi, eax
        add     edi, ecx
        pop     edx
        sub     edx, 4
        jnz     Rec11                           ; stay in the 11-byte-wide loop
        jmp     BltRecEnd
1433
1434Rec11_0123:
1435 cmp edx,2
1436 jz Rec11_2
1437 jb Rec11_01
1438;3 lines left
1439 FLD QWORD PTR [ESI]
1440 mov bx, word ptr [esi+8]
1441 mov dl, byte ptr [esi+10]
1442 FSTP QWORD PTR [EDI]
1443 mov word ptr [edi+8], bx
1444 mov byte ptr [edi+10], dl
1445 add esi,eax
1446 add edi,ecx
1447 FLD QWORD PTR [ESI]
1448 mov bx, word ptr [esi+8]
1449 mov dl, byte ptr [esi+10]
1450 FSTP QWORD PTR [EDI]
1451 mov word ptr [edi+8], bx
1452 mov byte ptr [edi+10], dl
1453 add esi,eax
1454 add edi,ecx
1455 FLD QWORD PTR [ESI]
1456 mov bx, word ptr [esi+8]
1457 mov dl, byte ptr [esi+10]
1458 FSTP QWORD PTR [EDI]
1459 mov word ptr [edi+8], bx
1460 mov byte ptr [edi+10], dl
1461 jmp BltRecEnd
1462Rec11_2:
1463 FLD QWORD PTR [ESI]
1464 mov bx, word ptr [esi+8]
1465 mov dl, byte ptr [esi+10]
1466 FSTP QWORD PTR [EDI]
1467 mov word ptr [edi+8], bx
1468 mov byte ptr [edi+10], dl
1469 add esi,eax
1470 add edi,ecx
1471 FLD QWORD PTR [ESI]
1472 mov bx, word ptr [esi+8]
1473 mov dl, byte ptr [esi+10]
1474 FSTP QWORD PTR [EDI]
1475 mov word ptr [edi+8], bx
1476 mov byte ptr [edi+10], dl
1477 jmp BltRecEnd
1478Rec11_01:
1479 test edx,edx
1480 jz BltRecEnd
1481 FLD QWORD PTR [ESI]
1482 mov bx, word ptr [esi+8]
1483 mov dl, byte ptr [esi+10]
1484 FSTP QWORD PTR [EDI]
1485 mov word ptr [edi+8], bx
1486 mov byte ptr [edi+10], dl
1487 jmp BltRecEnd
1488
1489; 12 Pixel Wide
1490
1491Rec12:
1492 cmp edx,4
1493 jb Rec12_0123
1494 FLD QWORD PTR [ESI]
1495 mov ebx, dword ptr [esi+8]
1496 FSTP QWORD PTR [EDI]
1497 mov dword ptr [edi+8], ebx
1498 add esi,eax
1499 add edi,ecx
1500 FLD QWORD PTR [ESI]
1501 mov ebx, dword ptr [esi+8]
1502 FSTP QWORD PTR [EDI]
1503 mov dword ptr [edi+8], ebx
1504 add esi,eax
1505 add edi,ecx
1506 FLD QWORD PTR [ESI]
1507 mov ebx, dword ptr [esi+8]
1508 FSTP QWORD PTR [EDI]
1509 mov dword ptr [edi+8], ebx
1510 add esi,eax
1511 add edi,ecx
1512 FLD QWORD PTR [ESI]
1513 mov ebx, dword ptr [esi+8]
1514 FSTP QWORD PTR [EDI]
1515 mov dword ptr [edi+8], ebx
1516 add esi,eax
1517 add edi,ecx
1518 sub edx ,4
1519 jnz Rec12
1520 jmp BltRecEnd
1521
1522Rec12_0123:
1523 cmp edx,2
1524 jz Rec12_2
1525 jb Rec12_01
1526;3 lines left
1527 FLD QWORD PTR [ESI]
1528 mov ebx, dword ptr [esi+8]
1529 FSTP QWORD PTR [EDI]
1530 mov dword ptr [edi+8], ebx
1531 add esi,eax
1532 add edi,ecx
1533 FLD QWORD PTR [ESI]
1534 mov ebx, dword ptr [esi+8]
1535 FSTP QWORD PTR [EDI]
1536 mov dword ptr [edi+8], ebx
1537 add esi,eax
1538 add edi,ecx
1539 FLD QWORD PTR [ESI]
1540 mov ebx, dword ptr [esi+8]
1541 FSTP QWORD PTR [EDI]
1542 mov dword ptr [edi+8], ebx
1543 jmp BltRecEnd
1544Rec12_2:
1545 FLD QWORD PTR [ESI]
1546 mov ebx, dword ptr [esi+8]
1547 FSTP QWORD PTR [EDI]
1548 mov dword ptr [edi+8], ebx
1549 add esi,eax
1550 add edi,ecx
1551 FLD QWORD PTR [ESI]
1552 mov ebx, dword ptr [esi+8]
1553 FSTP QWORD PTR [EDI]
1554 mov dword ptr [edi+8], ebx
1555 jmp BltRecEnd
1556Rec12_01:
1557 test edx,edx
1558 jz BltRecEnd
1559 FLD QWORD PTR [ESI]
1560 mov ebx, dword ptr [esi+8]
1561 FSTP QWORD PTR [EDI]
1562 mov dword ptr [edi+8], ebx
1563 jmp BltRecEnd
1564
1565; 13 Pixel Wide
1566
1567Rec13:
1568 cmp edx,4
1569 jb Rec13_0123
1570 push edx
1571 FLD QWORD PTR [ESI]
1572 mov ebx, dword ptr [esi+8]
1573 mov dl, byte ptr [esi+12]
1574 FSTP QWORD PTR [EDI]
1575 mov dword ptr [edi+8], ebx
1576 mov byte ptr [edi+12], dl
1577 add esi,eax
1578 add edi,ecx
1579 FLD QWORD PTR [ESI]
1580 mov ebx, dword ptr [esi+8]
1581 mov dl, byte ptr [esi+12]
1582 FSTP QWORD PTR [EDI]
1583 mov dword ptr [edi+8], ebx
1584 mov byte ptr [edi+12], dl
1585 add esi,eax
1586 add edi,ecx
1587 FLD QWORD PTR [ESI]
1588 mov ebx, dword ptr [esi+8]
1589 mov dl, byte ptr [esi+12]
1590 FSTP QWORD PTR [EDI]
1591 mov dword ptr [edi+8], ebx
1592 mov byte ptr [edi+12], dl
1593 add esi,eax
1594 add edi,ecx
1595 FLD QWORD PTR [ESI]
1596 mov ebx, dword ptr [esi+8]
1597 mov dl, byte ptr [esi+12]
1598 FSTP QWORD PTR [EDI]
1599 mov dword ptr [edi+8], ebx
1600 mov byte ptr [edi+12], dl
1601 add esi,eax
1602 add edi,ecx
1603 pop edx
1604 sub edx ,4
1605 jnz Rec13
1606 jmp BltRecEnd
1607
1608Rec13_0123:
1609 cmp edx,2
1610 jz Rec13_2
1611 jb Rec13_01
1612;3 lines left
1613 FLD QWORD PTR [ESI]
1614 mov ebx, dword ptr [esi+8]
1615 mov dl, byte ptr [esi+12]
1616 FSTP QWORD PTR [EDI]
1617 mov dword ptr [edi+8], ebx
1618 mov byte ptr [edi+12], dl
1619 add esi,eax
1620 add edi,ecx
1621 FLD QWORD PTR [ESI]
1622 mov ebx, dword ptr [esi+8]
1623 mov dl, byte ptr [esi+12]
1624 FSTP QWORD PTR [EDI]
1625 mov dword ptr [edi+8], ebx
1626 mov byte ptr [edi+12], dl
1627 add esi,eax
1628 add edi,ecx
1629 FLD QWORD PTR [ESI]
1630 mov ebx, dword ptr [esi+8]
1631 mov dl, byte ptr [esi+12]
1632 FSTP QWORD PTR [EDI]
1633 mov dword ptr [edi+8], ebx
1634 mov byte ptr [edi+12], dl
1635 jmp BltRecEnd
1636Rec13_2:
1637 FLD QWORD PTR [ESI]
1638 mov ebx, dword ptr [esi+8]
1639 mov dl, byte ptr [esi+12]
1640 FSTP QWORD PTR [EDI]
1641 mov dword ptr [edi+8], ebx
1642 mov byte ptr [edi+12], dl
1643 add esi,eax
1644 add edi,ecx
1645 FLD QWORD PTR [ESI]
1646 mov ebx, dword ptr [esi+8]
1647 mov dl, byte ptr [esi+12]
1648 FSTP QWORD PTR [EDI]
1649 mov dword ptr [edi+8], ebx
1650 mov byte ptr [edi+12], dl
1651 jmp BltRecEnd
1652Rec13_01:
1653 test edx,edx
1654 jz BltRecEnd
1655 FLD QWORD PTR [ESI]
1656 mov ebx, dword ptr [esi+8]
1657 mov dl, byte ptr [esi+12]
1658 FSTP QWORD PTR [EDI]
1659 mov dword ptr [edi+8], ebx
1660 mov byte ptr [edi+12], dl
1661 jmp BltRecEnd
1662
1663; 14 Pixel Wide
1664
1665Rec14:
1666 cmp edx,4
1667 jb Rec14_0123
1668 push edx
1669 FLD QWORD PTR [ESI]
1670 mov ebx, dword ptr [esi+8]
1671 mov dx, word ptr [esi+12]
1672 FSTP QWORD PTR [EDI]
1673 mov dword ptr [edi+8], ebx
1674 mov word ptr [edi+12], dx
1675 add esi,eax
1676 add edi,ecx
1677 FLD QWORD PTR [ESI]
1678 mov ebx, dword ptr [esi+8]
1679 mov dx, word ptr [esi+12]
1680 FSTP QWORD PTR [EDI]
1681 mov dword ptr [edi+8], ebx
1682 mov word ptr [edi+12], dx
1683 add esi,eax
1684 add edi,ecx
1685 FLD QWORD PTR [ESI]
1686 mov ebx, dword ptr [esi+8]
1687 mov dx, word ptr [esi+12]
1688 FSTP QWORD PTR [EDI]
1689 mov dword ptr [edi+8], ebx
1690 mov word ptr [edi+12], dx
1691 add esi,eax
1692 add edi,ecx
1693 FLD QWORD PTR [ESI]
1694 mov ebx, dword ptr [esi+8]
1695 mov dx, word ptr [esi+12]
1696 FSTP QWORD PTR [EDI]
1697 mov dword ptr [edi+8], ebx
1698 mov word ptr [edi+12], dx
1699 add esi,eax
1700 add edi,ecx
1701 pop edx
1702 sub edx ,4
1703 jnz Rec14
1704 jmp BltRecEnd
1705
1706Rec14_0123:
1707 cmp edx,2
1708 jz Rec14_2
1709 jb Rec14_01
1710;3 lines left
1711 FLD QWORD PTR [ESI]
1712 mov ebx, dword ptr [esi+8]
1713 mov dx, word ptr [esi+12]
1714 FSTP QWORD PTR [EDI]
1715 mov dword ptr [edi+8], ebx
1716 mov word ptr [edi+12], dx
1717 add esi,eax
1718 add edi,ecx
1719 FLD QWORD PTR [ESI]
1720 mov ebx, dword ptr [esi+8]
1721 mov dx, word ptr [esi+12]
1722 FSTP QWORD PTR [EDI]
1723 mov dword ptr [edi+8], ebx
1724 mov word ptr [edi+12], dx
1725 add esi,eax
1726 add edi,ecx
1727 FLD QWORD PTR [ESI]
1728 mov ebx, dword ptr [esi+8]
1729 mov dx, word ptr [esi+12]
1730 FSTP QWORD PTR [EDI]
1731 mov dword ptr [edi+8], ebx
1732 mov word ptr [edi+12], dx
1733 jmp BltRecEnd
1734Rec14_2:
1735 FLD QWORD PTR [ESI]
1736 mov ebx, dword ptr [esi+8]
1737 mov dx, word ptr [esi+12]
1738 FSTP QWORD PTR [EDI]
1739 mov dword ptr [edi+8], ebx
1740 mov word ptr [edi+12], dx
1741 add esi,eax
1742 add edi,ecx
1743 FLD QWORD PTR [ESI]
1744 mov ebx, dword ptr [esi+8]
1745 mov dx, word ptr [esi+12]
1746 FSTP QWORD PTR [EDI]
1747 mov dword ptr [edi+8], ebx
1748 mov word ptr [edi+12], dx
1749 jmp BltRecEnd
1750Rec14_01:
1751 test edx,edx
1752 jz BltRecEnd
1753 FLD QWORD PTR [ESI]
1754 mov ebx, dword ptr [esi+8]
1755 mov dx, word ptr [esi+12]
1756 FSTP QWORD PTR [EDI]
1757 mov dword ptr [edi+8], ebx
1758 mov word ptr [edi+12], dx
1759 jmp BltRecEnd
1760
1761; 15 Pixel Wide
1762
1763Rec15:
1764 cmp edx,4
1765 jb Rec15_0123
1766 push edx
1767 FLD QWORD PTR [ESI]
1768 mov ebx, dword ptr [esi+8]
1769 mov dx, word ptr [esi+12]
1770 FSTP QWORD PTR [EDI]
1771 mov dword ptr [edi+8], ebx
1772 mov bl, byte ptr[esi+14]
1773 mov word ptr [edi+12], dx
1774 add esi,eax
1775 mov byte ptr[edi+14], bl
1776 add edi,ecx
1777 FLD QWORD PTR [ESI]
1778 mov ebx, dword ptr [esi+8]
1779 mov dx, word ptr [esi+12]
1780 FSTP QWORD PTR [EDI]
1781 mov dword ptr [edi+8], ebx
1782 mov bl, byte ptr[esi+14]
1783 mov word ptr [edi+12], dx
1784 add esi,eax
1785 mov byte ptr[edi+14], bl
1786 add edi,ecx
1787 FLD QWORD PTR [ESI]
1788 mov ebx, dword ptr [esi+8]
1789 mov dx, word ptr [esi+12]
1790 FSTP QWORD PTR [EDI]
1791 mov dword ptr [edi+8], ebx
1792 mov bl, byte ptr[esi+14]
1793 mov word ptr [edi+12], dx
1794 add esi,eax
1795 mov byte ptr[edi+14], bl
1796 add edi,ecx
1797 FLD QWORD PTR [ESI]
1798 mov ebx, dword ptr [esi+8]
1799 mov dx, word ptr [esi+12]
1800 FSTP QWORD PTR [EDI]
1801 mov dword ptr [edi+8], ebx
1802 mov bl, byte ptr[esi+14]
1803 mov word ptr [edi+12], dx
1804 add esi,eax
1805 mov byte ptr[edi+14], bl
1806 add edi,ecx
1807 pop edx
1808 sub edx ,4
1809 jnz Rec15
1810 jmp BltRecEnd
1811
1812Rec15_0123:
1813 cmp edx,2
1814 jz Rec15_2
1815 jb Rec15_01
1816;3 lines left
1817 FLD QWORD PTR [ESI]
1818 mov ebx, dword ptr [esi+8]
1819 mov dx, word ptr [esi+12]
1820 FSTP QWORD PTR [EDI]
1821 mov dword ptr [edi+8], ebx
1822 mov bl, byte ptr[esi+14]
1823 mov word ptr [edi+12], dx
1824 add esi,eax
1825 mov byte ptr[edi+14], bl
1826 add edi,ecx
1827 FLD QWORD PTR [ESI]
1828 mov ebx, dword ptr [esi+8]
1829 mov dx, word ptr [esi+12]
1830 FSTP QWORD PTR [EDI]
1831 mov dword ptr [edi+8], ebx
1832 mov bl, byte ptr[esi+14]
1833 mov word ptr [edi+12], dx
1834 add esi,eax
1835 mov byte ptr[edi+14], bl
1836 add edi,ecx
1837 FLD QWORD PTR [ESI]
1838 mov ebx, dword ptr [esi+8]
1839 mov dx, word ptr [esi+12]
1840 FSTP QWORD PTR [EDI]
1841 mov dword ptr [edi+8], ebx
1842 mov bl, byte ptr[esi+14]
1843 mov word ptr [edi+12], dx
1844 mov byte ptr[edi+14], bl
1845 jmp BltRecEnd
1846Rec15_2:
1847 FLD QWORD PTR [ESI]
1848 mov ebx, dword ptr [esi+8]
1849 mov dx, word ptr [esi+12]
1850 FSTP QWORD PTR [EDI]
1851 mov dword ptr [edi+8], ebx
1852 mov bl, byte ptr[esi+14]
1853 mov word ptr [edi+12], dx
1854 add esi,eax
1855 mov byte ptr[edi+14], bl
1856 add edi,ecx
1857 FLD QWORD PTR [ESI]
1858 mov ebx, dword ptr [esi+8]
1859 mov dx, word ptr [esi+12]
1860 FSTP QWORD PTR [EDI]
1861 mov dword ptr [edi+8], ebx
1862 mov bl, byte ptr[esi+14]
1863 mov word ptr [edi+12], dx
1864 mov byte ptr[edi+14], bl
1865 jmp BltRecEnd
1866Rec15_01:
1867 test edx,edx
1868 jz BltRecEnd
1869 FLD QWORD PTR [ESI]
1870 mov ebx, dword ptr [esi+8]
1871 mov dx, word ptr [esi+12]
1872 FSTP QWORD PTR [EDI]
1873 mov dword ptr [edi+8], ebx
1874 mov bl, byte ptr[esi+14]
1875 mov word ptr [edi+12], dx
1876 mov byte ptr[edi+14], bl
1877 jmp BltRecEnd
1878
1879
1880ComplexBlt:
1881 ; Blit first the even rect then the rest
1882
1883 push dword ptr [ebp+28] ; ulSrcPitch
1884 push dword ptr [ebp+24] ; ulDestPitch
1885 push edx
1886 shl ecx,4
1887 push ecx
1888 push esi
1889 push edi
1890 call _BltRec
1891 sub esp,24
1892 add esi,ecx
1893 add edi,ecx
1894 push dword ptr [ebp+28] ; ulSrcPitch
1895 push dword ptr [ebp+24] ; ulDestPitch
1896 push edx
1897 push ebx
1898 push esi
1899 push edi
1900 call _BltRec
1901 sub esp,24
1902
1903BltRecEnd:
1904 pop edx
1905 pop ecx
1906 pop ebx
1907 pop eax
1908 pop esi
1909 pop edi
1910 pop ebp
1911 ret
1912_BltRec ENDP
1913
1914
;-----------------------------------------------------------------------
; int __cdecl CPUHasMMX(void)
;
; Reports which of the optional instruction sets used by the blitters
; the processor supports.
;
; Returns (EAX):
;   0 = no CPUID instruction, or CPUID reports no MMX
;   1 = MMX
;   2 = MMX + CMOV instructions
;
; EBX, ECX, EDX, ESI, EDI and EBP are preserved.  EFLAGS is put back to
; its entry value after the CPUID-availability probe.
;-----------------------------------------------------------------------
        PUBLIC  _CPUHasMMX

_CPUHasMMX PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    ebx
        push    ecx
        push    edx

        ; CPUID is available only if bit 21 (ID) of EFLAGS can be toggled.
        pushfd
        pop     eax
        mov     ebx, eax                ; ebx = EFLAGS on entry
        xor     eax, 00200000h          ; flip the ID bit
        push    eax
        popfd
        pushfd
        pop     eax                     ; eax = EFLAGS as the CPU accepted them
        push    ebx
        popfd                           ; restore the caller's EFLAGS
        xor     eax, ebx                ; eax = bits that really changed
        and     eax, 00200000h          ; only the ID bit is of interest
        jz      Return                  ; ID bit stuck => no CPUID => return 0

        mov     eax, 1                  ; leaf 1: feature flags come back in EDX
        CPUID
        xor     eax, eax                ; CPUID left the CPU signature in EAX;
                                        ; start from "0 = no MMX" instead
        test    edx, 00800000h          ; bit 23: MMX
        jz      Return
        inc     eax                     ; 1 = MMX
        test    edx, 00008000h          ; bit 15: CMOV
        jz      Return
        inc     eax                     ; 2 = MMX + CMOV
Return:
;       mov     eax, 0                  ; debugging aid: pretend no MMX is available
        pop     edx
        pop     ecx
        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        ret
_CPUHasMMX ENDP
1960
;-----------------------------------------------------------------------
; void __cdecl MemFlip(PBYTE dest, PBYTE src, ULONG Size);
;
; Copies Size bytes from src to dest in ascending address order.
;
; Stack arguments: [ebp+8] = dest, [ebp+12] = src, [ebp+16] = Size.
;
; The bulk is moved 64 bytes per loop iteration through the x87 unit,
; then the remaining Size mod 64 bytes are moved in 32/16/8/4/2/1 byte
; steps selected by the individual bits of the remainder.
;
; The 8-byte moves use FILD/FISTP (64-bit integer load/store) rather
; than FLD/FSTP: an integer round trip through the 64-bit significand
; is exact for every bit pattern, whereas FLD of a double silently
; quiets signalling-NaN patterns and would alter the copied data.
;
; Needs two free x87 stack registers.  All integer registers are
; preserved.
;-----------------------------------------------------------------------
        PUBLIC  _MemFlip

_MemFlip PROC NEAR
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    eax
        push    ebx
        push    ecx

        mov     eax, dword ptr [ebp+16]     ; Size of buffer
        mov     edi, dword ptr [ebp+8]      ; destination
        mov     ebx, eax
        mov     esi, dword ptr [ebp+12]     ; source

        and     ebx, 0000003Fh              ; ebx = Size mod 64 (tail bytes)
        shr     eax, 6                      ; eax = number of whole 64-byte chunks
        jz      COPYREMAIN                  ; less than 64 bytes to copy

ALIGN 4
Loop64:                                     ; invariant: eax = chunks left, > 0
        FILD    QWORD PTR [ESI]
        FILD    QWORD PTR [ESI+8]
        FXCH                                ; bring the first qword back to ST(0)
        FISTP   QWORD PTR [EDI]
        FISTP   QWORD PTR [EDI+8]
        FILD    QWORD PTR [ESI+16]
        FILD    QWORD PTR [ESI+24]
        FXCH
        FISTP   QWORD PTR [EDI+16]
        FISTP   QWORD PTR [EDI+24]
        FILD    QWORD PTR [ESI+32]
        FILD    QWORD PTR [ESI+40]
        FXCH
        FISTP   QWORD PTR [EDI+32]
        FISTP   QWORD PTR [EDI+40]
        FILD    QWORD PTR [ESI+48]
        FILD    QWORD PTR [ESI+56]
        FXCH
        FISTP   QWORD PTR [EDI+48]
        FISTP   QWORD PTR [EDI+56]
        add     esi, 64
        add     edi, 64
        dec     eax
        jnz     Loop64

COPYREMAIN:
        test    ebx, ebx                    ; anything left?
        jz      EndOffFlip

        test    ebx, 00000020h              ; 32-byte piece present?
        jz      Test16
        FILD    QWORD PTR [ESI]
        FILD    QWORD PTR [ESI+8]
        FXCH
        FISTP   QWORD PTR [EDI]
        FISTP   QWORD PTR [EDI+8]
        FILD    QWORD PTR [ESI+16]
        FILD    QWORD PTR [ESI+24]
        FXCH
        FISTP   QWORD PTR [EDI+16]
        FISTP   QWORD PTR [EDI+24]
        add     esi, 32
        add     edi, 32

Test16:
        test    ebx, 00000010h              ; 16-byte piece present?
        jz      Test8
        FILD    QWORD PTR [ESI]
        FILD    QWORD PTR [ESI+8]
        FXCH
        FISTP   QWORD PTR [EDI]
        FISTP   QWORD PTR [EDI+8]
        add     esi, 16
        add     edi, 16

Test8:
        test    ebx, 00000008h              ; 8-byte piece present?
        jz      Test4
        mov     eax, [esi]
        mov     ecx, [esi+4]
        mov     [edi], eax
        mov     [edi+4], ecx
        add     esi, 8
        add     edi, 8

Test4:
        test    ebx, 00000004h              ; 4-byte piece present?
        jz      Test2
        mov     eax, [esi]
        mov     [edi], eax
        add     esi, 4
        add     edi, 4

Test2:
        test    ebx, 00000002h              ; 2-byte piece present?
        jz      Copy1
        mov     ax, [esi]
        mov     [edi], ax
        add     esi, 2
        add     edi, 2

Copy1:
        test    ebx, 00000001h              ; final odd byte?
        jz      EndOffFlip
        mov     al, [esi]
        mov     [edi], al

EndOffFlip:
        pop     ecx
        pop     ebx
        pop     eax
        pop     esi
        pop     edi
        pop     ebp
        ret
_MemFlip ENDP
2110
;-----------------------------------------------------------------------
; void _Optlink DDrawRGB555to565 (WORD *dest, WORD *src, ULONG num);
;
; Converts num 16-bit pixels from RGB555 to RGB565.
;
; In (_Optlink): EAX = dest, EDX = src, ECX = num (pixel count)
;
; Per pixel: red and green are moved up one bit (the new green LSB is
; left 0) and blue stays in bits 4..0.  Pixels are processed two at a
; time as a dword; an odd last pixel is handled separately.  num may
; be 0 or 1: the dword loop is skipped when there is no full pair.
;
; Clobbers EAX, ECX, EDX and flags; ESI and EDI are preserved.
;-----------------------------------------------------------------------
        PUBLIC  DDrawRGB555to565

DDrawRGB555to565 PROC NEAR

        push    esi
        push    edi
        cld

        mov     edi, eax                ; _Optlink arg1 = EAX
        mov     esi, edx                ; _Optlink arg2 = EDX
        push    ecx                     ; keep num for the odd-pixel check
        shr     ecx, 1                  ; _Optlink arg3 = ECX -> pixel pairs
        jz      oddpixel                ; num < 2: nothing for the dword loop

cvt:                                    ; invariant: ecx = pairs left, > 0
        mov     eax, [esi]
        mov     edx, eax
        add     esi, 4
        shl     eax, 1                  ; R,G one bit up in both pixels
        and     edx, 0001F001Fh         ; blue of both pixels
        and     eax, 0FFC0FFC0h         ; drop shifted blue and the bit that
                                        ; crossed from the low into the high pixel
        add     edi, 4
        or      eax, edx
        mov     [edi-4], eax
        dec     ecx
        jnz     cvt

oddpixel:
        pop     ecx                     ; ecx = original num
        test    ecx, 1
        jz      SHORT done              ; even count: finished

        mov     ax, [esi]
        mov     dx, ax
        and     dx, 0001Fh
        shl     ax, 1
        and     ax, 0FFC0h
        or      ax, dx
        mov     [edi], ax

done:
        pop     edi
        pop     esi
        ret

DDrawRGB555to565 ENDP
2158
;-----------------------------------------------------------------------
; DDrawRGB565to555
;
; Converts 16-bit pixels from RGB565 to RGB555.  Expected prototype,
; by analogy with the register usage below:
;   void _Optlink DDrawRGB565to555 (WORD *dest, WORD *src, ULONG num);
;
; In (_Optlink): EAX = dest, EDX = src, ECX = num (pixel count)
;
; Per pixel: red and green are moved down one bit (the green LSB is
; dropped), blue stays in bits 4..0 and bit 15 is written as 0.
; Pixels are processed two at a time as a dword; an odd last pixel is
; handled separately.  num may be 0 or 1: the dword loop is skipped
; when there is no full pair.
;
; Clobbers EAX, ECX, EDX and flags; ESI and EDI are preserved.
;-----------------------------------------------------------------------
        PUBLIC  DDrawRGB565to555

DDrawRGB565to555 PROC NEAR

        push    esi
        push    edi
        cld

        mov     edi, eax                ; _Optlink arg1 = EAX
        mov     esi, edx                ; _Optlink arg2 = EDX
        push    ecx                     ; keep num for the odd-pixel check
        shr     ecx, 1                  ; _Optlink arg3 = ECX -> pixel pairs
        jz      oddpixel                ; num < 2: nothing for the dword loop

cvt:                                    ; invariant: ecx = pairs left, > 0
        mov     eax, [esi]
        mov     edx, eax
        add     esi, 4
        shr     eax, 1                  ; R,G one bit down in both pixels
        and     edx, 0001F001Fh         ; blue of both pixels
        and     eax, 07FE07FE0h         ; drop the shifted blue/green LSB and the
                                        ; bit that crossed from the high pixel
        add     edi, 4
        or      eax, edx
        mov     [edi-4], eax
        dec     ecx
        jnz     cvt

oddpixel:
        pop     ecx                     ; ecx = original num
        test    ecx, 1
        jz      SHORT done              ; even count: finished

        mov     ax, [esi]
        mov     dx, ax
        and     dx, 0001Fh
        shr     ax, 1
        and     ax, 07FE0h
        or      ax, dx
        mov     [edi], ax

done:
        pop     edi
        pop     esi
        ret

DDrawRGB565to555 ENDP
2207
;-----------------------------------------------------------------------
; void _Optlink DDrawRGB555to565MMX(WORD *dest, WORD *src, ULONG num);
;
; MMX version of the RGB555 -> RGB565 pixel conversion.
;
; In (_Optlink): EAX = dest, EDX = src, ECX = num (pixel count)
;
; The MMX loop converts 8 pixels (16 bytes) per iteration; the
; remaining num mod 8 pixels are converted one at a time with integer
; code.  The x87/MMX register file is saved with FSAVE before the MMX
; loop and restored with FRSTOR afterwards, using 108 bytes of stack.
;
; Uses the and1mask / and2mask qword constants from DATA32.
; Clobbers EAX, ECX, EDX and flags; ESI and EDI are preserved.
;-----------------------------------------------------------------------
        PUBLIC  DDrawRGB555to565MMX

DDrawRGB555to565MMX PROC NEAR
        push    esi
        push    edi
        cld

        test    ecx, ecx
        jz      done                    ; nothing to convert

        push    ecx                     ; keep num for the tail count

        mov     edi, eax                ; _Optlink arg1 = EAX
        mov     esi, edx                ; _Optlink arg2 = EDX
        shr     ecx, 3                  ; _Optlink arg3 = ECX -> groups of 8 pixels
        jz      lastpixels              ; fewer than 8 pixels: integer code only

        sub     esp, 108                ; room for the FSAVE image
        fsaved  dword ptr [esp]

        movq    mm2, qword ptr and1mask ; blue mask, 001Fh in every word
        movq    mm3, qword ptr and2mask ; red/green mask, FFC0h in every word

cvt:                                    ; invariant: ecx = groups left, > 0
        movq    mm0, qword ptr [esi]    ; pixels 0..3
        add     edi, 16

        movq    mm4, qword ptr [esi+8]  ; pixels 4..7
        movq    mm1, mm0

        movq    mm5, mm4
        psllq   mm0, 1                  ; R,G one bit up; bits that cross into the
                                        ; next word land in bit 0 and are masked off
        psllq   mm4, 1
        pand    mm1, mm2                ; blue of pixels 0..3

        pand    mm0, mm3
        pand    mm5, mm2                ; blue of pixels 4..7

        pand    mm4, mm3
        por     mm0, mm1

        por     mm4, mm5
        add     esi, 16

        movq    qword ptr [edi-16], mm0
        dec     ecx                     ; sets ZF for the jnz below (movq keeps flags)

        movq    qword ptr [edi-8], mm4
        jnz     cvt

        frstord dword ptr [esp]         ; give the caller its FPU state back
        add     esp, 108

lastpixels:
        pop     ecx                     ; ecx = original num
        and     ecx, 7                  ; pixels not covered by the 8-pixel loop
        jz      short done

cvt2loop:                               ; invariant: ecx = pixels left, > 0
        mov     ax, [esi]
        mov     dx, ax

        add     esi, 2
        and     dx, 001Fh

        shl     ax, 1
        add     edi, 2

        and     ax, 0FFC0h
        or      ax, dx

        mov     [edi-2], ax
        dec     ecx
        jnz     cvt2loop

done:
        pop     edi
        pop     esi
        ret

DDrawRGB555to565MMX ENDP
2294
2295
;-----------------------------------------------------------------------
; void _Optlink DDrawRGB565to555MMX(WORD *dest, WORD *src, ULONG num);
;
; MMX version of the RGB565 -> RGB555 pixel conversion.
;
; In (_Optlink): EAX = dest, EDX = src, ECX = num (pixel count)
;
; The MMX loop converts 8 pixels (16 bytes) per iteration; the
; remaining num mod 8 pixels are converted one at a time with integer
; code.  The x87/MMX register file is saved with FSAVE before the MMX
; loop and restored with FRSTOR afterwards, using 108 bytes of stack.
;
; Uses the and1mask / and2mask565 qword constants from DATA32.
; Clobbers EAX, ECX, EDX and flags; ESI and EDI are preserved.
;-----------------------------------------------------------------------
        PUBLIC  DDrawRGB565to555MMX

DDrawRGB565to555MMX PROC NEAR
        push    esi
        push    edi
        cld

        test    ecx, ecx
        jz      done                    ; nothing to convert

        push    ecx                     ; keep num for the tail count

        mov     edi, eax                ; _Optlink arg1 = EAX
        mov     esi, edx                ; _Optlink arg2 = EDX
        shr     ecx, 3                  ; _Optlink arg3 = ECX -> groups of 8 pixels
        jz      lastpixels              ; fewer than 8 pixels: integer code only

        sub     esp, 108                ; room for the FSAVE image
        fsaved  dword ptr [esp]

        movq    mm2, qword ptr and1mask    ; blue mask, 001Fh in every word
        movq    mm3, qword ptr and2mask565 ; red/green mask (bits 14..5 of every
                                           ; word; one lane also carries bit 15)

cvt:                                    ; invariant: ecx = groups left, > 0
        movq    mm0, qword ptr [esi]    ; pixels 0..3
        add     edi, 16

        movq    mm4, qword ptr [esi+8]  ; pixels 4..7
        movq    mm1, mm0

        movq    mm5, mm4
        psrlw   mm0, 1                  ; per-word shift: bit 15 of every pixel
                                        ; becomes 0, so nothing from the next
                                        ; pixel can slip through the mask
        psrlw   mm4, 1
        pand    mm1, mm2                ; blue of pixels 0..3

        pand    mm0, mm3
        pand    mm5, mm2                ; blue of pixels 4..7

        pand    mm4, mm3
        por     mm0, mm1

        por     mm4, mm5
        add     esi, 16

        movq    qword ptr [edi-16], mm0
        dec     ecx                     ; sets ZF for the jnz below (movq keeps flags)

        movq    qword ptr [edi-8], mm4
        jnz     cvt

        frstord dword ptr [esp]         ; give the caller its FPU state back
        add     esp, 108

lastpixels:
        pop     ecx                     ; ecx = original num
        and     ecx, 7                  ; pixels not covered by the 8-pixel loop
        jz      short done

cvt2loop:                               ; invariant: ecx = pixels left, > 0
        mov     ax, [esi]
        mov     dx, ax

        add     esi, 2
        and     dx, 001Fh

        shr     ax, 1
        add     edi, 2

        and     ax, 07FE0h
        or      ax, dx

        mov     [edi-2], ax
        dec     ecx
        jnz     cvt2loop

done:
        pop     edi
        pop     esi
        ret

DDrawRGB565to555MMX ENDP
2382
2383CODE32 ENDS
2384
2385 END
Note: See TracBrowser for help on using the repository browser.