- Timestamp:
- Sep 25, 2000, 8:53:45 PM (25 years ago)
- Location:
- trunk/src/ddraw
- Files:
- 2 edited
Legend:
- Unmodified
- Added
- Removed
trunk/src/ddraw/asmutil.asm
r2638 r4322 1 ; $Id: asmutil.asm,v 1. 6 2000-02-04 19:31:26 hughExp $1 ; $Id: asmutil.asm,v 1.7 2000-09-25 18:53:45 mike Exp $ 2 2 3 3 ; … … 57 57 push edx 58 58 59 mov edi, dword ptr [ebp+8] ;dest59 mov edi, dword ptr [ebp+8] ;dest 60 60 mov esi, dword ptr [ebp+12] ;src 61 61 mov ecx, dword ptr [ebp+20] ;linesize … … 71 71 or edx, eax ; edx now contains the colorkey in each byte 72 72 shr ecx, 2 ;linesize in dwords 73 jz blitremain ; less then 4 bytes73 jz blitremain ; less then 4 bytes 74 74 jmp blitStart 75 75 blitloop: … … 79 79 mov ebx, dword ptr [esi] 80 80 mov eax, dword ptr [edi] 81 cmp ebx, edx ; All 4 bytes transparent?81 cmp ebx, edx ; All 4 bytes transparent? 82 82 jz TTTT 83 cmp bx, dx ; lower 2 bytes transparent?83 cmp bx, dx ; lower 2 bytes transparent? 84 84 jz XXTT 85 cmp bl, dl ; lower byte trans 85 cmp bl, dl ; lower byte transparent? 86 86 jz XXOT 87 87 mov al, bl … … 93 93 ror eax, 16 94 94 ror ebx, 16 95 cmp bx, dx95 cmp bx, dx 96 96 jz skipbyte4 97 97 cmp bl, dl … … 99 99 mov al, bl 100 100 skipbyte3: 101 cmp bh, d l101 cmp bh, dh 102 102 je skipbyte4 103 103 mov ah, bh … … 107 107 TTTT: 108 108 dec ecx 109 j zblitloop109 jnz blitloop 110 110 blitremain: 111 111 pop ecx … … 114 114 jz blit2 115 115 test ecx,ecx 116 jz endofblit117 mov eax, dword ptr [esi]116 jz endofblit 117 mov eax, dword ptr [esi] 118 118 mov ebx, dword ptr [edi] 119 rol eax, 8120 rol ebx, 8121 119 cmp al,dl 122 jz endofblit120 jz endofblit 123 121 mov bl,al 124 ror ebx, 8125 122 mov dword ptr[edi],ebx 126 123 jmp endofblit 127 124 128 125 blit3: 129 mov eax, dword ptr [esi]126 mov eax, dword ptr [esi] 130 127 mov ebx, dword ptr [edi] 131 ror eax, 16132 ror ebx, 16133 128 cmp ax, dx 134 jz TTX135 cmp ah, dh136 jz TXX137 mov bh,ah129 jz TTX 130 cmp ah, dh 131 jz TXX 132 mov bh, ah 138 133 TXX: 139 cmp al,dl140 jz TTX141 mov bl,al134 cmp al, dl 135 jz TTX 136 mov bl, al 142 137 TTX: 143 138 ror eax, 16 144 139 ror ebx, 16 145 cmp a h,dh146 jz Cpyback147 mov dh,ah140 cmp al, 
dl 141 jz Cpyback 142 mov bl, al 148 143 Cpyback: 144 ror ebx, 16 149 145 mov dword ptr [edi], ebx 150 jmp endofblit146 jmp endofblit 151 147 152 148 blit2: 153 mov eax, dword ptr [esi]149 mov eax, dword ptr [esi] 154 150 mov ebx, dword ptr [edi] 155 ror eax, 16156 ror ebx, 16157 151 cmp ax, dx ; both bytes transparent ? 158 jz endofblit159 cmp ah, dh160 jz TX161 mov bh,ah152 jz endofblit 153 cmp ah, dh 154 jz TX 155 mov bh, ah 162 156 TX: 163 cmp al,dl164 jz OT165 mov bl,al157 cmp al, dl 158 jz OT 159 mov bl, al 166 160 OT: 167 ror ebx, 16168 161 mov dword ptr[edi], ebx 169 162 … … 202 195 push edx 203 196 204 mov edi, dword ptr [ebp+8] ;dest197 mov edi, dword ptr [ebp+8] ;dest 205 198 mov esi, dword ptr [ebp+12] ;src 206 199 mov ecx, dword ptr [ebp+20] ;linesize … … 212 205 or edx,eax ; create dwColorKey 213 206 shr ecx, 1 ; linesize in dwords 214 jz OnePixel 207 jz OnePixel ; FIXME: BUG if ecx was really 1! 215 208 216 209 blitloop16: 217 210 mov eax, dword ptr [esi] 218 211 mov ebx, dword ptr [edi] 219 add esi , 4 220 cmp eax, edx ; are both pixel transparent? 221 je LoopUp ; Yes, then Jump to loopend 222 cmp ax,dx ; Is lower pixel transparent 223 je DrawOT ; Yes So We got OT (OPAQUE/Transparent 224 mov bx,ax ; No so copy the lower pixel 225 ror eax,16 ; 226 cmp ax,dx ; Is higher pixel transparent 227 je CopyBack ; 212 add esi, 4 213 cmp eax, edx ; are both pixel transparent? 
214 je LoopUp ; Yes, then Jump to loopend 215 cmp ax, dx ; Is lower pixel transparent 216 je DrawOT ; Yes So We got OT (OPAQUE/Transparent 217 mov bx, ax ; No so copy the lower pixel 228 218 DrawOT: 219 ror eax, 16 ; 220 cmp ax, dx ; Is higher pixel transparent 221 je CopyBack ; 222 mov bx, ax 223 CopyBack: 229 224 ror ebx,16 230 mov bx,ax231 ror ebx,16232 CopyBack:233 225 mov dword ptr[edi], ebx ; copy back the result in ebx 234 226 LoopUp: 235 227 mov ebx, dword ptr [ebp+20] ; V load this this in case we are done 236 add edi , 4; U228 add edi, 4 ; U 237 229 OnePixel: 238 230 dec ecx 239 jnz blitloop16231 jnz blitloop16 240 232 test ebx, 1 ; Do we have an odd linesize 241 jz endofblit16233 jz endofblit16 242 234 mov eax, dword ptr [esi] 243 235 mov ebx, dword ptr [edi] 244 ror eax,16245 ror ebx,16246 236 cmp ax, dx 247 je endofblit16 ; last pixel is transparent 248 mov bx,ax ; No so copy the lower pixel 249 ror ebx,16 250 mov dword ptr[edi], ebx ; copy back the result in ebx 237 je endofblit16 ; last pixel is transparent 238 mov bx,ax ; No so copy the lower pixel 239 mov dword ptr [edi], ebx ; copy back the result in ebx 251 240 252 241 endofblit16: … … 276 265 push edx 277 266 278 mov edx, [ebp+16] ;colorkey279 mov edi, [ebp+8];dest280 mov dh,dl281 mov esi, [ebp+12] ;src282 mov ax,dx283 mov ecx, dword ptr [ebp+20] ;linesize284 sh redx,16285 mov dx,ax286 movd mm4,edx287 movd mm5,edx267 mov edx, [ebp+16] ;colorkey (in dl) 268 mov edi, [ebp+8] ;dest 269 mov esi, [ebp+12] ;src 270 mov ecx, dword ptr [ebp+20] ;linesize 271 mov dh,dl 272 mov eax,edx 273 shl edx,16 274 mov dx,ax 275 movd mm4,edx 276 movd mm5,edx 288 277 psllq mm4,32 289 por mm4,mm5290 shr ecx,3278 por mm4,mm5 279 shr ecx,3 291 280 jz BltRemain8 292 281 293 282 bltLoopMMX8: 294 movq mm0, [esi] ; get source qword295 movq mm1, [edi] ; get dest qword296 movq mm2, mm0; copy source297 PCMPEQB mm0,mm4 ; create mask298 pand mm1, mm0; mask dest299 pandn mm 2,mm0 ; masksource300 por mm1, mm2; or them283 movq mm0, 
[esi] ; get source qword 284 movq mm1, [edi] ; get dest qword 285 movq mm2, mm0 ; copy source 286 pcmpeqb mm0, mm4 ; create mask 287 pand mm1, mm0 ; mask dest 288 pandn mm0, mm2 ; NOT mask AND source 289 por mm1, mm0 ; or them 301 290 movq qword ptr [edi], mm1 ; write back result 302 291 add esi, 8 … … 326 315 ; 327 316 blt7MMX8: 328 movd mm0, dword ptr[esi] 329 mov ax, word ptr[esi+4] 330 mov bx, word ptr[edi+4] 331 movd mm1, dword ptr[edi] 332 psllq mm0,32 333 shl eax,8 334 shl ebx,8 335 mov al, byte ptr[esi+6] 336 mov bl, byte ptr[edi+6] 337 movd mm0,eax 338 psllq mm1,32 339 movd mm1,ebx 340 movq mm2,mm0 ; copy source 341 PCMPEQB mm0,mm4 ; create mask 342 pand mm1,mm0 ; mask dest 343 pandn mm2,mm0 ; mask source 344 por mm1,mm2 ; or them 317 movd mm0, dword ptr[esi] 318 mov ax, word ptr[esi+4] 319 mov bx, word ptr[edi+4] 320 movd mm1, dword ptr[edi] 321 psllq mm0, 32 322 shl eax, 8 323 shl ebx, 8 324 mov al, byte ptr[esi+6] 325 mov bl, byte ptr[edi+6] 326 movd mm5, eax 327 por mm0, mm5 328 psllq mm1, 32 329 movd mm6, ebx 330 por mm1, mm6 331 movq mm2, mm0 ; copy source 332 pcmpeqb mm0, mm4 ; create mask 333 pand mm1, mm0 ; mask dest 334 pandn mm0, mm2 ; mask source 335 por mm1, mm0 ; or them 345 336 movd eax, mm1 346 337 psrlq mm1,32 347 mov byte ptr[edi+6], al338 mov byte ptr[edi+6], al 348 339 movd dword ptr[edi], mm1 349 shr eax,8350 mov word ptr[edi+4],ax351 jmp bltEndMMX8340 shr eax,8 341 mov word ptr[edi+4], ax 342 jmp bltEndMMX8 352 343 353 344 blt6MMX8: 354 movd mm0, dword ptr[esi] 355 mov ax, word ptr[esi+4] 356 mov bx, word ptr[edi+4] 357 movd mm1, dword ptr[edi] 358 psllq mm0,32 359 psllq mm1,32 360 movd mm0,eax 361 movd mm1,ebx 362 movq mm2,mm0 ; copy source 363 pcmpeqb mm0,mm4 ; create mask 364 pand mm1,mm0 ; mask dest 365 pandn mm2,mm0 ; mask source 366 por mm1,mm2 ; or them 345 movd mm0, dword ptr[esi] 346 mov ax, word ptr[esi+4] 347 mov bx, word ptr[edi+4] 348 movd mm1, dword ptr[edi] 349 psllq mm0, 32 350 psllq mm1, 32 351 movd mm5, eax 352 por 
mm0, mm5 353 movd mm6, ebx 354 por mm1, mm6 355 movq mm2, mm0 ; copy source 356 pcmpeqb mm0, mm4 ; create mask 357 pand mm1, mm0 ; mask dest 358 pandn mm0, mm2 ; mask source 359 por mm1, mm0 ; or them 367 360 movd eax, mm1 368 361 psrlq mm1,32 369 mov word ptr[edi+4],ax362 mov word ptr[edi+4], ax 370 363 movd dword ptr[edi], mm1 371 jmp bltEndMMX8364 jmp bltEndMMX8 372 365 373 366 blt5MMX8: 374 367 movd mm0, dword ptr[esi] 375 368 movd mm1, dword ptr[edi] 376 movq mm2, mm0; copy source377 pcmpeqb mm0, mm4 ; create mask378 pand mm1, mm0; mask dest379 add esi, 4;380 pandn mm 2,mm0; mask source381 por mm1, mm2; or them369 movq mm2, mm0 ; copy source 370 pcmpeqb mm0, mm4 ; create mask 371 pand mm1, mm0 ; mask dest 372 add esi, 4 373 pandn mm0, mm2 ; mask source 374 por mm1, mm0 ; or them 382 375 movd dword ptr[edi], mm1 383 add edi,4384 jmp blt1MMX8376 add edi,4 377 jmp blt1MMX8 385 378 386 379 blt4MMX8: 387 380 movd mm0, dword ptr[esi] 388 381 movd mm1, dword ptr[edi] 389 movq mm2,mm0 ; copy source382 movq mm2,mm0 ; copy source 390 383 pcmpeqb mm0,mm4 ; create mask 391 pand mm1,mm0 ; mask dest392 pandn mm 2,mm0; mask source393 por mm1,mm 2; or them394 movd dword ptr [edi], mm1 ; write back result395 jmp bltEndMMX8384 pand mm1,mm0 ; mask dest 385 pandn mm0,mm2 ; mask source 386 por mm1,mm0 ; or them 387 movd dword ptr [edi], mm1 ; write back result 388 jmp bltEndMMX8 396 389 ; 397 390 ; loading a dword into mm0/mm1 might be faster for 3-2... 
398 391 ; 399 392 blt3MMX8: 400 mov ax 401 mov bx 393 mov ax, word ptr [esi] 394 mov bx, word ptr [edi] 402 395 shl eax,8 ; 3 Pixel left to blit 403 396 shl ebx,8 ; so shift the buffers … … 408 401 movq mm2,mm0 409 402 pcmpeqb mm0,mm4 ; create mask 410 pand mm1,mm0 ; mask dest411 pandn mm 2,mm0; mask source412 por mm1,mm 2; or them413 movd eax, mm1; write back result414 mov byte ptr[edi+2],al415 shr eax,8416 mov word ptr[edi],ax403 pand mm1,mm0 ; mask dest 404 pandn mm0,mm2 ; mask source 405 por mm1,mm0 ; or them 406 movd eax, mm1 ; write back result 407 mov byte ptr[edi+2], al 408 shr eax, 8 409 mov word ptr[edi], ax 417 410 jmp bltEndMMX8 418 411 419 412 blt2MMX8: 420 413 mov al, byte ptr [esi] 421 cmp al,dl422 je blt1aMMX8414 cmp al, dl 415 je blt1aMMX8 423 416 mov byte ptr [edi], al 424 mov bl, byte ptr [esi+1]425 cmp bl,dl426 jebltEndMMX8427 mov byte ptr [edi+1], bl428 jmpbltEndMMX8417 ; mov bl, byte ptr [esi+1] 418 ; cmp bl, dl 419 ; je bltEndMMX8 420 ; mov byte ptr [edi+1], bl 421 ; jmp bltEndMMX8 429 422 blt1aMMX8: 430 add esi, 1431 add edi, 1423 add esi, 1 424 add edi, 1 432 425 blt1MMX8: 433 426 mov al, byte ptr [esi] 434 cmp al,dl435 je bltEndMMX8427 cmp al, dl 428 je bltEndMMX8 436 429 mov byte ptr [edi], al 437 430 … … 461 454 mov edx, dword ptr [ebp+16] ; colorkey 462 455 mov edi, dword ptr [ebp+8] ; dest 463 mov eax, dword ptr [ebp+16] ; colorkey464 shr edx,16;465 456 mov ecx, dword ptr [ebp+20] ; linesize in pixel! 
466 mov dx,ax ; extend colorkey to 32 Bit 457 458 mov eax, edx 459 shl edx, 16; 460 mov dx, ax ; extend colorKey to 32 bit 461 467 462 mov esi, dword ptr [ebp+12] ; src 468 463 mov eax, ecx ; copy of linesize … … 476 471 477 472 bltLoopMMX16: 478 movq mm0,qword ptr [esi] ; get source dword479 movq mm1,qword ptr [edi] ; get destination480 movq mm2,mm0 ; copy source481 pcmpeqw mm0,mm4 ; create mask482 pand mm1,mm0 ; mask dest483 add esi, 8484 pandn mm 2,mm0 ; masksource485 por mm1,mm 2; or them473 movq mm0,qword ptr [esi] ; get source dword 474 movq mm1,qword ptr [edi] ; get destination 475 movq mm2,mm0 ; copy source 476 pcmpeqw mm0,mm4 ; create mask in mm0 477 pand mm1,mm0 ; mask dest 478 add esi, 8 ; point to next source qword 479 pandn mm0,mm2 ; NOT mask AND source 480 por mm1,mm0 ; or them 486 481 movq qword ptr [edi], mm1 ; write back result 487 482 add edi, 8 … … 505 500 movd mm0, dword ptr[esi] 506 501 movd mm1, dword ptr[edi] 507 movq mm2,mm0 ; copy source502 movq mm2,mm0 ; copy source 508 503 add esi,4 509 504 pcmpeqw mm0,mm4 ; create mask 16 bit 510 pand mm1,mm0 ; mask dest511 pandn mm2,mm0; mask source505 pand mm1,mm0 ; mask dest 506 pandn mm0,mm2 ; mask source 512 507 add edi,4 513 por mm1,mm 2; or them508 por mm1,mm0 ; or them 514 509 movd dword ptr[edi-4], mm1 515 510 jmp blt1MMX16 … … 521 516 pcmpeqw mm0,mm4 ; create mask 16 bit 522 517 pand mm1,mm0 ; mask dest 523 pandn mm 2,mm0; mask source524 por mm1,mm 2; or them518 pandn mm0,mm2 ; mask source 519 por mm1,mm0 ; or them 525 520 movd dword ptr [edi], mm1 ; write back result 526 521 jmp bltEndMMX16 … … 1942 1937 inc eax 1943 1938 Return: 1939 ; mov eax, 0 ; pretend no MMX is available 1944 1940 pop edx 1945 1941 pop ecx -
trunk/src/ddraw/new/asmutil.asm
r3345 r4322 1 ; $Id: asmutil.asm,v 1. 1 2000-04-07 18:21:09mike Exp $1 ; $Id: asmutil.asm,v 1.2 2000-09-25 18:53:45 mike Exp $ 2 2 3 3 ; … … 57 57 push edx 58 58 59 mov edi, dword ptr [ebp+8] ;dest59 mov edi, dword ptr [ebp+8] ;dest 60 60 mov esi, dword ptr [ebp+12] ;src 61 61 mov ecx, dword ptr [ebp+20] ;linesize … … 71 71 or edx, eax ; edx now contains the colorkey in each byte 72 72 shr ecx, 2 ;linesize in dwords 73 jz blitremain ; less then 4 bytes73 jz blitremain ; less then 4 bytes 74 74 jmp blitStart 75 75 blitloop: … … 79 79 mov ebx, dword ptr [esi] 80 80 mov eax, dword ptr [edi] 81 cmp ebx, edx ; All 4 bytes transparent?81 cmp ebx, edx ; All 4 bytes transparent? 82 82 jz TTTT 83 cmp bx, dx ; lower 2 bytes transparent?83 cmp bx, dx ; lower 2 bytes transparent? 84 84 jz XXTT 85 cmp bl, dl ; lower byte trans 85 cmp bl, dl ; lower byte transparent? 86 86 jz XXOT 87 87 mov al, bl … … 93 93 ror eax, 16 94 94 ror ebx, 16 95 cmp bx, dx95 cmp bx, dx 96 96 jz skipbyte4 97 97 cmp bl, dl … … 99 99 mov al, bl 100 100 skipbyte3: 101 cmp bh, d l101 cmp bh, dh 102 102 je skipbyte4 103 103 mov ah, bh … … 107 107 TTTT: 108 108 dec ecx 109 j zblitloop109 jnz blitloop 110 110 blitremain: 111 111 pop ecx … … 114 114 jz blit2 115 115 test ecx,ecx 116 jz endofblit117 mov eax, dword ptr [esi]116 jz endofblit 117 mov eax, dword ptr [esi] 118 118 mov ebx, dword ptr [edi] 119 rol eax, 8120 rol ebx, 8121 119 cmp al,dl 122 jz endofblit120 jz endofblit 123 121 mov bl,al 124 ror ebx, 8125 122 mov dword ptr[edi],ebx 126 123 jmp endofblit 127 124 128 125 blit3: 129 mov eax, dword ptr [esi]126 mov eax, dword ptr [esi] 130 127 mov ebx, dword ptr [edi] 131 ror eax, 16132 ror ebx, 16133 128 cmp ax, dx 134 jz TTX135 cmp ah, dh136 jz TXX137 mov bh,ah129 jz TTX 130 cmp ah, dh 131 jz TXX 132 mov bh, ah 138 133 TXX: 139 cmp al,dl140 jz TTX141 mov bl,al134 cmp al, dl 135 jz TTX 136 mov bl, al 142 137 TTX: 143 138 ror eax, 16 144 139 ror ebx, 16 145 cmp a h,dh146 jz Cpyback147 mov dh,ah140 cmp al, 
dl 141 jz Cpyback 142 mov bl, al 148 143 Cpyback: 144 ror ebx, 16 149 145 mov dword ptr [edi], ebx 150 jmp endofblit146 jmp endofblit 151 147 152 148 blit2: 153 mov eax, dword ptr [esi]149 mov eax, dword ptr [esi] 154 150 mov ebx, dword ptr [edi] 155 ror eax, 16156 ror ebx, 16157 151 cmp ax, dx ; both bytes transparent ? 158 jz endofblit159 cmp ah, dh160 jz TX161 mov bh,ah152 jz endofblit 153 cmp ah, dh 154 jz TX 155 mov bh, ah 162 156 TX: 163 cmp al,dl164 jz OT165 mov bl,al157 cmp al, dl 158 jz OT 159 mov bl, al 166 160 OT: 167 ror ebx, 16168 161 mov dword ptr[edi], ebx 169 162 … … 202 195 push edx 203 196 204 mov edi, dword ptr [ebp+8] ;dest197 mov edi, dword ptr [ebp+8] ;dest 205 198 mov esi, dword ptr [ebp+12] ;src 206 199 mov ecx, dword ptr [ebp+20] ;linesize … … 212 205 or edx,eax ; create dwColorKey 213 206 shr ecx, 1 ; linesize in dwords 214 jz OnePixel 207 jz OnePixel ; FIXME: BUG if ecx was really 1! 215 208 216 209 blitloop16: 217 210 mov eax, dword ptr [esi] 218 211 mov ebx, dword ptr [edi] 219 add esi , 4 220 cmp eax, edx ; are both pixel transparent? 221 je LoopUp ; Yes, then Jump to loopend 222 cmp ax,dx ; Is lower pixel transparent 223 je DrawOT ; Yes So We got OT (OPAQUE/Transparent 224 mov bx,ax ; No so copy the lower pixel 225 ror eax,16 ; 226 cmp ax,dx ; Is higher pixel transparent 227 je CopyBack ; 212 add esi, 4 213 cmp eax, edx ; are both pixel transparent? 
214 je LoopUp ; Yes, then Jump to loopend 215 cmp ax, dx ; Is lower pixel transparent 216 je DrawOT ; Yes So We got OT (OPAQUE/Transparent 217 mov bx, ax ; No so copy the lower pixel 228 218 DrawOT: 219 ror eax, 16 ; 220 cmp ax, dx ; Is higher pixel transparent 221 je CopyBack ; 222 mov bx, ax 223 CopyBack: 229 224 ror ebx,16 230 mov bx,ax231 ror ebx,16232 CopyBack:233 225 mov dword ptr[edi], ebx ; copy back the result in ebx 234 226 LoopUp: 235 227 mov ebx, dword ptr [ebp+20] ; V load this this in case we are done 236 add edi , 4; U228 add edi, 4 ; U 237 229 OnePixel: 238 230 dec ecx 239 jnz blitloop16231 jnz blitloop16 240 232 test ebx, 1 ; Do we have an odd linesize 241 jz endofblit16233 jz endofblit16 242 234 mov eax, dword ptr [esi] 243 235 mov ebx, dword ptr [edi] 244 ror eax,16245 ror ebx,16246 236 cmp ax, dx 247 je endofblit16 ; last pixel is transparent 248 mov bx,ax ; No so copy the lower pixel 249 ror ebx,16 250 mov dword ptr[edi], ebx ; copy back the result in ebx 237 je endofblit16 ; last pixel is transparent 238 mov bx,ax ; No so copy the lower pixel 239 mov dword ptr [edi], ebx ; copy back the result in ebx 251 240 252 241 endofblit16: … … 276 265 push edx 277 266 278 mov edx, [ebp+16] ;colorkey279 mov edi, [ebp+8];dest280 mov dh,dl281 mov esi, [ebp+12] ;src282 mov ax,dx283 mov ecx, dword ptr [ebp+20] ;linesize284 sh redx,16285 mov dx,ax286 movd mm4,edx287 movd mm5,edx267 mov edx, [ebp+16] ;colorkey (in dl) 268 mov edi, [ebp+8] ;dest 269 mov esi, [ebp+12] ;src 270 mov ecx, dword ptr [ebp+20] ;linesize 271 mov dh,dl 272 mov eax,edx 273 shl edx,16 274 mov dx,ax 275 movd mm4,edx 276 movd mm5,edx 288 277 psllq mm4,32 289 por mm4,mm5290 shr ecx,3278 por mm4,mm5 279 shr ecx,3 291 280 jz BltRemain8 292 281 293 282 bltLoopMMX8: 294 movq mm0, [esi] ; get source qword295 movq mm1, [edi] ; get dest qword296 movq mm2, mm0; copy source297 PCMPEQB mm0,mm4 ; create mask298 pand mm1, mm0; mask dest299 pandn mm 2,mm0 ; masksource300 por mm1, mm2; or them283 movq mm0, 
[esi] ; get source qword 284 movq mm1, [edi] ; get dest qword 285 movq mm2, mm0 ; copy source 286 pcmpeqb mm0, mm4 ; create mask 287 pand mm1, mm0 ; mask dest 288 pandn mm0, mm2 ; NOT mask AND source 289 por mm1, mm0 ; or them 301 290 movq qword ptr [edi], mm1 ; write back result 302 291 add esi, 8 … … 326 315 ; 327 316 blt7MMX8: 328 movd mm0, dword ptr[esi] 329 mov ax, word ptr[esi+4] 330 mov bx, word ptr[edi+4] 331 movd mm1, dword ptr[edi] 332 psllq mm0,32 333 shl eax,8 334 shl ebx,8 335 mov al, byte ptr[esi+6] 336 mov bl, byte ptr[edi+6] 337 movd mm0,eax 338 psllq mm1,32 339 movd mm1,ebx 340 movq mm2,mm0 ; copy source 341 PCMPEQB mm0,mm4 ; create mask 342 pand mm1,mm0 ; mask dest 343 pandn mm2,mm0 ; mask source 344 por mm1,mm2 ; or them 317 movd mm0, dword ptr[esi] 318 mov ax, word ptr[esi+4] 319 mov bx, word ptr[edi+4] 320 movd mm1, dword ptr[edi] 321 psllq mm0, 32 322 shl eax, 8 323 shl ebx, 8 324 mov al, byte ptr[esi+6] 325 mov bl, byte ptr[edi+6] 326 movd mm5, eax 327 por mm0, mm5 328 psllq mm1, 32 329 movd mm6, ebx 330 por mm1, mm6 331 movq mm2, mm0 ; copy source 332 pcmpeqb mm0, mm4 ; create mask 333 pand mm1, mm0 ; mask dest 334 pandn mm0, mm2 ; mask source 335 por mm1, mm0 ; or them 345 336 movd eax, mm1 346 337 psrlq mm1,32 347 mov byte ptr[edi+6], al338 mov byte ptr[edi+6], al 348 339 movd dword ptr[edi], mm1 349 shr eax,8350 mov word ptr[edi+4],ax351 jmp bltEndMMX8340 shr eax,8 341 mov word ptr[edi+4], ax 342 jmp bltEndMMX8 352 343 353 344 blt6MMX8: 354 movd mm0, dword ptr[esi] 355 mov ax, word ptr[esi+4] 356 mov bx, word ptr[edi+4] 357 movd mm1, dword ptr[edi] 358 psllq mm0,32 359 psllq mm1,32 360 movd mm0,eax 361 movd mm1,ebx 362 movq mm2,mm0 ; copy source 363 pcmpeqb mm0,mm4 ; create mask 364 pand mm1,mm0 ; mask dest 365 pandn mm2,mm0 ; mask source 366 por mm1,mm2 ; or them 345 movd mm0, dword ptr[esi] 346 mov ax, word ptr[esi+4] 347 mov bx, word ptr[edi+4] 348 movd mm1, dword ptr[edi] 349 psllq mm0, 32 350 psllq mm1, 32 351 movd mm5, eax 352 por 
mm0, mm5 353 movd mm6, ebx 354 por mm1, mm6 355 movq mm2, mm0 ; copy source 356 pcmpeqb mm0, mm4 ; create mask 357 pand mm1, mm0 ; mask dest 358 pandn mm0, mm2 ; mask source 359 por mm1, mm0 ; or them 367 360 movd eax, mm1 368 361 psrlq mm1,32 369 mov word ptr[edi+4],ax362 mov word ptr[edi+4], ax 370 363 movd dword ptr[edi], mm1 371 jmp bltEndMMX8364 jmp bltEndMMX8 372 365 373 366 blt5MMX8: 374 367 movd mm0, dword ptr[esi] 375 368 movd mm1, dword ptr[edi] 376 movq mm2, mm0; copy source377 pcmpeqb mm0, mm4 ; create mask378 pand mm1, mm0; mask dest379 add esi, 4;380 pandn mm 2,mm0; mask source381 por mm1, mm2; or them369 movq mm2, mm0 ; copy source 370 pcmpeqb mm0, mm4 ; create mask 371 pand mm1, mm0 ; mask dest 372 add esi, 4 373 pandn mm0, mm2 ; mask source 374 por mm1, mm0 ; or them 382 375 movd dword ptr[edi], mm1 383 add edi,4384 jmp blt1MMX8376 add edi,4 377 jmp blt1MMX8 385 378 386 379 blt4MMX8: 387 380 movd mm0, dword ptr[esi] 388 381 movd mm1, dword ptr[edi] 389 movq mm2,mm0 ; copy source382 movq mm2,mm0 ; copy source 390 383 pcmpeqb mm0,mm4 ; create mask 391 pand mm1,mm0 ; mask dest392 pandn mm 2,mm0; mask source393 por mm1,mm 2; or them394 movd dword ptr [edi], mm1 ; write back result395 jmp bltEndMMX8384 pand mm1,mm0 ; mask dest 385 pandn mm0,mm2 ; mask source 386 por mm1,mm0 ; or them 387 movd dword ptr [edi], mm1 ; write back result 388 jmp bltEndMMX8 396 389 ; 397 390 ; loading a dword into mm0/mm1 might be faster for 3-2... 
398 391 ; 399 392 blt3MMX8: 400 mov ax 401 mov bx 393 mov ax, word ptr [esi] 394 mov bx, word ptr [edi] 402 395 shl eax,8 ; 3 Pixel left to blit 403 396 shl ebx,8 ; so shift the buffers … … 408 401 movq mm2,mm0 409 402 pcmpeqb mm0,mm4 ; create mask 410 pand mm1,mm0 ; mask dest411 pandn mm 2,mm0; mask source412 por mm1,mm 2; or them413 movd eax, mm1; write back result414 mov byte ptr[edi+2],al415 shr eax,8416 mov word ptr[edi],ax403 pand mm1,mm0 ; mask dest 404 pandn mm0,mm2 ; mask source 405 por mm1,mm0 ; or them 406 movd eax, mm1 ; write back result 407 mov byte ptr[edi+2], al 408 shr eax, 8 409 mov word ptr[edi], ax 417 410 jmp bltEndMMX8 418 411 419 412 blt2MMX8: 420 413 mov al, byte ptr [esi] 421 cmp al,dl422 je blt1aMMX8414 cmp al, dl 415 je blt1aMMX8 423 416 mov byte ptr [edi], al 424 mov bl, byte ptr [esi+1]425 cmp bl,dl426 jebltEndMMX8427 mov byte ptr [edi+1], bl428 jmpbltEndMMX8417 ; mov bl, byte ptr [esi+1] 418 ; cmp bl, dl 419 ; je bltEndMMX8 420 ; mov byte ptr [edi+1], bl 421 ; jmp bltEndMMX8 429 422 blt1aMMX8: 430 add esi, 1431 add edi, 1423 add esi, 1 424 add edi, 1 432 425 blt1MMX8: 433 426 mov al, byte ptr [esi] 434 cmp al,dl435 je bltEndMMX8427 cmp al, dl 428 je bltEndMMX8 436 429 mov byte ptr [edi], al 437 430 … … 461 454 mov edx, dword ptr [ebp+16] ; colorkey 462 455 mov edi, dword ptr [ebp+8] ; dest 463 mov eax, dword ptr [ebp+16] ; colorkey464 shr edx,16;465 456 mov ecx, dword ptr [ebp+20] ; linesize in pixel! 
466 mov dx,ax ; extend colorkey to 32 Bit 457 458 mov eax, edx 459 shl edx, 16; 460 mov dx, ax ; extend colorKey to 32 bit 461 467 462 mov esi, dword ptr [ebp+12] ; src 468 463 mov eax, ecx ; copy of linesize … … 476 471 477 472 bltLoopMMX16: 478 movq mm0,qword ptr [esi] ; get source dword479 movq mm1,qword ptr [edi] ; get destination480 movq mm2,mm0 ; copy source481 pcmpeqw mm0,mm4 ; create mask482 pand mm1,mm0 ; mask dest483 add esi, 8484 pandn mm 2,mm0 ; masksource485 por mm1,mm 2; or them473 movq mm0,qword ptr [esi] ; get source dword 474 movq mm1,qword ptr [edi] ; get destination 475 movq mm2,mm0 ; copy source 476 pcmpeqw mm0,mm4 ; create mask in mm0 477 pand mm1,mm0 ; mask dest 478 add esi, 8 ; point to next source qword 479 pandn mm0,mm2 ; NOT mask AND source 480 por mm1,mm0 ; or them 486 481 movq qword ptr [edi], mm1 ; write back result 487 482 add edi, 8 … … 505 500 movd mm0, dword ptr[esi] 506 501 movd mm1, dword ptr[edi] 507 movq mm2,mm0 ; copy source502 movq mm2,mm0 ; copy source 508 503 add esi,4 509 504 pcmpeqw mm0,mm4 ; create mask 16 bit 510 pand mm1,mm0 ; mask dest511 pandn mm2,mm0; mask source505 pand mm1,mm0 ; mask dest 506 pandn mm0,mm2 ; mask source 512 507 add edi,4 513 por mm1,mm 2; or them508 por mm1,mm0 ; or them 514 509 movd dword ptr[edi-4], mm1 515 510 jmp blt1MMX16 … … 521 516 pcmpeqw mm0,mm4 ; create mask 16 bit 522 517 pand mm1,mm0 ; mask dest 523 pandn mm 2,mm0; mask source524 por mm1,mm 2; or them518 pandn mm0,mm2 ; mask source 519 por mm1,mm0 ; or them 525 520 movd dword ptr [edi], mm1 ; write back result 526 521 jmp bltEndMMX16 … … 1942 1937 inc eax 1943 1938 Return: 1939 ; mov eax, 0 ; pretend no MMX is available 1944 1940 pop edx 1945 1941 pop ecx
Note: See TracChangeset for help on using the changeset viewer.