; * This file is part of uniaud.dll.
; *
; * Copyright (c) 2010 Mensys BV
; * Copyright (c) 2007 Vlad Stelmahovsky aka Vladest
; *
; * This library is free software: you can redistribute it and/or modify
; * it under the terms of the GNU Lesser General Public License as
; * published by the Free Software Foundation, either version 3 of
; * the License, or (at your option) any later version.
; *
; * This library is distributed in the hope that it will be useful,
; * but WITHOUT ANY WARRANTY; without even the implied warranty of
; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; * GNU Lesser General Public License for more details.
; *
; * You should have received a copy of the GNU Lesser General Public
; * License and the GNU General Public License along with this library.
; * If not, see http://www.gnu.org/licenses/.

;-----------------------------------------------------------------------
; Four forward byte-copy routines for 32-bit x86 (Intel operand order).
;
; Shared calling contract (all four entry points):
;   In:    [esp+4]  = destination pointer
;          [esp+8]  = source pointer
;          [esp+12] = byte count
;   Out:   eax      = the destination pointer exactly as it was passed in
;   Saved: ebp, esi, edi are preserved
;   Clobb: ecx, edx, flags, plus the SIMD registers noted per routine
;   The routines return with a plain `ret`; the caller removes the
;   arguments.
;   NOTE(review): the string instructions assume the direction flag is
;   clear on entry - nothing in this file clears it.
;   The copy always proceeds from low to high addresses.
;-----------------------------------------------------------------------

CODE32  segment public use32 'CODE'

        public  _memcpy
        public  sse_memcpy
        public  mmx_memcpy
        public  mmx2_memcpy

; EBP-relative offsets of the three stack arguments.
arg0    EQU     8                       ; destination pointer
arg1    EQU     12                      ; source pointer
arg2    EQU     16                      ; byte count

; Smallest byte counts for which the SIMD block loops are attempted;
; anything shorter is handed straight to _memcpy.
MIN_LEN         EQU     3Fh             ; sse_memcpy, mmx2_memcpy
MMX1_MIN_LEN    EQU     7FFh            ; mmx_memcpy

        .686
        .XMM3

;-----------------------------------------------------------------------
; _memcpy - plain integer copy built on the string-move instructions.
;
; Counts of 0..3 are copied one byte at a time; larger counts are copied
; as count/4 dwords followed by an optional word and an optional byte
; selected by bits 1 and 0 of the count.
;
; Returns the original destination pointer for every count. The argument
; slot is never written, so the value reloaded at memcpy_done is the
; pointer the caller passed.
;-----------------------------------------------------------------------
_memcpy proc near
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi

        mov     edi, [ebp+arg0]         ; edi = dest
        mov     esi, [ebp+arg1]         ; esi = src
        mov     ecx, [ebp+arg2]         ; ecx = count
        cmp     ecx, 3
        ja      memcpy_dwords           ; unsigned: count > 3

        rep     movsb                   ; 0..3 bytes (count 0 copies nothing)
        jmp     memcpy_done

        align 4
memcpy_dwords:
        mov     eax, ecx                ; keep the low bits of count in al
        shr     ecx, 2                  ; ecx = number of whole dwords
        rep     movsd
        test    al, 2                   ; two leftover bytes?
        jz      memcpy_last_byte
        movsw
memcpy_last_byte:
        test    al, 1                   ; one leftover byte?
        jz      memcpy_done
        movsb

memcpy_done:
        mov     eax, [ebp+arg0]         ; return dest as passed in
        pop     esi
        pop     edi
        pop     ebp
        ret
_memcpy endp

;-----------------------------------------------------------------------
; sse_memcpy - copy using 64-byte blocks moved through xmm0..xmm3 and
; written with non-temporal stores (movntps).
;
; Steps for count >= MIN_LEN:
;   1. byte-copy until the destination is 16-byte aligned
;   2. copy count/64 blocks; loads use movaps when the source is also
;      16-byte aligned at that point, movups otherwise
;   3. sfence, then emms
;   4. pass the remaining count (0..63 bytes) to _memcpy
; Shorter counts skip steps 1-3 entirely.
;
; Register roles: edi = dest cursor, esi = src cursor,
;                 edx = bytes remaining, ecx = scratch / block counter.
; Clobbers xmm0-xmm3 in addition to the common set.
;-----------------------------------------------------------------------
sse_memcpy proc near
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi

        mov     edi, [ebp+arg0]
        mov     esi, [ebp+arg1]
        mov     edx, [ebp+arg2]

        prefetchnta [esi]               ; warm up the first 320 bytes of src
        prefetchnta [esi+40h]
        prefetchnta [esi+80h]
        prefetchnta [esi+0C0h]
        prefetchnta [esi+100h]

        cmp     edx, MIN_LEN
        jb      sse_tail                ; unsigned: too short for blocks

        ; --- step 1: align the destination to 16 bytes ---
        mov     ecx, edi
        and     ecx, 0Fh
        jz      sse_blocks              ; already aligned
        neg     ecx
        add     ecx, 10h                ; ecx = 16 - (dest & 15)
        sub     edx, ecx                ; cannot underflow: ecx <= 15 < MIN_LEN
        rep     movsb

sse_blocks:
        ; --- step 2: split remaining count into blocks and tail ---
        mov     ecx, edx
        and     edx, 3Fh                ; edx = tail bytes (0..63)
        shr     ecx, 6                  ; ecx = 64-byte blocks; ZF set if none
        jz      sse_flush
        test    esi, 0Fh                ; source 16-byte aligned as well?
        jz      sse_loop_aligned

sse_loop_unaligned:
        prefetchnta [esi+140h]          ; stay 320 bytes ahead of the loads
        movups  xmm0, [esi]
        movups  xmm1, [esi+10h]
        movups  xmm2, [esi+20h]
        movups  xmm3, [esi+30h]
        movntps [edi], xmm0
        movntps [edi+10h], xmm1
        movntps [edi+20h], xmm2
        movntps [edi+30h], xmm3
        add     esi, 40h
        add     edi, 40h
        dec     ecx
        jnz     sse_loop_unaligned
        jmp     sse_flush

sse_loop_aligned:
        prefetchnta [esi+140h]
        movaps  xmm0, [esi]
        movaps  xmm1, [esi+10h]
        movaps  xmm2, [esi+20h]
        movaps  xmm3, [esi+30h]
        movntps [edi], xmm0
        movntps [edi+10h], xmm1
        movntps [edi+20h], xmm2
        movntps [edi+30h], xmm3
        add     esi, 40h
        add     edi, 40h
        dec     ecx
        jnz     sse_loop_aligned

sse_flush:
        ; --- step 3 ---
        sfence                          ; order the non-temporal stores
        emms

sse_tail:
        ; --- step 4: leftover bytes via the integer copy ---
        test    edx, edx
        jz      sse_done
        push    edx                     ; count
        push    esi                     ; src
        push    edi                     ; dest
        call    _memcpy                 ; its return value is not used
        add     esp, 0Ch

sse_done:
        mov     eax, [ebp+arg0]         ; return dest as passed in
        pop     esi
        pop     edi
        pop     ebp
        ret
sse_memcpy endp

;-----------------------------------------------------------------------
; mmx_memcpy - copy using 64-byte blocks moved through mm0..mm7 with
; ordinary movq stores (no prefetch, no non-temporal stores).
;
; Steps for count >= MMX1_MIN_LEN:
;   1. byte-copy until the destination is 8-byte aligned
;   2. copy count/64 blocks
;   3. emms
;   4. pass the remaining count (0..63 bytes) to _memcpy
; Shorter counts skip steps 1-3 entirely.
;
; Register roles: edi = dest cursor, esi = src cursor,
;                 edx = bytes remaining, ecx = scratch / block counter.
; Clobbers mm0-mm7 in addition to the common set.
;-----------------------------------------------------------------------
mmx_memcpy proc near
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi

        mov     edi, [ebp+arg0]
        mov     esi, [ebp+arg1]
        mov     edx, [ebp+arg2]

        cmp     edx, MMX1_MIN_LEN
        jb      mmx_tail                ; unsigned: too short for blocks

        ; --- step 1: align the destination to 8 bytes ---
        mov     ecx, edi
        and     ecx, 7
        jz      mmx_blocks              ; already aligned
        neg     ecx
        add     ecx, 8                  ; ecx = 8 - (dest & 7)
        sub     edx, ecx                ; cannot underflow: ecx <= 7
        rep     movsb

mmx_blocks:
        ; --- step 2: split remaining count into blocks and tail ---
        mov     ecx, edx
        and     edx, 3Fh                ; edx = tail bytes (0..63)
        shr     ecx, 6                  ; ecx = 64-byte blocks; ZF set if none
        jz      mmx_flush

mmx_loop:
        movq    mm0, qword ptr [esi]
        movq    mm1, qword ptr [esi+8]
        movq    mm2, qword ptr [esi+10h]
        movq    mm3, qword ptr [esi+18h]
        movq    mm4, qword ptr [esi+20h]
        movq    mm5, qword ptr [esi+28h]
        movq    mm6, qword ptr [esi+30h]
        movq    mm7, qword ptr [esi+38h]
        movq    qword ptr [edi], mm0
        movq    qword ptr [edi+8], mm1
        movq    qword ptr [edi+10h], mm2
        movq    qword ptr [edi+18h], mm3
        movq    qword ptr [edi+20h], mm4
        movq    qword ptr [edi+28h], mm5
        movq    qword ptr [edi+30h], mm6
        movq    qword ptr [edi+38h], mm7
        add     esi, 40h
        add     edi, 40h
        dec     ecx
        jnz     mmx_loop

mmx_flush:
        ; --- step 3 ---
        emms                            ; leave MMX state before returning

mmx_tail:
        ; --- step 4: leftover bytes via the integer copy ---
        test    edx, edx
        jz      mmx_done
        push    edx                     ; count
        push    esi                     ; src
        push    edi                     ; dest
        call    _memcpy                 ; its return value is not used
        add     esp, 0Ch

mmx_done:
        mov     eax, [ebp+arg0]         ; return dest as passed in
        pop     esi
        pop     edi
        pop     ebp
        ret
mmx_memcpy endp

;-----------------------------------------------------------------------
; mmx2_memcpy - like mmx_memcpy, but prefetches the source and writes
; with non-temporal stores (movntq).
;
; Steps for count >= MIN_LEN:
;   1. byte-copy until the destination is 8-byte aligned
;   2. copy count/64 blocks
;   3. sfence, then emms
;   4. pass the remaining count (0..63 bytes) to _memcpy
; Shorter counts skip steps 1-3 entirely.
;
; Register roles: edi = dest cursor, esi = src cursor,
;                 edx = bytes remaining, ecx = scratch / block counter.
; Clobbers mm0-mm7 in addition to the common set.
;-----------------------------------------------------------------------
mmx2_memcpy proc near
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi

        mov     edi, [ebp+arg0]
        mov     esi, [ebp+arg1]
        mov     edx, [ebp+arg2]

        prefetchnta [esi]               ; warm up the first 320 bytes of src
        prefetchnta [esi+40h]
        prefetchnta [esi+80h]
        prefetchnta [esi+0C0h]
        prefetchnta [esi+100h]

        cmp     edx, MIN_LEN
        jb      mmx2_tail               ; unsigned: too short for blocks

        ; --- step 1: align the destination to 8 bytes ---
        mov     ecx, edi
        and     ecx, 7
        jz      mmx2_blocks             ; already aligned
        neg     ecx
        add     ecx, 8                  ; ecx = 8 - (dest & 7)
        sub     edx, ecx                ; cannot underflow: ecx <= 7 < MIN_LEN
        rep     movsb

mmx2_blocks:
        ; --- step 2: split remaining count into blocks and tail ---
        mov     ecx, edx
        and     edx, 3Fh                ; edx = tail bytes (0..63)
        shr     ecx, 6                  ; ecx = 64-byte blocks; ZF set if none
        jz      mmx2_flush

mmx2_loop:
        prefetchnta [esi+140h]          ; stay 320 bytes ahead of the loads
        movq    mm0, qword ptr [esi]
        movq    mm1, qword ptr [esi+8]
        movq    mm2, qword ptr [esi+10h]
        movq    mm3, qword ptr [esi+18h]
        movq    mm4, qword ptr [esi+20h]
        movq    mm5, qword ptr [esi+28h]
        movq    mm6, qword ptr [esi+30h]
        movq    mm7, qword ptr [esi+38h]
        movntq  qword ptr [edi], mm0
        movntq  qword ptr [edi+8], mm1
        movntq  qword ptr [edi+10h], mm2
        movntq  qword ptr [edi+18h], mm3
        movntq  qword ptr [edi+20h], mm4
        movntq  qword ptr [edi+28h], mm5
        movntq  qword ptr [edi+30h], mm6
        movntq  qword ptr [edi+38h], mm7
        add     esi, 40h
        add     edi, 40h
        dec     ecx
        jnz     mmx2_loop

mmx2_flush:
        ; --- step 3 ---
        sfence                          ; order the non-temporal stores
        emms                            ; leave MMX state before returning

mmx2_tail:
        ; --- step 4: leftover bytes via the integer copy ---
        test    edx, edx
        jz      mmx2_done
        push    edx                     ; count
        push    esi                     ; src
        push    edi                     ; dest
        call    _memcpy                 ; its return value is not used
        add     esp, 0Ch

mmx2_done:
        mov     eax, [ebp+arg0]         ; return dest as passed in
        pop     esi
        pop     edi
        pop     ebp
        ret
mmx2_memcpy endp

CODE32  ends

        END