Changeset 3586 for trunk/kStuff/kProfiler2/prfamd64msc.asm
- Timestamp:
- Sep 4, 2007, 10:43:53 PM (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/kStuff/kProfiler2/prfamd64msc.asm
r3566 r3586 74 74 _penter: 75 75 ; save volatile register and get the time stamp. 76 push eax77 push edx76 push rax 77 push rdx 78 78 rdtsc 79 pushfd 80 push ecx 81 82 ; setting up the enter call frame (cdecl). 83 sub esp, 4 + 4 + 8 84 mov [esp + 0ch], edx ; Param 3 - the timestamp 85 mov [esp + 08h], eax 86 lea edx, [esp + 24h] ; Param 2 - frame pointer (pointer to the return address of the function calling us) 87 mov [esp + 04h], edx 88 mov eax, [esp + 20h] ; Param 1 - The function address 89 sub eax, 5 ; call instruction 90 mov [esp], eax 91 79 pushfq 80 push rcx 81 push r8 82 push r9 83 push r10 84 push r11 85 sub rsp, 30h ; rsp is aligned at this point (7 pushes). 86 ; reserve 20h for spill, and 8 bytes for ts. 87 88 ; setting up the enter call frame 89 mov r8d, edx 90 shl r8, 32 91 or r8, rax ; param 3 - the timestamp 92 mov [rsp + 20h], r8 ; save the tsc for later use. 93 lea rdx, [rsp + 7*8 + 30h] ; Param 2 - frame pointer (pointer to the return address of the function calling us) 94 mov rcx, [rdx] ; Param 1 - The function address 92 95 call KPRF_ENTER 93 96 jmp common_return_path … … 115 118 _pexit: 116 119 ; save volatile register and get the time stamp. 117 push eax118 push edx120 push rax 121 push rdx 119 122 rdtsc 120 pushfd 121 push ecx 122 123 ; setting up the leave call frame (cdecl). 124 sub esp, 4 + 4 + 8 125 mov [esp + 0ch], edx ; Param 3 - the timestamp 126 mov [esp + 08h], eax 127 lea edx, [esp + 24h] ; Param 2 - frame pointer (pointer to the return address of the function calling us) 128 mov [esp + 04h], edx 129 mov eax, [esp + 20h] ; Param 1 - Some address in the function. 130 sub eax, 5 ; call instruction 131 mov [esp], eax 132 123 pushfq 124 push rcx 125 push r8 126 push r9 127 push r10 128 push r11 129 sub rsp, 30h ; rsp is aligned at this point (7 pushes). 130 ; reserve 20h for spill, and 8 bytes for ts. 131 132 ; setting up the leave call frame. 133 mov r8d, edx 134 shl r8, 32 135 or r8, rax ; param 3 - the timestamp 136 mov [rsp + 20h], r8 ; save the tsc for later use. 137 lea rdx, [rsp + 7*8 + 30h] ; Param 2 - frame pointer (pointer to the return address of the function calling us) 138 mov rcx, [rdx] ; Param 1 - The function address 133 139 call KPRF_LEAVE 134 140 jmp common_return_path … … 144 150 test eax, eax 145 151 jz common_no_overhead 146 cmp byte [g_fCalibrated ], 0152 cmp byte [g_fCalibrated wrt rip], 0 147 153 jnz common_overhead 148 154 call calibrate 149 155 common_overhead: 150 mov ecx, eax ; ecx <- pointer to overhead counter. 151 mov eax, [g_OverheadAdj] ; apply the adjustment before reading tsc 152 sub [esp + 08h], eax 153 sbb dword [esp + 0ch], 0 156 mov rcx, rax ; rcx <- pointer to overhead counter. 157 mov eax, [g_OverheadAdj wrt rip]; apply the adjustment before reading tsc 158 sub [rsp + 20h], rax 154 159 155 160 rdtsc 156 s ub eax, [esp + 08h]157 sbb edx, [esp + 0ch]158 add [ecx], eax159 adc [ecx + 4], edx161 shl rdx, 32 162 or rdx, rax ; rdx = 64-bit timestamp 163 sub rdx, [rsp + 20h] ; rdx = elapsed 164 lock add [rcx], rdx ; update counter. 160 165 common_no_overhead: 161 add esp, 4 + 4 + 8162 166 163 167 ; restore volatile registers. 164 pop ecx 165 popfd 166 pop edx 167 pop eax 168 add rsp, 30h 169 pop r11 170 pop r10 171 pop r9 172 pop r8 173 pop rcx 174 popfq 175 pop rdx 176 pop rax 168 177 ret 169 178 170 179 ;; 171 ; Data esi points to while we're calibrating.180 ; Data rsi points to while we're calibrating. 172 181 struc CALIBDATA 173 182 .OverheadLo resd 1 … … 188 197 ; 189 198 calibrate: 190 ; prolog 191 push ebp 192 mov ebp, esp 193 pushfd 194 pushad 195 sub esp, CALIBDATA_size 196 mov esi, esp ; esi points to the CALIBDATA 199 ; prolog - save everything 200 push rbp 201 pushfq 202 push rax ; pushaq 203 push rbx 204 push rcx 205 push rdx 206 push rdi 207 push rsi 208 push r8 209 push r9 210 push r10 211 push r11 212 push r12 213 push r13 214 push r14 215 push r15 216 mov rbp, rsp 217 218 sub rsp, CALIBDATA_size 219 mov rsi, rsp ; rsi points to the CALIBDATA 220 221 and rsp, -15 197 222 198 223 ; … … 200 225 ; 201 226 mov eax, 1 202 xchg dword [g_fCalibrated ], eax227 xchg dword [g_fCalibrated wrt rip], eax 203 228 204 229 ; … … 213 238 ; 214 239 mov ecx, 200h 215 mov dword [ esi + CALIBDATA.MinLo], 0ffffffffh216 mov dword [ esi + CALIBDATA.MinHi], 07fffffffh240 mov dword [rsi + CALIBDATA.MinLo], 0ffffffffh 241 mov dword [rsi + CALIBDATA.MinHi], 07fffffffh 217 242 calib_inner_loop: 218 243 219 244 ; zero the overhead and profiled times. 220 245 xor eax, eax 221 mov [esi + CALIBDATA.OverheadLo], eax 222 mov [esi + CALIBDATA.OverheadHi], eax 223 mov [esi + CALIBDATA.ProfiledLo], eax 224 mov [esi + CALIBDATA.ProfiledHi], eax 246 mov [rsi + CALIBDATA.OverheadLo], rax 247 mov [rsi + CALIBDATA.ProfiledLo], rax 225 248 call calib_nullproc 226 249 227 250 ; subtract the overhead 228 mov eax, [esi + CALIBDATA.ProfiledLo] 229 mov edx, [esi + CALIBDATA.ProfiledHi] 230 sub eax, [esi + CALIBDATA.OverheadLo] 231 sbb edx, [esi + CALIBDATA.OverheadHi] 251 mov rax, [rsi + CALIBDATA.ProfiledLo] 252 sub rax, [rsi + CALIBDATA.OverheadLo] 232 253 233 254 ; update the minimum value. 234 test edx, 080000000h 235 jnz near calib_outer_dec ; if negative, just simplify and shortcut 236 cmp edx, [esi + CALIBDATA.MinHi] 237 jg calib_inner_next 238 jl calib_inner_update_minimum 239 cmp eax, [esi + CALIBDATA.MinLo] 255 bt rax, 63 256 jc near calib_outer_dec ; if negative, just simplify and shortcut 257 cmp rax, [rsi + CALIBDATA.MinHi] 240 258 jge calib_inner_next 241 259 calib_inner_update_minimum: 242 mov [esi + CALIBDATA.MinLo], eax 243 mov [esi + CALIBDATA.MinHi], edx 260 mov [rsi + CALIBDATA.MinLo], rax 244 261 calib_inner_next: 245 262 loop calib_inner_loop 246 263 247 264 ; Is the minimum value acceptable? 248 test dword [ esi + CALIBDATA.MinHi], 80000000h265 test dword [rsi + CALIBDATA.MinLo + 4], 80000000h 249 266 jnz calib_outer_dec ; simplify if negative. 250 cmp dword [ esi + CALIBDATA.MinHi], 0267 cmp dword [rsi + CALIBDATA.MinHi + 4], 0 251 268 jnz calib_outer_inc ; this shouldn't be possible 252 cmp dword [ esi + CALIBDATA.MinLo], 1fh269 cmp dword [rsi + CALIBDATA.MinLo], 1fh 253 270 jbe calib_outer_dec ; too low - 2 ticks per pair is the minimum! 254 cmp dword [ esi + CALIBDATA.MinLo], 30h271 cmp dword [rsi + CALIBDATA.MinLo], 30h 255 272 jbe calib_done ; this is fine! 256 273 calib_outer_inc: 257 inc dword [g_OverheadAdj ]274 inc dword [g_OverheadAdj wrt rip] 258 275 jmp calib_outer_next 259 276 calib_outer_dec: 260 cmp dword [g_OverheadAdj ], 1277 cmp dword [g_OverheadAdj wrt rip], 1 261 278 je calib_done 262 dec dword [g_OverheadAdj ]279 dec dword [g_OverheadAdj wrt rip] 263 280 calib_outer_next: 264 281 dec ebx … … 266 283 calib_done: 267 284 268 ; epilog 269 add esp, CALIBDATA_size 270 popad 271 popfd 285 ; epilog - restore it all. 272 286 leave 287 pop r15 288 pop r14 289 pop r13 290 pop r12 291 pop r11 292 pop r10 293 pop r9 294 pop r8 295 pop rsi 296 pop rdi 297 pop rdx 298 pop rcx 299 pop rbx 300 pop rax 301 popfq 273 302 ret 274 303 … … 280 309 align 16 281 310 calib_penter: 282 ; This part must be identical 283 push eax284 push edx311 ; This part must be identical past the rdtsc. 312 push rax 313 push rdx 285 314 rdtsc 286 pushfd 287 push ecx 288 289 ; store the entry 290 mov [esi + CALIBDATA.EnterTSLo], eax 291 mov [esi + CALIBDATA.EnterTSHi], edx 292 293 ; create the call frame 294 push edx 295 push eax 296 push 0 297 push 0 298 299 lea eax, [esi + CALIBDATA.OverheadLo] 315 pushfq 316 push rcx 317 push r8 318 push r9 319 push r10 320 push r11 321 sub rsp, 30h ; rsp is aligned at this point (7 pushes). 322 ; reserve 20h for spill, and 8 bytes for ts. 323 324 ; store the entry / stack frame. 325 mov r8d, edx 326 shl r8, 32 327 or r8, rax 328 mov [rsp + 20h], r8 329 330 mov [rsi + CALIBDATA.EnterTSLo], r8 331 332 lea rax, [rsi + CALIBDATA.OverheadLo] 300 333 jmp common_overhead 301 334 … … 305 338 align 16 306 339 calib_pexit: 307 ; This part must be identical 308 push eax309 push edx340 ; This part must be identical past the rdtsc. 341 push rax 342 push rdx 310 343 rdtsc 311 pushfd 312 push ecx 313 314 ; update the time 315 push eax 316 push edx 317 sub eax, [esi + CALIBDATA.EnterTSLo] 318 sbb edx, [esi + CALIBDATA.EnterTSHi] 319 add [esi + CALIBDATA.ProfiledLo], eax 320 adc [esi + CALIBDATA.ProfiledHi], edx 321 pop edx 322 pop eax 323 324 ; create the call frame 325 push edx 326 push eax 327 push 0 328 push 0 329 330 lea eax, [esi + CALIBDATA.EnterTSLo] 344 pushfq 345 push rcx 346 push r8 347 push r9 348 push r10 349 push r11 350 sub rsp, 30h ; rsp is aligned at this point (7 pushes). 351 ; reserve 20h for spill, and 8 bytes for ts. 352 353 ; store the entry / stack frame. 354 mov r8d, edx 355 shl r8, 32 356 or r8, rax 357 mov [rsp + 20h], r8 358 359 sub r8, [rsi + CALIBDATA.EnterTSLo] 360 add [rsi + CALIBDATA.ProfiledLo], r8 361 362 lea rax, [rsi + CALIBDATA.EnterTSLo] 331 363 jmp common_overhead 332 364 … … 388 420 ret 389 421 422 423 ; 424 ; Dummy stack check function. 425 ; 426 global __chkstk 427 __chkstk: 428 ret
Note:
See TracChangeset
for help on using the changeset viewer.