| 1 | ; $Id: prfx86msc.asm 29 2009-07-01 20:30:29Z bird $
 | 
|---|
| 2 | ;; @file
 | 
|---|
| 3 | ; kProfiler Mark 2 - Microsoft C/C++ Compiler Interaction, x86.
 | 
|---|
| 4 | ;
 | 
|---|
| 5 | 
 | 
|---|
| 6 | ;
 | 
|---|
| 7 | ; Copyright (c) 2006-2007 Knut St. Osmundsen <bird-kStuff-spamix@anduin.net>
 | 
|---|
| 8 | ;
 | 
|---|
| 9 | ; Permission is hereby granted, free of charge, to any person
 | 
|---|
| 10 | ; obtaining a copy of this software and associated documentation
 | 
|---|
| 11 | ; files (the "Software"), to deal in the Software without
 | 
|---|
| 12 | ; restriction, including without limitation the rights to use,
 | 
|---|
| 13 | ; copy, modify, merge, publish, distribute, sublicense, and/or sell
 | 
|---|
| 14 | ; copies of the Software, and to permit persons to whom the
 | 
|---|
| 15 | ; Software is furnished to do so, subject to the following
 | 
|---|
| 16 | ; conditions:
 | 
|---|
| 17 | ;
 | 
|---|
| 18 | ; The above copyright notice and this permission notice shall be
 | 
|---|
| 19 | ; included in all copies or substantial portions of the Software.
 | 
|---|
| 20 | ;
 | 
|---|
| 21 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 | 
|---|
| 22 | ; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 | 
|---|
| 23 | ; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 | 
|---|
| 24 | ; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 | 
|---|
| 25 | ; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 | 
|---|
| 26 | ; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 | 
|---|
| 27 | ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 | 
|---|
| 28 | ; OTHER DEALINGS IN THE SOFTWARE.
 | 
|---|
| 29 | ;
 | 
|---|
| 30 | 
 | 
|---|
[section .data]
;
; Calibration state shared by the enter and exit hooks.
;

; Set to 1 by calibrate; common_return_path only calls calibrate while the
; low byte of this flag is still zero.
g_fCalibrated:  dd      0

; Tick adjustment that common_overhead subtracts from the hook's entry
; timestamp before working out the overhead. Tuned by calibrate.
g_OverheadAdj:  dd      0
| 37 | 
 | 
|---|
| 38 | [section .text]
 | 
|---|
| 39 | 
 | 
|---|
| 40 | extern KPRF_ENTER
 | 
|---|
| 41 | extern KPRF_LEAVE
 | 
|---|
| 42 | 
 | 
|---|
| 43 | global __penter
 | 
|---|
| 44 | global __pexit
 | 
|---|
| 45 | 
 | 
|---|
| 46 | ;ifdef  UNDEFINED
 | 
|---|
| 47 | global common_return_path
 | 
|---|
| 48 | global common_overhead
 | 
|---|
| 49 | global common_no_overhead
 | 
|---|
| 50 | global calibrate
 | 
|---|
| 51 | global calib_inner_update_minimum
 | 
|---|
| 52 | global calib_inner_next
 | 
|---|
| 53 | global calib_outer_dec
 | 
|---|
| 54 | global calib_outer_inc
 | 
|---|
| 55 | global calib_done
 | 
|---|
| 56 | global calib_nullproc
 | 
|---|
| 57 | ;endif
 | 
|---|
| 58 | 
 | 
|---|
| 59 | 
 | 
|---|
;;
; __penter - function entry hook.
;
; On x86 the call to this hook has been observed to be emitted as the very
; first instruction of the hooked function, i.e. before that function creates
; its stack frame.
;
; Stack layout at the point KPRF_ENTER is called (hex offsets from esp):
;       24      return address of the calling function.
;       20      our return address - the address of the calling function + 5.
;       1c      saved eax
;       18      saved edx
;       14      saved eflags
;       10      saved ecx
;       0c      tsc high       - param 3 (64-bit timestamp, high dword)
;       08      tsc low        - param 3 (64-bit timestamp, low dword)
;       04      frame pointer  - param 2
;       00      function ptr   - param 1
;
; The value KPRF_ENTER returns in eax is handed on to common_return_path,
; which also unwinds the frame and restores eax, edx, ecx and eflags.
;
align 16
__penter:
        ; Save the volatile registers and sample the time stamp counter.
        ; eax and edx are pushed first because rdtsc overwrites them.
        push    eax
        push    edx
        rdtsc                                   ; edx:eax = entry timestamp
        pushfd
        push    ecx

        ; Build the cdecl argument frame for KPRF_ENTER.
        sub     esp, 16                         ; 4 (param 1) + 4 (param 2) + 8 (param 3)
        mov     [esp + 0ch], edx                ; param 3, high dword of the timestamp
        mov     [esp + 08h], eax                ; param 3, low dword of the timestamp
        lea     edx, [esp + 24h]                ; param 2: address of the slot holding the
        mov     [esp + 04h], edx                ;          calling function's return address
        mov     eax, [esp + 20h]                ; param 1: our return address ...
        sub     eax, 5                          ;          ... minus the 5 byte call = function address
        mov     [esp], eax

        call    KPRF_ENTER
        jmp     common_return_path
| 98 | 
 | 
|---|
| 99 | 
 | 
|---|
;;
; __pexit - function exit hook.
;
; On x86 the call to this hook has been observed to be emitted right before
; the return instruction. This matters since we have to calculate the same
; stack address as in __penter.
;
; Stack layout at the point KPRF_LEAVE is called (hex offsets from esp):
;       24      return address of the calling function.
;       20      our return address - the address of the calling function + 5.
;       1c      saved eax
;       18      saved edx
;       14      saved eflags
;       10      saved ecx
;       0c      tsc high       - param 3 (64-bit timestamp, high dword)
;       08      tsc low        - param 3 (64-bit timestamp, low dword)
;       04      frame pointer  - param 2
;       00      function ptr   - param 1
;
; The value KPRF_LEAVE returns in eax is handed on to common_return_path,
; which also unwinds the frame and restores eax, edx, ecx and eflags.
;
align 16
__pexit:
        ; Save the volatile registers and sample the time stamp counter.
        ; eax and edx are pushed first because rdtsc overwrites them.
        push    eax
        push    edx
        rdtsc                                   ; edx:eax = exit timestamp
        pushfd
        push    ecx

        ; Build the cdecl argument frame for KPRF_LEAVE.
        sub     esp, 16                         ; 4 (param 1) + 4 (param 2) + 8 (param 3)
        mov     [esp + 0ch], edx                ; param 3, high dword of the timestamp
        mov     [esp + 08h], eax                ; param 3, low dword of the timestamp
        lea     edx, [esp + 24h]                ; param 2: address of the slot holding the
        mov     [esp + 04h], edx                ;          calling function's return address
        mov     eax, [esp + 20h]                ; param 1: our return address ...
        sub     eax, 5                          ;          ... minus the 5 byte call = some address in the function
        mov     [esp], eax

        call    KPRF_LEAVE
        jmp     common_return_path
| 139 | 
 | 
|---|
| 140 | 
 | 
|---|
;;
; Common return path for both the enter and the exit hook.
;
; It is shared so that both hooks can use the same overhead adjustment, which
; saves calibration effort - and some space. The calibration stand-ins
; (calib_penter / calib_pexit) join in at common_overhead.
;
; In:   eax         - value returned by KPRF_ENTER / KPRF_LEAVE. Zero skips the
;                     overhead accounting; anything else is used as the address
;                     of the 64-bit overhead counter to update.
;       [esp + 08h] - low dword of the timestamp taken at hook entry.
;       [esp + 0ch] - high dword of the timestamp taken at hook entry.
;       [esp + 10h] and up - saved ecx, eflags, edx, eax and the hook's
;                     return address, in that order.
;
; Out:  ecx, eflags, edx and eax are restored from the stack and control
;       returns to the code that called the hook.
;
align 16
common_return_path:
        ; Anything to account for at all?
        test    eax, eax
        jz      common_no_overhead

        ; First time through: let calibrate work out g_OverheadAdj.
        ; (calibrate preserves every register, so eax survives the call.)
        cmp     byte [g_fCalibrated], 0
        jnz     common_overhead
        call    calibrate

common_overhead:
        mov     ecx, eax                        ; ecx <- pointer to the overhead counter.
        mov     eax, [g_OverheadAdj]            ; move the entry timestamp back by the
        sub     [esp + 08h], eax                ; adjustment before reading the tsc again
        sbb     dword [esp + 0ch], 0

        rdtsc                                   ; edx:eax = now
        sub     eax, [esp + 08h]                ; edx:eax = now - adjusted entry timestamp
        sbb     edx, [esp + 0ch]
        add     [ecx], eax                      ; 64-bit add into the overhead counter
        adc     [ecx + 4], edx

common_no_overhead:
        add     esp, 16                         ; drop the 4 + 4 + 8 byte argument frame

        ; Restore the volatile registers saved by the hook prologue.
        pop     ecx
        popfd
        pop     edx
        pop     eax
        ret
| 173 | 
 | 
|---|
;;
; Scratch data used while calibrating; esi points to it.
; calibrate allocates one instance on its stack.
;
struc CALIBDATA
    ; 64-bit overhead counter; calib_penter passes its address to common_overhead.
    .OverheadLo resd 1
    .OverheadHi resd 1
    ; 64-bit sum of (exit tsc - enter tsc), accumulated by calib_pexit.
    .ProfiledLo resd 1
    .ProfiledHi resd 1
    ; 64-bit timestamp stored by the most recent calib_penter.
    .EnterTSLo  resd 1
    .EnterTSHi  resd 1
    ; Smallest (profiled - overhead) value calibrate has seen in the current round.
    .MinLo      resd 1
    .MinHi      resd 1
endstruc
| 186 | 
 | 
|---|
| 187 | 
 | 
|---|
| 188 | 
 | 
|---|
align 16
;;
; Do necessary calibrations: tune g_OverheadAdj.
;
; Each round runs calib_nullproc CALIB_INNER_LOOPS times and keeps the
; smallest "profiled - overhead" tick count. Depending on that minimum,
; g_OverheadAdj is stepped up or down by one and another round is run, until
; the minimum lands in the CALIB_MIN_TOO_LOW + 1 .. CALIB_MIN_OK_MAX range,
; the adjustment cannot be lowered any further, or CALIB_OUTER_LOOPS rounds
; have been spent.
;
; In:   nothing.
; Out:  g_fCalibrated = 1, g_OverheadAdj tuned.
;       All general purpose registers and eflags are preserved.
;
CALIB_OUTER_LOOPS   equ 200h            ; max number of adjustment rounds.
CALIB_INNER_LOOPS   equ 200h            ; calib_nullproc samples per round.
CALIB_MIN_TOO_LOW   equ 1fh             ; minimum at or below this: adjustment too big.
CALIB_MIN_OK_MAX    equ 30h             ; minimum at or below this (and above the former): done.

calibrate:
        ; prolog - save everything and carve a CALIBDATA out of the stack.
        push    ebp
        mov     ebp, esp
        pushfd
        pushad
        sub     esp, CALIBDATA_size
        mov     esi, esp                ; esi points to the CALIBDATA

        ;
        ; Flag the calibration as done up front. The previous flag value
        ; that xchg leaves in eax is not used.
        ;
        mov     eax, 1
        xchg    dword [g_fCalibrated], eax

        ;
        ; The outer loop - find the right adjustment.
        ;
        mov     ebx, CALIB_OUTER_LOOPS  ; ebx = rounds left.
calib_outer_loop:

        ;
        ; The inner loop - calls the function a number of times to establish
        ;                  a good minimum value.
        ;
        mov     ecx, CALIB_INNER_LOOPS  ; ecx = samples left in this round.
        mov     dword [esi + CALIBDATA.MinLo], 0ffffffffh
        mov     dword [esi + CALIBDATA.MinHi], 07fffffffh
calib_inner_loop:

        ; zero the overhead and profiled times.
        xor     eax, eax
        mov     [esi + CALIBDATA.OverheadLo], eax
        mov     [esi + CALIBDATA.OverheadHi], eax
        mov     [esi + CALIBDATA.ProfiledLo], eax
        mov     [esi + CALIBDATA.ProfiledHi], eax
        call    calib_nullproc

        ; edx:eax = profiled - overhead
        mov     eax, [esi + CALIBDATA.ProfiledLo]
        mov     edx, [esi + CALIBDATA.ProfiledHi]
        sub     eax, [esi + CALIBDATA.OverheadLo]
        sbb     edx, [esi + CALIBDATA.OverheadHi]

        ; update the minimum value.
        test    edx, edx
        js near calib_outer_dec         ; if negative, just simplify and shortcut
        ; Both high dwords are non-negative here, and the low dwords are plain
        ; unsigned magnitudes, so the whole 64-bit compare is done unsigned.
        ; (A signed compare of the low dwords would treat a sample with bit 31
        ; set as smaller than any real minimum.)
        cmp     edx, [esi + CALIBDATA.MinHi]
        ja      calib_inner_next
        jb      calib_inner_update_minimum
        cmp     eax, [esi + CALIBDATA.MinLo]
        jae     calib_inner_next
calib_inner_update_minimum:
        mov     [esi + CALIBDATA.MinLo], eax
        mov     [esi + CALIBDATA.MinHi], edx
calib_inner_next:
        dec     ecx
        jnz     calib_inner_loop

        ; Is the minimum value acceptable?
        test    dword [esi + CALIBDATA.MinHi], 80000000h
        jnz     calib_outer_dec         ; simplify if negative.
        cmp     dword [esi + CALIBDATA.MinHi], 0
        jnz     calib_outer_inc         ; this shouldn't be possible
        cmp     dword [esi + CALIBDATA.MinLo], CALIB_MIN_TOO_LOW
        jbe     calib_outer_dec         ; too low - 2 ticks per pair is the minimum!
        cmp     dword [esi + CALIBDATA.MinLo], CALIB_MIN_OK_MAX
        jbe     calib_done              ; this is fine!
calib_outer_inc:
        inc     dword [g_OverheadAdj]
        jmp     calib_outer_next
calib_outer_dec:
        ; Never step below 1, and never step down from 0 either: decrementing
        ; a zero adjustment would wrap it around to 0ffffffffh.
        cmp     dword [g_OverheadAdj], 1
        jbe     calib_done
        dec     dword [g_OverheadAdj]
calib_outer_next:
        dec     ebx
        jnz     calib_outer_loop
calib_done:

        ; epilog
        add     esp, CALIBDATA_size
        popad
        popfd
        leave
        ret
| 278 | 
 | 
|---|
| 279 | 
 | 
|---|
| 280 | 
 | 
|---|
| 281 | 
 | 
|---|
;;
; The calibration __penter - this must be identical to the real thing except
; for the KPRF call.
;
; In:   esi - points to the CALIBDATA.
;
; Records the entry timestamp in CALIBDATA.EnterTS and then leaves through
; common_overhead with CALIBDATA.Overhead as the overhead counter.
;
align 16
calib_penter:
        ; This part must be identical to __penter.
        push    eax
        push    edx
        rdtsc                                   ; edx:eax = entry timestamp
        pushfd
        push    ecx

        ; Remember when this pair was entered; calib_pexit needs it.
        mov     [esi + CALIBDATA.EnterTSLo], eax
        mov     [esi + CALIBDATA.EnterTSHi], edx

        ; Create the 16 byte call frame common_overhead expects.
        push    edx                             ; +0ch  tsc high
        push    eax                             ; +08h  tsc low
        push    0                               ; +04h  frame pointer slot (unused here)
        push    0                               ; +00h  function pointer slot (unused here)

        lea     eax, [esi + CALIBDATA.OverheadLo] ; eax = overhead counter to update
        jmp     common_overhead
| 305 | 
 | 
|---|
| 306 | 
 | 
|---|
;;
; The calibration __pexit - this must be identical to the real thing except
; for the KPRF call.
;
; In:   esi - points to the CALIBDATA.
;
; Adds (exit timestamp - CALIBDATA.EnterTS) to CALIBDATA.Profiled and then
; leaves through common_overhead.
;
align 16
calib_pexit:
        ; This part must be identical to __pexit.
        push    eax
        push    edx
        rdtsc                                   ; edx:eax = exit timestamp
        pushfd
        push    ecx

        ; Update the profiled time, keeping the raw timestamp for the frame below.
        push    eax
        push    edx
        sub     eax, [esi + CALIBDATA.EnterTSLo] ; edx:eax = exit - enter
        sbb     edx, [esi + CALIBDATA.EnterTSHi]
        add     [esi + CALIBDATA.ProfiledLo], eax
        adc     [esi + CALIBDATA.ProfiledHi], edx
        pop     edx
        pop     eax

        ; Create the 16 byte call frame common_overhead expects.
        push    edx                             ; +0ch  tsc high
        push    eax                             ; +08h  tsc low
        push    0                               ; +04h  frame pointer slot (unused here)
        push    0                               ; +00h  function pointer slot (unused here)

        ; common_overhead adds the measured overhead to the 64-bit value eax
        ; points to. Here that is EnterTS, which calib_penter overwrites at the
        ; start of the next pair, so this overhead never reaches the Overhead
        ; counter.
        ; NOTE(review): presumably deliberate, since this overhead falls outside
        ; the enter..exit window summed up in Profiled - confirm.
        lea     eax, [esi + CALIBDATA.EnterTSLo]
        jmp     common_overhead
| 336 | 
 | 
|---|
| 337 | 
 | 
|---|
;;
; The 'function' we're profiling while calibrating.
; The general idea is that each enter/exit pair should take something like
; 2-10 ticks.
;
; Sixteen back-to-back pairs are issued.
; (Btw. If we don't use multiple pairs here, we end up with the wrong result.)
;
; In:   esi - points to the CALIBDATA (used by calib_penter / calib_pexit).
;
align 16
calib_nullproc:
%rep 16                                         ; pairs 0 through f
        call    calib_penter
        call    calib_pexit
%endrep
        ret
| 393 | 
 | 
|---|