source: trunk/kStuff/kProfiler2/prfx86msc.asm@ 3874

Last change on this file since 3874 was 3609, checked in by bird, 18 years ago

keywords

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 10.3 KB
Line 
1; $Id: prfx86msc.asm 3609 2007-10-29 01:11:39Z bird $
2;; @file
3; kProfiler Mark 2 - Microsoft C/C++ Compiler Interaction, x86.
4;
5
6;
7; Copyright (c) 2006-2007 knut st. osmundsen <bird-src-spam@anduin.net>
8;
9; This file is part of kProfiler.
10;
11; kProfiler is free software; you can redistribute it and/or
12; modify it under the terms of the GNU Lesser General Public
13; License as published by the Free Software Foundation; either
14; version 2.1 of the License, or (at your option) any later version.
15;
16; kProfiler is distributed in the hope that it will be useful,
17; but WITHOUT ANY WARRANTY; without even the implied warranty of
18; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19; Lesser General Public License for more details.
20;
21; You should have received a copy of the GNU Lesser General Public
22; License along with kProfiler; if not, write to the Free Software
23; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24;
25
26
[section .data]
;
; Calibration state used by common_return_path and calibrate.
;
g_fCalibrated:  dd      0               ; set to 1 by calibrate; non-zero means "do not calibrate again"
g_OverheadAdj:  dd      0               ; ticks subtracted from the saved entry timestamp before the overhead is computed
33
[section .text]

; Profiler entry points defined outside this file; they are called below
; with a cdecl argument frame (function ptr, frame pointer, 64-bit timestamp).
extern  KPRF_ENTER
extern  KPRF_LEAVE

; The hook functions exported by this file.
global  __penter
global  __pexit

; Internal labels that are exported as well. The commented-out conditional
; suggests this was meant to be optional - presumably a debugging aid so the
; labels show up in symbol listings (TODO confirm).
;ifdef UNDEFINED
global  common_return_path
global  common_overhead
global  common_no_overhead
global  calibrate
global  calib_inner_update_minimum
global  calib_inner_next
global  calib_outer_dec
global  calib_outer_inc
global  calib_done
global  calib_nullproc
;endif
54
55
;;
; Function-entry hook.
;
; On x86 the call to this function has been observed to be placed before
; the stack frame is created, as the very first instruction of the function.
;
; Stack layout (hex offsets from esp) once the KPRF_ENTER frame is built:
;       24  return address of the calling function.
;       20  our return address - the address of the calling function + 5.
;       1c  eax
;       18  edx
;       14  eflags
;       10  ecx
;        c  tsc high - param 3
;        8  tsc low
;        4  frame pointer - param 2
;        0  function ptr - param 1
;
; Whatever KPRF_ENTER leaves in eax is passed on to common_return_path, which
; treats it as a pointer to a 64-bit overhead counter (0 = nothing to update).
; eax, ecx, edx and eflags are restored by common_return_path before returning.
;
; NOTE: calib_penter mirrors the first five instructions; keep them in sync.
;
align 16
__penter:
        ; Save the volatile registers and flags, and sample the TSC.
        push    eax
        push    edx
        rdtsc                           ; edx:eax = entry timestamp
        pushfd
        push    ecx

        ; Build the cdecl argument frame for KPRF_ENTER.
        sub     esp, 4 + 4 + 8          ; function ptr + frame pointer + 64-bit timestamp
        mov     [esp + 0ch], edx        ; param 3 - timestamp, high dword
        mov     [esp + 08h], eax        ;           timestamp, low dword
        lea     edx, [esp + 24h]        ; param 2 - frame pointer (address of the slot holding
        mov     [esp + 04h], edx        ;           the return address of the function calling us)
        mov     eax, [esp + 20h]        ; param 1 - our return address ...
        sub     eax, 5                  ;           ... minus the call instruction = function address
        mov     [esp], eax

        call    KPRF_ENTER
        jmp     common_return_path      ; shared overhead accounting and register restore
94
95
;;
; Function-exit hook.
;
; On x86 the call to this function has been observed to be placed right before
; the return instruction. This matters since we have to calculate the same
; stack address (frame pointer) as in __penter.
;
; Stack layout (hex offsets from esp) once the KPRF_LEAVE frame is built:
;       24  return address of the calling function.
;       20  our return address - the address of the calling function + 5.
;       1c  eax
;       18  edx
;       14  eflags
;       10  ecx
;        c  tsc high - param 3
;        8  tsc low
;        4  frame pointer - param 2
;        0  function ptr - param 1
;
; Whatever KPRF_LEAVE leaves in eax is passed on to common_return_path, which
; treats it as a pointer to a 64-bit overhead counter (0 = nothing to update).
; eax, ecx, edx and eflags are restored by common_return_path before returning.
;
; NOTE: calib_pexit mirrors the first five instructions; keep them in sync.
;
align 16
__pexit:
        ; Save the volatile registers and flags, and sample the TSC.
        push    eax
        push    edx
        rdtsc                           ; edx:eax = exit timestamp
        pushfd
        push    ecx

        ; Build the cdecl argument frame for KPRF_LEAVE.
        sub     esp, 4 + 4 + 8          ; function ptr + frame pointer + 64-bit timestamp
        mov     [esp + 0ch], edx        ; param 3 - timestamp, high dword
        mov     [esp + 08h], eax        ;           timestamp, low dword
        lea     edx, [esp + 24h]        ; param 2 - frame pointer (address of the slot holding
        mov     [esp + 04h], edx        ;           the return address of the function calling us)
        mov     eax, [esp + 20h]        ; param 1 - our return address ...
        sub     eax, 5                  ;           ... minus the call instruction = some address in the function
        mov     [esp], eax

        call    KPRF_LEAVE
        jmp     common_return_path      ; shared overhead accounting and register restore
135
136
;;
; Common return path for both the enter and the exit hook.
;
; Sharing it means a single overhead adjustment (g_OverheadAdj) covers both
; hooks, which saves calibration effort - and some space.
;
; In:   eax         pointer to a 64-bit overhead counter, or 0 to skip the
;                   overhead accounting.
;       [esp+08h]   64-bit timestamp sampled on hook entry.
;       [esp+10h..] saved ecx, eflags, edx, eax, followed by the hook's
;                   return address.
; Out:  returns to the hook's caller with eax, ecx, edx and eflags restored.
;
; calib_penter and calib_pexit jump straight to common_overhead, bypassing
; the calibration check.
;
align 16
common_return_path:
        ; Anything to account the overhead to?
        test    eax, eax
        jz      common_no_overhead
        cmp     byte [g_fCalibrated], 0
        jnz     common_overhead
        call    calibrate               ; first time through; calibrate preserves all registers
common_overhead:
        mov     ecx, eax                ; ecx <- pointer to the overhead counter
        mov     eax, [g_OverheadAdj]    ; move the entry timestamp back by the adjustment
        sub     [esp + 08h], eax        ; before reading the tsc again
        sbb     dword [esp + 0ch], 0

        rdtsc                           ; edx:eax = now
        sub     eax, [esp + 08h]        ; edx:eax = now - (entry timestamp - adjustment)
        sbb     edx, [esp + 0ch]
        add     [ecx], eax              ; accumulate into the 64-bit counter
        adc     [ecx + 4], edx
common_no_overhead:
        add     esp, 4 + 4 + 8          ; drop the argument frame

        ; Restore the volatile registers and flags saved by the hook.
        pop     ecx
        popfd
        pop     edx
        pop     eax
        ret
169
;;
; Scratch data for calibrate; esi points to it while we're calibrating.
; Every quantity is a 64-bit value stored as a low/high dword pair.
struc CALIBDATA
    .OverheadLo:    resd 1              ; overhead accumulated via calib_penter
    .OverheadHi:    resd 1
    .ProfiledLo:    resd 1              ; enter-to-exit time accumulated by calib_pexit
    .ProfiledHi:    resd 1
    .EnterTSLo:     resd 1              ; timestamp stored by calib_penter
    .EnterTSHi:     resd 1
    .MinLo:         resd 1              ; smallest (profiled - overhead) seen in the current inner loop
    .MinHi:         resd 1
endstruc
182
183
184
align 16
;;
; Do necessary calibrations.
;
; Searches for a g_OverheadAdj value for which the best-case
; (profiled time - accounted overhead) of one calib_nullproc run, i.e. of
; 16 enter/exit pairs, ends up in the range 20h..30h ticks.
;
; The outer loop makes at most 200h adjustment steps; for each step the inner
; loop runs calib_nullproc up to 200h times and keeps the minimum.
;
; All general registers and eflags are preserved (pushfd/pushad), so the
; caller's eax survives the call.
;
calibrate:
        ; prolog
        push    ebp
        mov     ebp, esp
        pushfd
        pushad
        sub     esp, CALIBDATA_size
        mov     esi, esp                ; esi points to the CALIBDATA

        ;
        ; Raise the calibrated flag up front, so that later passes through
        ; common_return_path skip the calibration.
        ;
        mov     eax, 1
        xchg    dword [g_fCalibrated], eax

        ;
        ; The outer loop - find the right adjustment.
        ;
        mov     ebx, 200h               ; loop counter.
calib_outer_loop:

        ;
        ; The inner loop - calls the function a number of times to establish
        ; a good minimum value.
        ;
        mov     ecx, 200h
        mov     dword [esi + CALIBDATA.MinLo], 0ffffffffh
        mov     dword [esi + CALIBDATA.MinHi], 07fffffffh
calib_inner_loop:

        ; zero the overhead and profiled times.
        xor     eax, eax
        mov     [esi + CALIBDATA.OverheadLo], eax
        mov     [esi + CALIBDATA.OverheadHi], eax
        mov     [esi + CALIBDATA.ProfiledLo], eax
        mov     [esi + CALIBDATA.ProfiledHi], eax
        call    calib_nullproc          ; the hooks restore eax, ecx and edx; esi/ebx are not touched

        ; subtract the overhead
        mov     eax, [esi + CALIBDATA.ProfiledLo]
        mov     edx, [esi + CALIBDATA.ProfiledHi]
        sub     eax, [esi + CALIBDATA.OverheadLo]
        sbb     edx, [esi + CALIBDATA.OverheadHi]

        ; update the minimum value (signed 64-bit compare of edx:eax against Min).
        test    edx, 080000000h
        jnz     near calib_outer_dec    ; if negative, just simplify and shortcut
        cmp     edx, [esi + CALIBDATA.MinHi]
        jg      calib_inner_next
        jl      calib_inner_update_minimum
        cmp     eax, [esi + CALIBDATA.MinLo]
        jge     calib_inner_next
calib_inner_update_minimum:
        mov     [esi + CALIBDATA.MinLo], eax
        mov     [esi + CALIBDATA.MinHi], edx
calib_inner_next:
        ; dec/jnz instead of the legacy loop instruction; the flags are dead
        ; here since both successors start with a flag-setting instruction.
        dec     ecx
        jnz     calib_inner_loop

        ; Is the minimum value acceptable?
        test    dword [esi + CALIBDATA.MinHi], 80000000h
        jnz     calib_outer_dec         ; simplify if negative.
        cmp     dword [esi + CALIBDATA.MinHi], 0
        jnz     calib_outer_inc         ; this shouldn't be possible
        cmp     dword [esi + CALIBDATA.MinLo], 1fh
        jbe     calib_outer_dec         ; too low - 2 ticks per pair is the minimum!
        cmp     dword [esi + CALIBDATA.MinLo], 30h
        jbe     calib_done              ; this is fine!
calib_outer_inc:
        inc     dword [g_OverheadAdj]
        jmp     calib_outer_next
calib_outer_dec:
        ; Never take the adjustment below 1, and never decrement it when it is
        ; still 0 (its initial value): that would wrap to 0ffffffffh and make
        ; every subsequent measurement come out negative.
        cmp     dword [g_OverheadAdj], 1
        jbe     calib_done
        dec     dword [g_OverheadAdj]
calib_outer_next:
        dec     ebx
        jnz     calib_outer_loop
calib_done:

        ; epilog
        add     esp, CALIBDATA_size
        popad
        popfd
        leave
        ret
274
275
276
277
;;
; The calibration __penter - this must be identical to the real thing except
; for the KPRF call.
;
; In:   esi     pointer to the CALIBDATA.
; Out:  leaves via common_overhead with eax pointing at CALIBDATA.Overhead,
;       so the measured hook overhead is accumulated there.
;
align 16
calib_penter:
        ; This part must be identical to __penter.
        push    eax
        push    edx
        rdtsc                           ; edx:eax = entry timestamp
        pushfd
        push    ecx

        ; Remember the entry timestamp for calib_pexit.
        mov     [esi + CALIBDATA.EnterTSLo], eax
        mov     [esi + CALIBDATA.EnterTSHi], edx

        ; Create the call frame: same layout as the real hook, with the two
        ; pointer parameters zeroed.
        push    edx                     ; timestamp, high dword
        push    eax                     ; timestamp, low dword
        push    0                       ; frame pointer slot
        push    0                       ; function pointer slot

        lea     eax, [esi + CALIBDATA.OverheadLo]
        jmp     common_overhead         ; skips the NULL and g_fCalibrated checks
301
302
;;
; The calibration __pexit - this must be identical to the real thing except
; for the KPRF call.
;
; In:   esi     pointer to the CALIBDATA.
; Out:  CALIBDATA.Profiled += (exit timestamp - CALIBDATA.EnterTS);
;       leaves via common_overhead with eax pointing at CALIBDATA.EnterTS.
;
align 16
calib_pexit:
        ; This part must be identical to __pexit.
        push    eax
        push    edx
        rdtsc                           ; edx:eax = exit timestamp
        pushfd
        push    ecx

        ; Update the profiled time; the timestamp is kept intact on the stack
        ; while edx:eax is used for the subtraction.
        push    eax
        push    edx
        sub     eax, [esi + CALIBDATA.EnterTSLo]
        sbb     edx, [esi + CALIBDATA.EnterTSHi]
        add     [esi + CALIBDATA.ProfiledLo], eax
        adc     [esi + CALIBDATA.ProfiledHi], edx
        pop     edx
        pop     eax

        ; Create the call frame: same layout as the real hook, with the two
        ; pointer parameters zeroed.
        push    edx                     ; timestamp, high dword
        push    eax                     ; timestamp, low dword
        push    0                       ; frame pointer slot
        push    0                       ; function pointer slot

        ; NOTE(review): unlike calib_penter this hands EnterTS, not Overhead,
        ; to common_overhead. The value added there is overwritten by the next
        ; calib_penter, so the exit-side overhead is presumably discarded on
        ; purpose (it lies outside the enter-to-exit interval) - confirm.
        lea     eax, [esi + CALIBDATA.EnterTSLo]
        jmp     common_overhead         ; skips the NULL and g_fCalibrated checks
332
333
;;
; The 'function' we're profiling during calibration: 16 back-to-back
; calib_penter / calib_pexit pairs and nothing else.
; The general idea is that each pair should take something like 2-10 ticks.
;
; (Btw. If we don't use multiple pairs here, we end up with the wrong result.)
;
; In:   esi     pointer to the CALIBDATA (used by the calibration hooks).
;
align 16
calib_nullproc:
%rep 16                                 ; pairs 0..f, emitted as straight-line code
        call    calib_penter
        call    calib_pexit
%endrep
        ret
389
Note: See TracBrowser for help on using the repository browser.