source: trunk/kProfile/prfx86msc.asm@ 3526

Last change on this file since 3526 was 3526, checked in by bird, 18 years ago

made it build again.

File size: 10.6 KB
Line 
1; $Id: $
2;; @file
3;
4; kProfiler MK2 - Microsoft C/C++ Compiler Interaction.
5;
6;
7; Copyright (c) 2006 knut st. osmundsen <bird-src-spam@anduin.net.de>
8;
9;
10; This file is part of kLIBC.
11;
12; kLIBC is free software; you can redistribute it and/or modify
13; it under the terms of the GNU General Public License as published by
14; the Free Software Foundation; either version 2 of the License, or
15; (at your option) any later version.
16;
17; kLIBC is distributed in the hope that it will be useful,
18; but WITHOUT ANY WARRANTY; without even the implied warranty of
19; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20; GNU General Public License for more details.
21;
22; You should have received a copy of the GNU General Public License
23; along with kLIBC; if not, write to the Free Software
24; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25;
26;
27
[section .data]
;
; Calibration state used by common_return_path and calibrate.
;
g_fCalibrated:  dd      0               ; set to 1 by calibrate; common_return_path calls calibrate while it is 0
g_OverheadAdj:  dd      0               ; adjustment in ticks, subtracted from the saved entry timestamp
34
35[section .text]
36
37extern KPRF_ENTER
38extern KPRF_LEAVE
39
40global __penter
41global __pexit
42
43;ifdef UNDEFINED
44global common_return_path
45global common_overhead
46global common_no_overhead
47global calibrate
48global calib_inner_update_minimum
49global calib_inner_next
50global calib_outer_dec
51global calib_outer_inc
52global calib_done
53global calib_nullproc
54;endif
55
56
;;
; __penter - the function entry hook.
;
; On x86 the call to this hook has been observed to be placed before the
; stack frame is created, as the very first instruction of the function.
;
; eax, edx, eflags and ecx are saved here; common_return_path restores them.
;
; Stack layout when KPRF_ENTER is called (hex offsets from esp):
;      24   return address of the calling function.
;      20   our return address - the address of the calling function + 5.
;      1c   eax
;      18   edx
;      14   eflags
;      10   ecx
;       c   tsc high      - param 3
;       8   tsc low
;       4   frame pointer - param 2
;       0   function ptr  - param 1
;
align 16
__penter:
        ; Preserve the volatile registers and grab the time stamp.
        push    eax
        push    edx
        rdtsc                                   ; edx:eax = time stamp counter
        pushfd
        push    ecx

        ; Build the KPRF_ENTER call frame (cdecl): 4 + 4 + 8 bytes.
        sub     esp, 0x10
        mov     [esp + 0x0c], edx               ; param 3 - the timestamp, high dword
        mov     [esp + 0x08], eax               ;           the timestamp, low dword
        lea     edx, [esp + 0x24]               ; param 2 - frame pointer: address of the slot holding
        mov     [esp + 0x04], edx               ;           the return address of the function calling us
        mov     eax, [esp + 0x20]               ; param 1 - the function address: our return address
        sub     eax, 5                          ;           minus the size of the call instruction
        mov     [esp], eax

        call    KPRF_ENTER
        jmp     common_return_path              ; eax = KPRF_ENTER result, consumed there
95
96
;;
; __pexit - the function exit hook.
;
; On x86 the call to this hook has been observed to be placed right before
; the return instruction. This matters since we have to calculate the same
; stack address (param 2) as __penter does.
;
; eax, edx, eflags and ecx are saved here; common_return_path restores them.
;
; Stack layout when KPRF_LEAVE is called (hex offsets from esp):
;      24   return address of the calling function.
;      20   our return address - an address inside the calling function,
;           right after its call to us.
;      1c   eax
;      18   edx
;      14   eflags
;      10   ecx
;       c   tsc high      - param 3
;       8   tsc low
;       4   frame pointer - param 2
;       0   function ptr  - param 1
;
align 16
__pexit:
        ; Preserve the volatile registers and grab the time stamp.
        push    eax
        push    edx
        rdtsc                                   ; edx:eax = time stamp counter
        pushfd
        push    ecx

        ; Build the KPRF_LEAVE call frame (cdecl): 4 + 4 + 8 bytes.
        sub     esp, 0x10
        mov     [esp + 0x0c], edx               ; param 3 - the timestamp, high dword
        mov     [esp + 0x08], eax               ;           the timestamp, low dword
        lea     edx, [esp + 0x24]               ; param 2 - frame pointer: address of the slot holding
        mov     [esp + 0x04], edx               ;           the return address of the function calling us
        mov     eax, [esp + 0x20]               ; param 1 - some address in the function: our return
        sub     eax, 5                          ;           address minus the size of the call instruction
        mov     [esp], eax

        call    KPRF_LEAVE
        jmp     common_return_path              ; eax = KPRF_LEAVE result, consumed there
136
137
;;
; The return path shared by the enter and exit hooks.
; Sharing it means a single overhead adjustment serves both hooks, which
; saves calibration effort - and space :-)
;
; In:   eax   - value returned by KPRF_ENTER / KPRF_LEAVE. Zero means there
;               is nothing to update; otherwise it is used as the address
;               of a 64-bit overhead counter.
;       stack - the 16 byte call frame (entry timestamp at esp + 8) followed
;               by the saved ecx, eflags, edx, eax and the hook's return
;               address.
; Out:  returns to the hook's caller with ecx, eflags, edx and eax restored.
;
; common_overhead is entered directly, with the same inputs, by the
; calibration hooks.
align 16
common_return_path:
        ; Anything to account the overhead to?
        test    eax, eax
        jz      common_no_overhead
        cmp     byte [g_fCalibrated], 0
        jnz     common_overhead
        call    calibrate                       ; first time only; calibrate saves and restores all registers
common_overhead:
        mov     ecx, eax                        ; ecx <- pointer to the overhead counter.
        mov     eax, [g_OverheadAdj]            ; adjust the entry timestamp before reading the tsc again
        sub     [esp + 0x08], eax
        sbb     dword [esp + 0x0c], 0

        rdtsc
        sub     eax, [esp + 0x08]               ; edx:eax = now - adjusted entry timestamp
        sbb     edx, [esp + 0x0c]
        add     [ecx], eax                      ; accumulate into the 64-bit counter
        adc     [ecx + 4], edx
common_no_overhead:
        add     esp, 0x10                       ; drop the 4 + 4 + 8 byte call frame

        ; Restore the volatile registers saved by the hook.
        pop     ecx
        popfd
        pop     edx
        pop     eax
        ret
170
;;
; Scratch data used during calibration; esi points to it for the duration.
; Every value is a 64-bit quantity stored as a Lo/Hi dword pair.
struc CALIBDATA
        .OverheadLo     resd 1  ; overhead accumulated through common_overhead
        .OverheadHi     resd 1
        .ProfiledLo     resd 1  ; time accumulated between calib_penter and calib_pexit
        .ProfiledHi     resd 1
        .EnterTSLo      resd 1  ; timestamp stored by the most recent calib_penter
        .EnterTSHi      resd 1
        .MinLo          resd 1  ; smallest (profiled - overhead) seen in the current inner loop
        .MinHi          resd 1
endstruc
183
184
185
align 16
;;
; Do necessary calibrations.
;
; Searches for a g_OverheadAdj value for which the minimum of
; (profiled time - overhead) over a run of calib_nullproc (16 hook pairs)
; lands in the range 20h..30h ticks. Up to 200h adjustment steps are tried,
; each using up to 200h runs of calib_nullproc to establish the minimum.
;
; In:       nothing.
; Out:      g_fCalibrated set to 1, g_OverheadAdj updated.
; Saves:    all general registers and eflags (pushfd/pushad).
; Uses:     esi = CALIBDATA on the stack (read by calib_penter/calib_pexit),
;           ebx = outer loop counter, ecx = inner loop counter.
;
calibrate:
        ; prolog
        push    ebp
        mov     ebp, esp
        pushfd
        pushad
        sub     esp, CALIBDATA_size
        mov     esi, esp                        ; esi points to the CALIBDATA

        ;
        ; Set the calibrated flag right away, before the loops below run;
        ; common_return_path only calls us while the flag is zero.
        ;
        mov     eax, 1
        xchg    dword [g_fCalibrated], eax

        ;
        ; The outer loop - find the right adjustment.
        ;
        mov     ebx, 200h                       ; loop counter.
calib_outer_loop:

        ;
        ; The inner loop - calls the function a number of times to establish
        ; a good minimum value.
        ;
        mov     ecx, 200h
        mov     dword [esi + CALIBDATA.MinLo], 0ffffffffh
        mov     dword [esi + CALIBDATA.MinHi], 07fffffffh
calib_inner_loop:

        ; zero the overhead and profiled times.
        xor     eax, eax
        mov     [esi + CALIBDATA.OverheadLo], eax
        mov     [esi + CALIBDATA.OverheadHi], eax
        mov     [esi + CALIBDATA.ProfiledLo], eax
        mov     [esi + CALIBDATA.ProfiledHi], eax
        call    calib_nullproc

        ; subtract the overhead
        mov     eax, [esi + CALIBDATA.ProfiledLo]
        mov     edx, [esi + CALIBDATA.ProfiledHi]
        sub     eax, [esi + CALIBDATA.OverheadLo]
        sbb     edx, [esi + CALIBDATA.OverheadHi]

        ; update the minimum value (signed 64-bit compare, high dword first).
        test    edx, 080000000h
        jnz near calib_outer_dec                ; if negative, just simplify and shortcut
        cmp     edx, [esi + CALIBDATA.MinHi]
        jg      calib_inner_next
        jl      calib_inner_update_minimum
        cmp     eax, [esi + CALIBDATA.MinLo]
        jge     calib_inner_next
calib_inner_update_minimum:
        mov     [esi + CALIBDATA.MinLo], eax
        mov     [esi + CALIBDATA.MinHi], edx
calib_inner_next:
        loop    calib_inner_loop

        ; Is the minimum value acceptable?
        test    dword [esi + CALIBDATA.MinHi], 80000000h
        jnz     calib_outer_dec                 ; simplify if negative.
        cmp     dword [esi + CALIBDATA.MinHi], 0
        jnz     calib_outer_inc                 ; this shouldn't be possible
        cmp     dword [esi + CALIBDATA.MinLo], 1fh
        jbe     calib_outer_dec                 ; too low - 2 ticks per pair is the minimum!
        cmp     dword [esi + CALIBDATA.MinLo], 30h
        jbe     calib_done                      ; this is fine!
calib_outer_inc:
        inc     dword [g_OverheadAdj]
        jmp     calib_outer_next
calib_outer_dec:
        ; Stop rather than go below the smallest adjustment. The unsigned
        ; 'below or equal' also catches an adjustment of 0 (the initial
        ; value), which a plain equality test against 1 would let through
        ; and wrap around to 0ffffffffh.
        cmp     dword [g_OverheadAdj], 1
        jbe     calib_done
        dec     dword [g_OverheadAdj]
calib_outer_next:
        dec     ebx
        jnz     calib_outer_loop
calib_done:

        ; epilog
        add     esp, CALIBDATA_size
        popad
        popfd
        leave
        ret
275
276
277
278
;;
; The calibration __penter. It has to be identical to the real hook apart
; from the KPRF call, which is replaced by recording the entry timestamp.
;
; In:   esi - pointer to the CALIBDATA.
align 16
calib_penter:
        ; This part must be identical to __penter.
        push    eax
        push    edx
        rdtsc
        pushfd
        push    ecx

        ; Remember when we entered (edx:eax = the timestamp just read).
        mov     [esi + CALIBDATA.EnterTSLo], eax
        mov     [esi + CALIBDATA.EnterTSHi], edx

        ; Create the 16 byte call frame common_overhead expects:
        ; timestamp at esp + 8, the two pointer params zeroed.
        push    edx
        push    eax
        push    0
        push    0

        ; Account the overhead to CALIBDATA.Overhead.
        lea     eax, [esi + CALIBDATA.OverheadLo]
        jmp     common_overhead
302
303
;;
; The calibration __pexit. It has to be identical to the real hook apart
; from the KPRF call, which is replaced by accumulating the profiled time.
;
; In:   esi - pointer to the CALIBDATA.
align 16
calib_pexit:
        ; This part must be identical to __pexit.
        push    eax
        push    edx
        rdtsc
        pushfd
        push    ecx

        ; Profiled += now - EnterTS. The timestamp is kept on the stack
        ; across the calculation since it is needed again below.
        push    eax
        push    edx
        sub     eax, [esi + CALIBDATA.EnterTSLo]
        sbb     edx, [esi + CALIBDATA.EnterTSHi]
        add     [esi + CALIBDATA.ProfiledLo], eax
        adc     [esi + CALIBDATA.ProfiledHi], edx
        pop     edx
        pop     eax

        ; Create the 16 byte call frame common_overhead expects:
        ; timestamp at esp + 8, the two pointer params zeroed.
        push    edx
        push    eax
        push    0
        push    0

        ; NOTE(review): the counter handed to common_overhead is EnterTS, not
        ; Overhead, so the exit side overhead is added onto the EnterTS
        ; fields. Presumably they serve as a scratch slot here, since
        ; calib_penter overwrites them on its next call - confirm.
        lea     eax, [esi + CALIBDATA.EnterTSLo]
        jmp     common_overhead
333
334
;;
; The 'function' we're profiling: 16 back-to-back enter/exit hook pairs.
; The general idea is that each pair should take something like 2-10 ticks.
;
; (Btw. If we don't use multiple pairs here, we end up with the wrong result.)
;
; In:   esi - pointer to the CALIBDATA (used by the calibration hooks).
align 16
calib_nullproc:
%rep 16                                         ; pairs 0 thru f, emitted as straight-line code
        call    calib_penter
        call    calib_pexit

%endrep
        ret
390
Note: See TracBrowser for help on using the repository browser.