source: trunk/kProfile/prfx86msc.asm@ 3525

Last change on this file since 3525 was 3524, checked in by bird, 18 years ago

kProfile Mark II. Some early/old code.

File size: 10.7 KB
Line 
; $Id: $
;; @file
;
; kProfiler MK2 - Microsoft C/C++ Compiler Interaction.
;
;
; Copyright (c) 2006 knut st. osmundsen <bird-src-spam@anduin.net.de>
;
;
; This file is part of kLIBC.
;
; kLIBC is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
;
; kLIBC is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with kLIBC; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
;
;
27
[section .data]
;
; Calibration state used by common_return_path and calibrate.
;
; g_fCalibrated - zero until calibrate has been entered; calibrate sets it to 1
;                 as its first action. common_return_path tests only the low byte.
; g_OverheadAdj - tick adjustment that common_overhead subtracts from the saved
;                 entry timestamp before computing the hook overhead. Tuned by
;                 calibrate.
;
g_fCalibrated:
    dd      0
g_OverheadAdj:
    dd      0
34
[section .text]

; Profiler entry points implemented outside this file. The hooks below call
; them with a cdecl frame: function address, frame pointer, 64-bit timestamp.
extern KPRF_ENTER
extern KPRF_LEAVE

; The compiler-facing hooks.
global __penter
global __pexit

; Internal labels, exported as well. The ifdef guard around them is commented
; out, so the export is unconditional. Every name listed here must match a
; label that is actually defined in this file.
;ifdef UNDEFINED
global common_return_path
global common_overhead
global common_no_overhead
global calibrate
global calib_outer_loop
global calib_inner_loop
global calib_inner_update_minimum
global calib_inner_next
global calib_outer_dec
global calib_outer_inc
global calib_done
global calib_nullproc
;endif
57
58
;;
; Function-entry hook.
;
; On x86 the call to this function has been observed to be put before
; creating the stack frame, as the very first instruction in the function.
;
; Saves eax, edx, eflags and ecx, samples the time stamp counter, builds a
; cdecl frame and calls KPRF_ENTER(function address, frame pointer, tsc).
; The saved registers and flags are restored by common_return_path, which
; also performs the final 'ret' back into the hooked function.
;
; Stack layout at the KPRF_ENTER call (hex offsets from esp):
;       24  return address of the calling function.
;       20  our return address - the address of the function calling us + 5.
;       1c  eax
;       18  edx
;       14  eflags
;       10  ecx
;        c  tsc high    - param 3
;        8  tsc low
;        4  frame pointer - param 2
;        0  function ptr  - param 1
;
; NOTE: the register-saving prologue must stay in sync with calib_penter.
;
align 16
__penter:
    ; save volatile register and get the time stamp.
    push    eax
    push    edx
    rdtsc                               ; edx:eax = time stamp
    pushfd
    push    ecx

    ; setting up the enter call frame (cdecl).
    sub     esp, 4 + 4 + 8
    mov     [esp + 0ch], edx            ; Param 3 - the timestamp
    mov     [esp + 08h], eax
    lea     edx, [esp + 24h]            ; Param 2 - frame pointer (pointer to the return address of the function calling us)
    mov     [esp + 04h], edx
    mov     eax, [esp + 20h]            ; Param 1 - The function address
    sub     eax, 5                      ; step back over the 5 byte call instruction
    mov     [esp], eax

    call    KPRF_ENTER                  ; eax = overhead counter pointer or 0 (see common_return_path)
    jmp     common_return_path
97
98
;;
; Function-exit hook.
;
; On x86 the call to this function has been observed to be put right before
; the return instruction. This fact matters since we have to calculate the
; same stack address as in __penter.
;
; Saves eax, edx, eflags and ecx, samples the time stamp counter, builds a
; cdecl frame and calls KPRF_LEAVE(address, frame pointer, tsc). The saved
; registers and flags are restored by common_return_path, which also performs
; the final 'ret' back into the hooked function.
;
; Stack layout at the KPRF_LEAVE call (hex offsets from esp):
;       24  return address of the calling function.
;       20  our return address - the address following the call to us.
;       1c  eax
;       18  edx
;       14  eflags
;       10  ecx
;        c  tsc high    - param 3
;        8  tsc low
;        4  frame pointer - param 2
;        0  function ptr  - param 1
;
; NOTE: the register-saving prologue must stay in sync with calib_pexit.
;
align 16
__pexit:
    ; save volatile register and get the time stamp.
    push    eax
    push    edx
    rdtsc                               ; edx:eax = time stamp
    pushfd
    push    ecx

    ; setting up the leave call frame (cdecl).
    sub     esp, 4 + 4 + 8
    mov     [esp + 0ch], edx            ; Param 3 - the timestamp
    mov     [esp + 08h], eax
    lea     edx, [esp + 24h]            ; Param 2 - frame pointer (pointer to the return address of the function calling us)
    mov     [esp + 04h], edx
    mov     eax, [esp + 20h]            ; Param 1 - Some address in the function.
    sub     eax, 5                      ; step back over the 5 byte call instruction
    mov     [esp], eax

    call    KPRF_LEAVE                  ; eax = overhead counter pointer or 0 (see common_return_path)
    jmp     common_return_path
138
139
;;
; This is the common return path for both the enter and exit hooks.
; It's kept common because we can then use the same overhead adjustment
; and save some calibration efforts. It also saves space :-)
;
; Input:
;   eax         - value returned by KPRF_ENTER / KPRF_LEAVE. Zero skips the
;                 overhead accounting; any other value is used as a pointer to
;                 a 64-bit counter that the hook overhead is added to.
;   [esp + 08h] - entry time stamp, low dword.
;   [esp + 0ch] - entry time stamp, high dword.
;   [esp + 10h] and up - saved ecx, eflags, edx, eax and the return address,
;                 exactly as pushed by the hook prologues.
;
; The calibration hooks jump straight to common_overhead with eax pointing
; into their CALIBDATA, bypassing the calibration check.
;
; Returns to the hooked function with ecx, eflags, edx and eax restored.
;
align 16
common_return_path:
    ; Update overhead
    test    eax, eax
    jz      common_no_overhead
    cmp     byte [g_fCalibrated], 0
    jnz     common_overhead
    call    calibrate                   ; first use only; preserves all registers and flags
common_overhead:
    mov     ecx, eax                    ; ecx <- pointer to overhead counter.
    mov     eax, [g_OverheadAdj]        ; apply the adjustment before reading tsc
    sub     [esp + 08h], eax
    sbb     dword [esp + 0ch], 0

    rdtsc
    sub     eax, [esp + 08h]            ; edx:eax = now - (entry time stamp - adjustment)
    sbb     edx, [esp + 0ch]
    add     [ecx], eax                  ; accumulate into the 64-bit overhead counter
    adc     [ecx + 4], edx
common_no_overhead:
    add     esp, 4 + 4 + 8              ; drop the cdecl call frame

    ; restore volatile registers.
    pop     ecx
    popfd
    pop     edx
    pop     eax
    ret
172
;;
; Data esi points to while we're calibrating.
;
; Allocated on the stack by calibrate and accessed through esi by
; calib_penter and calib_pexit.
struc CALIBDATA
    .OverheadLo     resd 1              ; 64-bit overhead counter, updated via common_overhead
    .OverheadHi     resd 1
    .ProfiledLo     resd 1              ; 64-bit sum of (exit tsc - enter tsc), updated by calib_pexit
    .ProfiledHi     resd 1
    .EnterTSLo      resd 1              ; 64-bit time stamp stored by the latest calib_penter
    .EnterTSHi      resd 1
    .MinLo          resd 1              ; 64-bit minimum of (Profiled - Overhead) seen by the inner loop
    .MinHi          resd 1
endstruc
185
186
187
;;
; Do necessary calibrations.
;
; Tunes g_OverheadAdj. Each outer round runs calib_nullproc a number of times,
; takes the minimum of (profiled time - accounted overhead), and then nudges
; g_OverheadAdj up or down by one until that minimum lands in the accepted
; window or the round budget is used up.
;
; Called from common_return_path while g_fCalibrated is zero. All general
; registers and eflags are preserved (pushad / pushfd).
;
CALIB_OUTER_ROUNDS  equ 200h            ; max number of adjustment steps.
CALIB_INNER_ROUNDS  equ 200h            ; calib_nullproc runs per adjustment step.
CALIB_MIN_TOO_LOW   equ 1fh             ; minimum <= this: adjustment is too big.
CALIB_MIN_OK_MAX    equ 30h             ; minimum <= this (and above too-low): accept.

align 16
calibrate:
    ; prolog
    push    ebp
    mov     ebp, esp
    pushfd
    pushad
    sub     esp, CALIBDATA_size
    mov     esi, esp                    ; esi points to the CALIBDATA

    ;
    ; Set the calibrated flag up front; common_return_path only calls us
    ; while it is zero.
    ;
    mov     eax, 1
    xchg    dword [g_fCalibrated], eax

    ;
    ; The outer loop - find the right adjustment.
    ;
    mov     ebx, CALIB_OUTER_ROUNDS     ; loop counter.
calib_outer_loop:

    ;
    ; The inner loop - calls the function number of times to establish a
    ; good minimum value
    ;
    mov     ecx, CALIB_INNER_ROUNDS
    mov     dword [esi + CALIBDATA.MinLo], 0ffffffffh
    mov     dword [esi + CALIBDATA.MinHi], 07fffffffh
calib_inner_loop:

    ; zero the overhead and profiled times.
    xor     eax, eax
    mov     [esi + CALIBDATA.OverheadLo], eax
    mov     [esi + CALIBDATA.OverheadHi], eax
    mov     [esi + CALIBDATA.ProfiledLo], eax
    mov     [esi + CALIBDATA.ProfiledHi], eax
    call    calib_nullproc              ; preserves eax, ecx, edx and flags via the hook epilogue

    ; subtract the overhead
    mov     eax, [esi + CALIBDATA.ProfiledLo]
    mov     edx, [esi + CALIBDATA.ProfiledHi]
    sub     eax, [esi + CALIBDATA.OverheadLo]
    sbb     edx, [esi + CALIBDATA.OverheadHi]

    ; update the minimum value.
    test    edx, 080000000h
    jnz     near calib_outer_dec        ; if negative, just simplify and shortcut
    cmp     edx, [esi + CALIBDATA.MinHi] ; both high dwords are non-negative here, signed compare is fine
    jg      calib_inner_next
    jl      calib_inner_update_minimum
    cmp     eax, [esi + CALIBDATA.MinLo]
    jae     calib_inner_next            ; low dword of a 64-bit value: must be an unsigned compare
calib_inner_update_minimum:
    mov     [esi + CALIBDATA.MinLo], eax
    mov     [esi + CALIBDATA.MinHi], edx
calib_inner_next:
    loop    calib_inner_loop

    ; Is the minimum value acceptable?
    test    dword [esi + CALIBDATA.MinHi], 80000000h
    jnz     calib_outer_dec             ; simplify if negative.
    cmp     dword [esi + CALIBDATA.MinHi], 0
    jnz     calib_outer_inc             ; this shouldn't be possible
    cmp     dword [esi + CALIBDATA.MinLo], CALIB_MIN_TOO_LOW
    jbe     calib_outer_dec             ; too low - 2 ticks per pair is the minimum!
    cmp     dword [esi + CALIBDATA.MinLo], CALIB_MIN_OK_MAX
    jbe     calib_done                  ; this is fine!
calib_outer_inc:
    inc     dword [g_OverheadAdj]
    jmp     calib_outer_next
calib_outer_dec:
    cmp     dword [g_OverheadAdj], 1
    jbe     calib_done                  ; never go below 1, and never wrap 0 around to 0ffffffffh
    dec     dword [g_OverheadAdj]
calib_outer_next:
    dec     ebx
    jnz     calib_outer_loop
calib_done:

    ; epilog
    add     esp, CALIBDATA_size
    popad
    popfd
    leave
    ret
277
278
279
280
;;
; The calibration __penter - this must be identical to the real thing except for the KPRF call.
;
; Input:
;   esi - pointer to the CALIBDATA set up by calibrate.
;
; Records the entry time stamp in CALIBDATA.EnterTS, builds a frame with the
; same layout as the one __penter creates (the two parameter slots are just
; zero) and continues at common_overhead with eax pointing at the
; CALIBDATA overhead counter. common_overhead restores the saved registers
; and returns to our caller.
align 16
calib_penter:
    ; This part must be identical
    push    eax
    push    edx
    rdtsc
    pushfd
    push    ecx

    ; store the entry
    mov     [esi + CALIBDATA.EnterTSLo], eax
    mov     [esi + CALIBDATA.EnterTSHi], edx

    ; create the call frame
    push    edx                         ; [esp + 0ch] - tsc high
    push    eax                         ; [esp + 08h] - tsc low
    push    0                           ; [esp + 04h] - frame pointer slot (unused)
    push    0                           ; [esp + 00h] - function pointer slot (unused)

    lea     eax, [esi + CALIBDATA.OverheadLo]
    jmp     common_overhead
304
305
;;
; The calibration __pexit - this must be identical to the real thing except for the KPRF call.
;
; Input:
;   esi - pointer to the CALIBDATA set up by calibrate.
;
; Adds (current tsc - CALIBDATA.EnterTS) to CALIBDATA.Profiled, builds a frame
; with the same layout as the one __pexit creates (the two parameter slots are
; just zero) and continues at common_overhead, which restores the saved
; registers and returns to our caller.
;
; NOTE(review): the counter pointer handed to common_overhead is EnterTSLo,
; not OverheadLo, so the exit-side overhead lands in the EnterTS slot, which
; the next calib_penter overwrites. This looks like a deliberate scratch
; target - confirm before changing it.
align 16
calib_pexit:
    ; This part must be identical
    push    eax
    push    edx
    rdtsc
    pushfd
    push    ecx

    ; update the time
    push    eax                         ; keep the raw time stamp for the call frame below
    push    edx
    sub     eax, [esi + CALIBDATA.EnterTSLo]
    sbb     edx, [esi + CALIBDATA.EnterTSHi]
    add     [esi + CALIBDATA.ProfiledLo], eax
    adc     [esi + CALIBDATA.ProfiledHi], edx
    pop     edx
    pop     eax

    ; create the call frame
    push    edx                         ; [esp + 0ch] - tsc high
    push    eax                         ; [esp + 08h] - tsc low
    push    0                           ; [esp + 04h] - frame pointer slot (unused)
    push    0                           ; [esp + 00h] - function pointer slot (unused)

    lea     eax, [esi + CALIBDATA.EnterTSLo]
    jmp     common_overhead
335
336
;;
; The 'function' we're profiling.
; The general idea is that each pair should take something like 2-10 ticks.
;
; Executes 16 back-to-back calib_penter / calib_pexit pairs and returns.
; Expects esi to point to the CALIBDATA, as both calibration hooks do.
; The acceptance thresholds in calibrate are tied to this pair count
; ("2 ticks per pair"), so change them together.
;
; (Btw. If we don't use multiple pairs here, we end up with the wrong result.)
align 16
calib_nullproc:
%rep 16
    call    calib_penter
    call    calib_pexit
%endrep
    ret
392
Note: See TracBrowser for help on using the repository browser.