source: python/trunk/Lib/profile.py@ 608

Last change on this file since 608 was 391, checked in by dmik, 11 years ago

python: Merge vendor 2.7.6 to trunk.

  • Property svn:eol-style set to native
File size: 22.2 KB
Line 
1#! /usr/bin/env python
2#
3# Class for profiling python code. rev 1.0 6/2/94
4#
5# Written by James Roskind
6# Based on prior profile module by Sjoerd Mullender...
7# which was hacked somewhat by: Guido van Rossum
8
9"""Class for profiling Python code."""
10
11# Copyright Disney Enterprises, Inc. All Rights Reserved.
12# Licensed to PSF under a Contributor Agreement
13#
14# Licensed under the Apache License, Version 2.0 (the "License");
15# you may not use this file except in compliance with the License.
16# You may obtain a copy of the License at
17#
18# http://www.apache.org/licenses/LICENSE-2.0
19#
20# Unless required by applicable law or agreed to in writing, software
21# distributed under the License is distributed on an "AS IS" BASIS,
22# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
23# either express or implied. See the License for the specific language
24# governing permissions and limitations under the License.
25
26
27import sys
28import os
29import time
30import marshal
31from optparse import OptionParser
32
33__all__ = ["run", "runctx", "help", "Profile"]
34
35# Sample timer for use with
36#i_count = 0
37#def integer_timer():
38# global i_count
39# i_count = i_count + 1
40# return i_count
41#itimes = integer_timer # replace with C coded timer returning integers
42
43#**************************************************************************
44# The following are the static member functions for the profiler class
45# Note that an instance of Profile() is *not* needed to call them.
46#**************************************************************************
47
48def run(statement, filename=None, sort=-1):
49 """Run statement under profiler optionally saving results in filename
50
51 This function takes a single argument that can be passed to the
52 "exec" statement, and an optional file name. In all cases this
53 routine attempts to "exec" its first argument and gather profiling
54 statistics from the execution. If no file name is present, then this
55 function automatically prints a simple profiling report, sorted by the
56 standard name string (file/line/function-name) that is presented in
57 each line.
58 """
59 prof = Profile()
60 try:
61 prof = prof.run(statement)
62 except SystemExit:
63 pass
64 if filename is not None:
65 prof.dump_stats(filename)
66 else:
67 return prof.print_stats(sort)
68
69def runctx(statement, globals, locals, filename=None, sort=-1):
70 """Run statement under profiler, supplying your own globals and locals,
71 optionally saving results in filename.
72
73 statement and filename have the same semantics as profile.run
74 """
75 prof = Profile()
76 try:
77 prof = prof.runctx(statement, globals, locals)
78 except SystemExit:
79 pass
80
81 if filename is not None:
82 prof.dump_stats(filename)
83 else:
84 return prof.print_stats(sort)
85
86# Backwards compatibility.
87def help():
88 print "Documentation for the profile module can be found "
89 print "in the Python Library Reference, section 'The Python Profiler'."
90
91if hasattr(os, "times"):
92 def _get_time_times(timer=os.times):
93 t = timer()
94 return t[0] + t[1]
95
96# Using getrusage(3) is better than clock(3) if available:
97# on some systems (e.g. FreeBSD), getrusage has a higher resolution
98# Furthermore, on a POSIX system, returns microseconds, which
99# wrap around after 36min.
100_has_res = 0
101try:
102 import resource
103 resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF)
104 def _get_time_resource(timer=resgetrusage):
105 t = timer()
106 return t[0] + t[1]
107 _has_res = 1
108except ImportError:
109 pass
110
111class Profile:
112 """Profiler class.
113
114 self.cur is always a tuple. Each such tuple corresponds to a stack
115 frame that is currently active (self.cur[-2]). The following are the
116 definitions of its members. We use this external "parallel stack" to
117 avoid contaminating the program that we are profiling. (old profiler
118 used to write into the frames local dictionary!!) Derived classes
119 can change the definition of some entries, as long as they leave
120 [-2:] intact (frame and previous tuple). In case an internal error is
121 detected, the -3 element is used as the function name.
122
123 [ 0] = Time that needs to be charged to the parent frame's function.
124 It is used so that a function call will not have to access the
125 timing data for the parent frame.
126 [ 1] = Total time spent in this frame's function, excluding time in
127 subfunctions (this latter is tallied in cur[2]).
128 [ 2] = Total time spent in subfunctions, excluding time executing the
129 frame's function (this latter is tallied in cur[1]).
130 [-3] = Name of the function that corresponds to this frame.
131 [-2] = Actual frame that we correspond to (used to sync exception handling).
132 [-1] = Our parent 6-tuple (corresponds to frame.f_back).
133
134 Timing data for each function is stored as a 5-tuple in the dictionary
135 self.timings[]. The index is always the name stored in self.cur[-3].
136 The following are the definitions of the members:
137
138 [0] = The number of times this function was called, not counting direct
139 or indirect recursion,
140 [1] = Number of times this function appears on the stack, minus one
141 [2] = Total time spent internal to this function
142 [3] = Cumulative time that this function was present on the stack. In
143 non-recursive functions, this is the total execution time from start
144 to finish of each invocation of a function, including time spent in
145 all subfunctions.
146 [4] = A dictionary indicating for each function name, the number of times
147 it was called by us.
148 """
149
150 bias = 0 # calibration constant
151
152 def __init__(self, timer=None, bias=None):
153 self.timings = {}
154 self.cur = None
155 self.cmd = ""
156 self.c_func_name = ""
157
158 if bias is None:
159 bias = self.bias
160 self.bias = bias # Materialize in local dict for lookup speed.
161
162 if not timer:
163 if _has_res:
164 self.timer = resgetrusage
165 self.dispatcher = self.trace_dispatch
166 self.get_time = _get_time_resource
167 elif hasattr(time, 'clock'):
168 self.timer = self.get_time = time.clock
169 self.dispatcher = self.trace_dispatch_i
170 elif hasattr(os, 'times'):
171 self.timer = os.times
172 self.dispatcher = self.trace_dispatch
173 self.get_time = _get_time_times
174 else:
175 self.timer = self.get_time = time.time
176 self.dispatcher = self.trace_dispatch_i
177 else:
178 self.timer = timer
179 t = self.timer() # test out timer function
180 try:
181 length = len(t)
182 except TypeError:
183 self.get_time = timer
184 self.dispatcher = self.trace_dispatch_i
185 else:
186 if length == 2:
187 self.dispatcher = self.trace_dispatch
188 else:
189 self.dispatcher = self.trace_dispatch_l
190 # This get_time() implementation needs to be defined
191 # here to capture the passed-in timer in the parameter
192 # list (for performance). Note that we can't assume
193 # the timer() result contains two values in all
194 # cases.
195 def get_time_timer(timer=timer, sum=sum):
196 return sum(timer())
197 self.get_time = get_time_timer
198 self.t = self.get_time()
199 self.simulate_call('profiler')
200
201 # Heavily optimized dispatch routine for os.times() timer
202
203 def trace_dispatch(self, frame, event, arg):
204 timer = self.timer
205 t = timer()
206 t = t[0] + t[1] - self.t - self.bias
207
208 if event == "c_call":
209 self.c_func_name = arg.__name__
210
211 if self.dispatch[event](self, frame,t):
212 t = timer()
213 self.t = t[0] + t[1]
214 else:
215 r = timer()
216 self.t = r[0] + r[1] - t # put back unrecorded delta
217
218 # Dispatch routine for best timer program (return = scalar, fastest if
219 # an integer but float works too -- and time.clock() relies on that).
220
221 def trace_dispatch_i(self, frame, event, arg):
222 timer = self.timer
223 t = timer() - self.t - self.bias
224
225 if event == "c_call":
226 self.c_func_name = arg.__name__
227
228 if self.dispatch[event](self, frame, t):
229 self.t = timer()
230 else:
231 self.t = timer() - t # put back unrecorded delta
232
233 # Dispatch routine for macintosh (timer returns time in ticks of
234 # 1/60th second)
235
236 def trace_dispatch_mac(self, frame, event, arg):
237 timer = self.timer
238 t = timer()/60.0 - self.t - self.bias
239
240 if event == "c_call":
241 self.c_func_name = arg.__name__
242
243 if self.dispatch[event](self, frame, t):
244 self.t = timer()/60.0
245 else:
246 self.t = timer()/60.0 - t # put back unrecorded delta
247
248 # SLOW generic dispatch routine for timer returning lists of numbers
249
250 def trace_dispatch_l(self, frame, event, arg):
251 get_time = self.get_time
252 t = get_time() - self.t - self.bias
253
254 if event == "c_call":
255 self.c_func_name = arg.__name__
256
257 if self.dispatch[event](self, frame, t):
258 self.t = get_time()
259 else:
260 self.t = get_time() - t # put back unrecorded delta
261
262 # In the event handlers, the first 3 elements of self.cur are unpacked
263 # into vrbls w/ 3-letter names. The last two characters are meant to be
264 # mnemonic:
265 # _pt self.cur[0] "parent time" time to be charged to parent frame
266 # _it self.cur[1] "internal time" time spent directly in the function
267 # _et self.cur[2] "external time" time spent in subfunctions
268
269 def trace_dispatch_exception(self, frame, t):
270 rpt, rit, ret, rfn, rframe, rcur = self.cur
271 if (rframe is not frame) and rcur:
272 return self.trace_dispatch_return(rframe, t)
273 self.cur = rpt, rit+t, ret, rfn, rframe, rcur
274 return 1
275
276
277 def trace_dispatch_call(self, frame, t):
278 if self.cur and frame.f_back is not self.cur[-2]:
279 rpt, rit, ret, rfn, rframe, rcur = self.cur
280 if not isinstance(rframe, Profile.fake_frame):
281 assert rframe.f_back is frame.f_back, ("Bad call", rfn,
282 rframe, rframe.f_back,
283 frame, frame.f_back)
284 self.trace_dispatch_return(rframe, 0)
285 assert (self.cur is None or \
286 frame.f_back is self.cur[-2]), ("Bad call",
287 self.cur[-3])
288 fcode = frame.f_code
289 fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
290 self.cur = (t, 0, 0, fn, frame, self.cur)
291 timings = self.timings
292 if fn in timings:
293 cc, ns, tt, ct, callers = timings[fn]
294 timings[fn] = cc, ns + 1, tt, ct, callers
295 else:
296 timings[fn] = 0, 0, 0, 0, {}
297 return 1
298
299 def trace_dispatch_c_call (self, frame, t):
300 fn = ("", 0, self.c_func_name)
301 self.cur = (t, 0, 0, fn, frame, self.cur)
302 timings = self.timings
303 if fn in timings:
304 cc, ns, tt, ct, callers = timings[fn]
305 timings[fn] = cc, ns+1, tt, ct, callers
306 else:
307 timings[fn] = 0, 0, 0, 0, {}
308 return 1
309
310 def trace_dispatch_return(self, frame, t):
311 if frame is not self.cur[-2]:
312 assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
313 self.trace_dispatch_return(self.cur[-2], 0)
314
315 # Prefix "r" means part of the Returning or exiting frame.
316 # Prefix "p" means part of the Previous or Parent or older frame.
317
318 rpt, rit, ret, rfn, frame, rcur = self.cur
319 rit = rit + t
320 frame_total = rit + ret
321
322 ppt, pit, pet, pfn, pframe, pcur = rcur
323 self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
324
325 timings = self.timings
326 cc, ns, tt, ct, callers = timings[rfn]
327 if not ns:
328 # This is the only occurrence of the function on the stack.
329 # Else this is a (directly or indirectly) recursive call, and
330 # its cumulative time will get updated when the topmost call to
331 # it returns.
332 ct = ct + frame_total
333 cc = cc + 1
334
335 if pfn in callers:
336 callers[pfn] = callers[pfn] + 1 # hack: gather more
337 # stats such as the amount of time added to ct courtesy
338 # of this specific call, and the contribution to cc
339 # courtesy of this call.
340 else:
341 callers[pfn] = 1
342
343 timings[rfn] = cc, ns - 1, tt + rit, ct, callers
344
345 return 1
346
347
348 dispatch = {
349 "call": trace_dispatch_call,
350 "exception": trace_dispatch_exception,
351 "return": trace_dispatch_return,
352 "c_call": trace_dispatch_c_call,
353 "c_exception": trace_dispatch_return, # the C function returned
354 "c_return": trace_dispatch_return,
355 }
356
357
358 # The next few functions play with self.cmd. By carefully preloading
359 # our parallel stack, we can force the profiled result to include
360 # an arbitrary string as the name of the calling function.
361 # We use self.cmd as that string, and the resulting stats look
362 # very nice :-).
363
364 def set_cmd(self, cmd):
365 if self.cur[-1]: return # already set
366 self.cmd = cmd
367 self.simulate_call(cmd)
368
369 class fake_code:
370 def __init__(self, filename, line, name):
371 self.co_filename = filename
372 self.co_line = line
373 self.co_name = name
374 self.co_firstlineno = 0
375
376 def __repr__(self):
377 return repr((self.co_filename, self.co_line, self.co_name))
378
379 class fake_frame:
380 def __init__(self, code, prior):
381 self.f_code = code
382 self.f_back = prior
383
384 def simulate_call(self, name):
385 code = self.fake_code('profile', 0, name)
386 if self.cur:
387 pframe = self.cur[-2]
388 else:
389 pframe = None
390 frame = self.fake_frame(code, pframe)
391 self.dispatch['call'](self, frame, 0)
392
393 # collect stats from pending stack, including getting final
394 # timings for self.cmd frame.
395
396 def simulate_cmd_complete(self):
397 get_time = self.get_time
398 t = get_time() - self.t
399 while self.cur[-1]:
400 # We *can* cause assertion errors here if
401 # dispatch_trace_return checks for a frame match!
402 self.dispatch['return'](self, self.cur[-2], t)
403 t = 0
404 self.t = get_time() - t
405
406
407 def print_stats(self, sort=-1):
408 import pstats
409 pstats.Stats(self).strip_dirs().sort_stats(sort). \
410 print_stats()
411
412 def dump_stats(self, file):
413 f = open(file, 'wb')
414 self.create_stats()
415 marshal.dump(self.stats, f)
416 f.close()
417
418 def create_stats(self):
419 self.simulate_cmd_complete()
420 self.snapshot_stats()
421
422 def snapshot_stats(self):
423 self.stats = {}
424 for func, (cc, ns, tt, ct, callers) in self.timings.iteritems():
425 callers = callers.copy()
426 nc = 0
427 for callcnt in callers.itervalues():
428 nc += callcnt
429 self.stats[func] = cc, nc, tt, ct, callers
430
431
432 # The following two methods can be called by clients to use
433 # a profiler to profile a statement, given as a string.
434
435 def run(self, cmd):
436 import __main__
437 dict = __main__.__dict__
438 return self.runctx(cmd, dict, dict)
439
440 def runctx(self, cmd, globals, locals):
441 self.set_cmd(cmd)
442 sys.setprofile(self.dispatcher)
443 try:
444 exec cmd in globals, locals
445 finally:
446 sys.setprofile(None)
447 return self
448
449 # This method is more useful to profile a single function call.
450 def runcall(self, func, *args, **kw):
451 self.set_cmd(repr(func))
452 sys.setprofile(self.dispatcher)
453 try:
454 return func(*args, **kw)
455 finally:
456 sys.setprofile(None)
457
458
459 #******************************************************************
460 # The following calculates the overhead for using a profiler. The
461 # problem is that it takes a fair amount of time for the profiler
462 # to stop the stopwatch (from the time it receives an event).
463 # Similarly, there is a delay from the time that the profiler
464 # re-starts the stopwatch before the user's code really gets to
465 # continue. The following code tries to measure the difference on
466 # a per-event basis.
467 #
468 # Note that this difference is only significant if there are a lot of
469 # events, and relatively little user code per event. For example,
470 # code with small functions will typically benefit from having the
471 # profiler calibrated for the current platform. This *could* be
472 # done on the fly during init() time, but it is not worth the
473 # effort. Also note that if too large a value specified, then
474 # execution time on some functions will actually appear as a
475 # negative number. It is *normal* for some functions (with very
476 # low call counts) to have such negative stats, even if the
477 # calibration figure is "correct."
478 #
479 # One alternative to profile-time calibration adjustments (i.e.,
480 # adding in the magic little delta during each event) is to track
481 # more carefully the number of events (and cumulatively, the number
482 # of events during sub functions) that are seen. If this were
483 # done, then the arithmetic could be done after the fact (i.e., at
484 # display time). Currently, we track only call/return events.
485 # These values can be deduced by examining the callees and callers
486 # vectors for each functions. Hence we *can* almost correct the
487 # internal time figure at print time (note that we currently don't
488 # track exception event processing counts). Unfortunately, there
489 # is currently no similar information for cumulative sub-function
490 # time. It would not be hard to "get all this info" at profiler
491 # time. Specifically, we would have to extend the tuples to keep
492 # counts of this in each frame, and then extend the defs of timing
493 # tuples to include the significant two figures. I'm a bit fearful
494 # that this additional feature will slow the heavily optimized
495 # event/time ratio (i.e., the profiler would run slower, fur a very
496 # low "value added" feature.)
497 #**************************************************************
498
499 def calibrate(self, m, verbose=0):
500 if self.__class__ is not Profile:
501 raise TypeError("Subclasses must override .calibrate().")
502
503 saved_bias = self.bias
504 self.bias = 0
505 try:
506 return self._calibrate_inner(m, verbose)
507 finally:
508 self.bias = saved_bias
509
510 def _calibrate_inner(self, m, verbose):
511 get_time = self.get_time
512
513 # Set up a test case to be run with and without profiling. Include
514 # lots of calls, because we're trying to quantify stopwatch overhead.
515 # Do not raise any exceptions, though, because we want to know
516 # exactly how many profile events are generated (one call event, +
517 # one return event, per Python-level call).
518
519 def f1(n):
520 for i in range(n):
521 x = 1
522
523 def f(m, f1=f1):
524 for i in range(m):
525 f1(100)
526
527 f(m) # warm up the cache
528
529 # elapsed_noprofile <- time f(m) takes without profiling.
530 t0 = get_time()
531 f(m)
532 t1 = get_time()
533 elapsed_noprofile = t1 - t0
534 if verbose:
535 print "elapsed time without profiling =", elapsed_noprofile
536
537 # elapsed_profile <- time f(m) takes with profiling. The difference
538 # is profiling overhead, only some of which the profiler subtracts
539 # out on its own.
540 p = Profile()
541 t0 = get_time()
542 p.runctx('f(m)', globals(), locals())
543 t1 = get_time()
544 elapsed_profile = t1 - t0
545 if verbose:
546 print "elapsed time with profiling =", elapsed_profile
547
548 # reported_time <- "CPU seconds" the profiler charged to f and f1.
549 total_calls = 0.0
550 reported_time = 0.0
551 for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
552 p.timings.items():
553 if funcname in ("f", "f1"):
554 total_calls += cc
555 reported_time += tt
556
557 if verbose:
558 print "'CPU seconds' profiler reported =", reported_time
559 print "total # calls =", total_calls
560 if total_calls != m + 1:
561 raise ValueError("internal error: total calls = %d" % total_calls)
562
563 # reported_time - elapsed_noprofile = overhead the profiler wasn't
564 # able to measure. Divide by twice the number of calls (since there
565 # are two profiler events per call in this test) to get the hidden
566 # overhead per event.
567 mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
568 if verbose:
569 print "mean stopwatch overhead per profile event =", mean
570 return mean
571
572#****************************************************************************
573def Stats(*args):
574 print 'Report generating functions are in the "pstats" module\a'
575
576def main():
577 usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
578 parser = OptionParser(usage=usage)
579 parser.allow_interspersed_args = False
580 parser.add_option('-o', '--outfile', dest="outfile",
581 help="Save stats to <outfile>", default=None)
582 parser.add_option('-s', '--sort', dest="sort",
583 help="Sort order when printing to stdout, based on pstats.Stats class",
584 default=-1)
585
586 if not sys.argv[1:]:
587 parser.print_usage()
588 sys.exit(2)
589
590 (options, args) = parser.parse_args()
591 sys.argv[:] = args
592
593 if len(args) > 0:
594 progname = args[0]
595 sys.path.insert(0, os.path.dirname(progname))
596 with open(progname, 'rb') as fp:
597 code = compile(fp.read(), progname, 'exec')
598 globs = {
599 '__file__': progname,
600 '__name__': '__main__',
601 '__package__': None,
602 }
603 runctx(code, globs, None, options.outfile, options.sort)
604 else:
605 parser.print_usage()
606 return parser
607
608# When invoked as main program, invoke the profiler on a script
609if __name__ == '__main__':
610 main()
Note: See TracBrowser for help on using the repository browser.