Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

profile.py@ 20

Last change on this file since 20 was 2, checked in by Yuri Dario, 15 years ago
Initial import for vendor code.
Property svn:eol-style set to `native`
File size: 23.0 KB

Line
1	#! /usr/bin/env python
2	#
3	# Class for profiling python code. rev 1.0 6/2/94
4	#
5	# Based on prior profile module by Sjoerd Mullender...
6	# which was hacked somewhat by: Guido van Rossum
7
8	"""Class for profiling Python code."""
9
10	# Copyright 1994, by InfoSeek Corporation, all rights reserved.
11	# Written by James Roskind
12	#
13	# Permission to use, copy, modify, and distribute this Python software
14	# and its associated documentation for any purpose (subject to the
15	# restriction in the following sentence) without fee is hereby granted,
16	# provided that the above copyright notice appears in all copies, and
17	# that both that copyright notice and this permission notice appear in
18	# supporting documentation, and that the name of InfoSeek not be used in
19	# advertising or publicity pertaining to distribution of the software
20	# without specific, written prior permission. This permission is
21	# explicitly restricted to the copying and modification of the software
22	# to remain in Python, compiled Python, or other languages (such as C)
23	# wherein the modified or derived code is exclusively imported into a
24	# Python module.
25	#
26	# INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
27	# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
28	# FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
29	# SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
30	# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
31	# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
32	# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
33
34
35
36	import sys
37	import os
38	import time
39	import marshal
40	from optparse import OptionParser
41
42	__all__ = ["run", "runctx", "help", "Profile"]
43
44	# Sample timer for use with
45	#i_count = 0
46	#def integer_timer():
47	# global i_count
48	# i_count = i_count + 1
49	# return i_count
50	#itimes = integer_timer # replace with C coded timer returning integers
51
52	#**************************************************************************
53	# The following are the static member functions for the profiler class
54	# Note that an instance of Profile() is not needed to call them.
55	#**************************************************************************
56
def run(statement, filename=None, sort=-1):
    """Run statement under profiler optionally saving results in filename

    This function takes a single argument that can be passed to the
    "exec" statement, and an optional file name.  In all cases this
    routine attempts to "exec" its first argument and gather profiling
    statistics from the execution. If no file name is present, then this
    function automatically prints a simple profiling report, sorted by the
    standard name string (file/line/function-name) that is presented in
    each line.

    statement -- code to execute under the profiler.
    filename  -- when not None, the statistics are dumped to this file
                 instead of being printed.
    sort      -- sort key handed to Profile.print_stats() when printing.
    """
    profiler = Profile()
    try:
        profiler = profiler.run(statement)
    except SystemExit:
        # The profiled code asked to exit; still report what was gathered.
        pass
    if filename is None:
        return profiler.print_stats(sort)
    profiler.dump_stats(filename)
77
def runctx(statement, globals, locals, filename=None, sort=-1):
    """Run statement under profiler, supplying your own globals and locals,
    optionally saving results in filename.

    statement and filename have the same semantics as profile.run

    sort is the sort key handed to Profile.print_stats() when no filename
    is given; it defaults to -1, which is also print_stats()'s own default.
    """
    profiler = Profile()
    try:
        profiler = profiler.runctx(statement, globals, locals)
    except SystemExit:
        # The profiled code asked to exit; still report what was gathered.
        pass

    if filename is None:
        return profiler.print_stats(sort)
    profiler.dump_stats(filename)
94
95	# Backwards compatibility.
def help():
    """Print a pointer to the profiler documentation (kept for old callers)."""
    # A single parenthesised string prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("Documentation for the profile module can be found ")
    print("in the Python Library Reference, section 'The Python Profiler'.")
99
if os.name == "mac":
    import MacOS

    def _get_time_mac(timer=MacOS.GetTicks):
        """Return the timer reading in seconds (the timer counts 1/60 s ticks)."""
        return timer() / 60.0
104
if hasattr(os, "times"):
    def _get_time_times(timer=os.times):
        """Return the sum of the first two fields of the timer result.

        With the default os.times timer these two fields are the user and
        system CPU times of the current process.
        """
        fields = timer()
        return fields[0] + fields[1]
109
# Using getrusage(3) is better than clock(3) if available:
# on some systems (e.g. FreeBSD), getrusage has a higher resolution.
# Furthermore, on a POSIX system, clock(3) returns microseconds, which
# wrap around after 36min.
try:
    import resource
except ImportError:
    # No getrusage() on this platform; Profile falls back to other timers.
    _has_res = 0
else:
    resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF)

    def _get_time_resource(timer=resgetrusage):
        """Return the sum of the first two fields of the timer result.

        With the default getrusage timer these are the user and system
        CPU times of the current process.
        """
        usage = timer()
        return usage[0] + usage[1]

    _has_res = 1
124
class Profile:
    """Profiler class.

    self.cur is always a tuple.  Each such tuple corresponds to a stack
    frame that is currently active (self.cur[-2]).  The following are the
    definitions of its members.  We use this external "parallel stack" to
    avoid contaminating the program that we are profiling. (old profiler
    used to write into the frames local dictionary!!) Derived classes
    can change the definition of some entries, as long as they leave
    [-2:] intact (frame and previous tuple).  In case an internal error is
    detected, the -3 element is used as the function name.

    [ 0] = Time that needs to be charged to the parent frame's function.
           It is used so that a function call will not have to access the
           timing data for the parent frame.
    [ 1] = Total time spent in this frame's function, excluding time in
           subfunctions (this latter is tallied in cur[2]).
    [ 2] = Total time spent in subfunctions, excluding time executing the
           frame's function (this latter is tallied in cur[1]).
    [-3] = Name of the function that corresponds to this frame.
    [-2] = Actual frame that we correspond to (used to sync exception handling).
    [-1] = Our parent 6-tuple (corresponds to frame.f_back).

    Timing data for each function is stored as a 5-tuple in the dictionary
    self.timings[].  The index is always the name stored in self.cur[-3].
    The following are the definitions of the members:

    [0] = The number of times this function was called, not counting direct
          or indirect recursion,
    [1] = Number of times this function appears on the stack, minus one
    [2] = Total time spent internal to this function
    [3] = Cumulative time that this function was present on the stack.  In
          non-recursive functions, this is the total execution time from start
          to finish of each invocation of a function, including time spent in
          all subfunctions.
    [4] = A dictionary indicating for each function name, the number of times
          it was called by us.
    """

    bias = 0  # calibration constant

    def __init__(self, timer=None, bias=None):
        """Set up an empty profiler.

        timer -- optional callable returning the current time, either as a
                 single number or as a sequence of numbers.  When omitted,
                 a platform timer is picked (getrusage, MacOS ticks,
                 time.clock, os.times, time.time -- in that order).
        bias  -- per-event overhead subtracted from each measurement;
                 defaults to the class attribute ``bias``.
        """
        self.timings = {}
        self.cur = None
        self.cmd = ""
        self.c_func_name = ""

        if bias is None:
            bias = self.bias
        self.bias = bias     # Materialize in local dict for lookup speed.

        if not timer:
            if _has_res:
                self.timer = resgetrusage
                self.dispatcher = self.trace_dispatch
                self.get_time = _get_time_resource
            elif os.name == 'mac':
                self.timer = MacOS.GetTicks
                self.dispatcher = self.trace_dispatch_mac
                self.get_time = _get_time_mac
            elif hasattr(time, 'clock'):
                self.timer = self.get_time = time.clock
                self.dispatcher = self.trace_dispatch_i
            elif hasattr(os, 'times'):
                self.timer = os.times
                self.dispatcher = self.trace_dispatch
                self.get_time = _get_time_times
            else:
                self.timer = self.get_time = time.time
                self.dispatcher = self.trace_dispatch_i
        else:
            self.timer = timer
            t = self.timer()  # test out timer function
            try:
                length = len(t)
            except TypeError:
                # Scalar result: the timer itself is the time source.
                self.get_time = timer
                self.dispatcher = self.trace_dispatch_i
            else:
                if length == 2:
                    self.dispatcher = self.trace_dispatch
                else:
                    self.dispatcher = self.trace_dispatch_l
                # This get_time() implementation needs to be defined
                # here to capture the passed-in timer in the parameter
                # list (for performance).  Note that we can't assume
                # the timer() result contains two values in all
                # cases.
                def get_time_timer(timer=timer, sum=sum):
                    return sum(timer())
                self.get_time = get_time_timer
        self.t = self.get_time()
        self.simulate_call('profiler')

    # Heavily optimized dispatch routine for os.times() timer

    def trace_dispatch(self, frame, event, arg):
        """Profile hook for timers whose result is summed as t[0] + t[1]."""
        timer = self.timer
        t = timer()
        t = t[0] + t[1] - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame, t):
            t = timer()
            self.t = t[0] + t[1]
        else:
            r = timer()
            self.t = r[0] + r[1] - t  # put back unrecorded delta

    # Dispatch routine for best timer program (return = scalar, fastest if
    # an integer but float works too -- and time.clock() relies on that).

    def trace_dispatch_i(self, frame, event, arg):
        """Profile hook for timers returning a single number."""
        timer = self.timer
        t = timer() - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame, t):
            self.t = timer()
        else:
            self.t = timer() - t  # put back unrecorded delta

    # Dispatch routine for macintosh (timer returns time in ticks of
    # 1/60th second)

    def trace_dispatch_mac(self, frame, event, arg):
        """Profile hook for a timer counting 1/60th-second ticks."""
        timer = self.timer
        t = timer()/60.0 - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame, t):
            self.t = timer()/60.0
        else:
            self.t = timer()/60.0 - t  # put back unrecorded delta

    # SLOW generic dispatch routine for timer returning lists of numbers

    def trace_dispatch_l(self, frame, event, arg):
        """Profile hook that reads the time through self.get_time()."""
        get_time = self.get_time
        t = get_time() - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame, t):
            self.t = get_time()
        else:
            self.t = get_time() - t  # put back unrecorded delta

    # In the event handlers, the first 3 elements of self.cur are unpacked
    # into vrbls w/ 3-letter names.  The last two characters are meant to be
    # mnemonic:
    #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
    #     _it  self.cur[1] "internal time" time spent directly in the function
    #     _et  self.cur[2] "external time" time spent in subfunctions

    def trace_dispatch_exception(self, frame, t):
        """Handle an "exception" event; always returns 1."""
        rpt, rit, ret, rfn, rframe, rcur = self.cur
        if (rframe is not frame) and rcur:
            # The event belongs to another frame than the one on top of
            # the parallel stack: treat it as a return from the top frame.
            return self.trace_dispatch_return(rframe, t)
        self.cur = rpt, rit + t, ret, rfn, rframe, rcur
        return 1

    def trace_dispatch_call(self, frame, t):
        """Handle a "call" event: push *frame* on the parallel stack."""
        if self.cur and frame.f_back is not self.cur[-2]:
            rpt, rit, ret, rfn, rframe, rcur = self.cur
            if not isinstance(rframe, Profile.fake_frame):
                assert rframe.f_back is frame.f_back, ("Bad call", rfn,
                                                       rframe, rframe.f_back,
                                                       frame, frame.f_back)
                self.trace_dispatch_return(rframe, 0)
                assert (self.cur is None or
                        frame.f_back is self.cur[-2]), ("Bad call",
                                                        self.cur[-3])
        fcode = frame.f_code
        fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
        self.cur = (t, 0, 0, fn, frame, self.cur)
        timings = self.timings
        if fn in timings:
            cc, ns, tt, ct, callers = timings[fn]
            timings[fn] = cc, ns + 1, tt, ct, callers
        else:
            timings[fn] = 0, 0, 0, 0, {}
        return 1

    def trace_dispatch_c_call(self, frame, t):
        """Handle a "c_call" event, keyed by the name saved by the hook."""
        fn = ("", 0, self.c_func_name)
        self.cur = (t, 0, 0, fn, frame, self.cur)
        timings = self.timings
        if fn in timings:
            cc, ns, tt, ct, callers = timings[fn]
            timings[fn] = cc, ns + 1, tt, ct, callers
        else:
            timings[fn] = 0, 0, 0, 0, {}
        return 1

    def trace_dispatch_return(self, frame, t):
        """Handle a return-like event: pop the top entry and book its time."""
        if frame is not self.cur[-2]:
            assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
            self.trace_dispatch_return(self.cur[-2], 0)

        # Prefix "r" means part of the Returning or exiting frame.
        # Prefix "p" means part of the Previous or Parent or older frame.

        rpt, rit, ret, rfn, frame, rcur = self.cur
        rit = rit + t
        frame_total = rit + ret

        ppt, pit, pet, pfn, pframe, pcur = rcur
        self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur

        timings = self.timings
        cc, ns, tt, ct, callers = timings[rfn]
        if not ns:
            # This is the only occurrence of the function on the stack.
            # Else this is a (directly or indirectly) recursive call, and
            # its cumulative time will get updated when the topmost call to
            # it returns.
            ct = ct + frame_total
            cc = cc + 1

        if pfn in callers:
            callers[pfn] = callers[pfn] + 1  # hack: gather more
            # stats such as the amount of time added to ct courtesy
            # of this specific call, and the contribution to cc
            # courtesy of this call.
        else:
            callers[pfn] = 1

        timings[rfn] = cc, ns - 1, tt + rit, ct, callers

        return 1

    # Event name -> handler.  The handlers are stored as plain functions,
    # so the hooks call them as self.dispatch[event](self, frame, t).
    dispatch = {
        "call": trace_dispatch_call,
        "exception": trace_dispatch_exception,
        "return": trace_dispatch_return,
        "c_call": trace_dispatch_c_call,
        "c_exception": trace_dispatch_return,  # the C function returned
        "c_return": trace_dispatch_return,
        }

    # The next few functions play with self.cmd. By carefully preloading
    # our parallel stack, we can force the profiled result to include
    # an arbitrary string as the name of the calling function.
    # We use self.cmd as that string, and the resulting stats look
    # very nice :-).

    def set_cmd(self, cmd):
        """Record *cmd* as the pseudo caller, unless one is already set."""
        if self.cur[-1]:
            return   # already set
        self.cmd = cmd
        self.simulate_call(cmd)

    class fake_code:
        """Stand-in for a code object, used for simulated calls."""

        def __init__(self, filename, line, name):
            self.co_filename = filename
            self.co_line = line
            self.co_name = name
            self.co_firstlineno = 0

        def __repr__(self):
            return repr((self.co_filename, self.co_line, self.co_name))

    class fake_frame:
        """Stand-in for a frame object, used for simulated calls."""

        def __init__(self, code, prior):
            self.f_code = code
            self.f_back = prior

    def simulate_call(self, name):
        """Push a fake frame called *name* on top of the parallel stack."""
        code = self.fake_code('profile', 0, name)
        if self.cur:
            pframe = self.cur[-2]
        else:
            pframe = None
        frame = self.fake_frame(code, pframe)
        self.dispatch['call'](self, frame, 0)

    # collect stats from pending stack, including getting final
    # timings for self.cmd frame.

    def simulate_cmd_complete(self):
        """Unwind every pending entry except the bottom one."""
        get_time = self.get_time
        t = get_time() - self.t
        while self.cur[-1]:
            # We *can* cause assertion errors here if
            # dispatch_trace_return checks for a frame match!
            self.dispatch['return'](self, self.cur[-2], t)
            t = 0
        self.t = get_time() - t

    def print_stats(self, sort=-1):
        """Print the statistics via pstats, directories stripped, sorted by *sort*."""
        import pstats
        pstats.Stats(self).strip_dirs().sort_stats(sort).print_stats()

    def dump_stats(self, file):
        """Finish the pending timings and marshal the statistics to *file*.

        The statistics are built before the file is opened, and the file
        is closed even when marshalling fails.
        """
        self.create_stats()
        f = open(file, 'wb')
        try:
            marshal.dump(self.stats, f)
        finally:
            f.close()

    def create_stats(self):
        """Complete the pending frames and build self.stats."""
        self.simulate_cmd_complete()
        self.snapshot_stats()

    def snapshot_stats(self):
        """Build self.stats from self.timings.

        Each value is (cc, nc, tt, ct, callers), where nc is the sum of
        the per-caller counts and callers is a copy of the live dict.
        """
        self.stats = {}
        for func, (cc, ns, tt, ct, callers) in self.timings.items():
            callers = callers.copy()
            nc = sum(callers.values())
            self.stats[func] = cc, nc, tt, ct, callers

    # The following two methods can be called by clients to use
    # a profiler to profile a statement, given as a string.

    def run(self, cmd):
        """Profile *cmd* in the namespace of __main__; returns self."""
        import __main__
        main_namespace = __main__.__dict__
        return self.runctx(cmd, main_namespace, main_namespace)

    def runctx(self, cmd, globals, locals):
        """Profile *cmd* executed with the given namespaces; returns self."""
        self.set_cmd(cmd)
        sys.setprofile(self.dispatcher)
        try:
            # The three-argument call form means the same as
            # "exec cmd in globals, locals" and is valid on Python 2 and 3.
            exec(cmd, globals, locals)
        finally:
            sys.setprofile(None)
        return self

    # This method is more useful to profile a single function call.
    def runcall(self, func, *args, **kw):
        """Profile the single call func(*args, **kw) and return its result."""
        self.set_cmd(repr(func))
        sys.setprofile(self.dispatcher)
        try:
            return func(*args, **kw)
        finally:
            sys.setprofile(None)

    #******************************************************************
    # The following calculates the overhead for using a profiler.  The
    # problem is that it takes a fair amount of time for the profiler
    # to stop the stopwatch (from the time it receives an event).
    # Similarly, there is a delay from the time that the profiler
    # re-starts the stopwatch before the user's code really gets to
    # continue.  The following code tries to measure the difference on
    # a per-event basis.
    #
    # Note that this difference is only significant if there are a lot of
    # events, and relatively little user code per event.  For example,
    # code with small functions will typically benefit from having the
    # profiler calibrated for the current platform.  This *could* be
    # done on the fly during init() time, but it is not worth the
    # effort.  Also note that if too large a value specified, then
    # execution time on some functions will actually appear as a
    # negative number.  It is *normal* for some functions (with very
    # low call counts) to have such negative stats, even if the
    # calibration figure is "correct."
    #
    # One alternative to profile-time calibration adjustments (i.e.,
    # adding in the magic little delta during each event) is to track
    # more carefully the number of events (and cumulatively, the number
    # of events during sub functions) that are seen.  If this were
    # done, then the arithmetic could be done after the fact (i.e., at
    # display time).  Currently, we track only call/return events.
    # These values can be deduced by examining the callees and callers
    # vectors for each functions.  Hence we *can* almost correct the
    # internal time figure at print time (note that we currently don't
    # track exception event processing counts).  Unfortunately, there
    # is currently no similar information for cumulative sub-function
    # time.  It would not be hard to "get all this info" at profiler
    # time.  Specifically, we would have to extend the tuples to keep
    # counts of this in each frame, and then extend the defs of timing
    # tuples to include the significant two figures. I'm a bit fearful
    # that this additional feature will slow the heavily optimized
    # event/time ratio (i.e., the profiler would run slower, for a very
    # low "value added" feature.)
    #**************************************************************

    def calibrate(self, m, verbose=0):
        """Return the mean stopwatch overhead per profile event.

        m       -- number of calls made by the calibration workload.
        verbose -- when true, print the intermediate measurements.

        Raises TypeError when called on an instance of a subclass.
        """
        if self.__class__ is not Profile:
            raise TypeError("Subclasses must override .calibrate().")

        saved_bias = self.bias
        self.bias = 0
        try:
            return self._calibrate_inner(m, verbose)
        finally:
            self.bias = saved_bias

    def _calibrate_inner(self, m, verbose):
        """Measure the workload with and without profiling; see calibrate()."""
        get_time = self.get_time

        # Set up a test case to be run with and without profiling.  Include
        # lots of calls, because we're trying to quantify stopwatch overhead.
        # Do not raise any exceptions, though, because we want to know
        # exactly how many profile events are generated (one call event, +
        # one return event, per Python-level call).

        def f1(n):
            for i in range(n):
                x = 1

        def f(m, f1=f1):
            for i in range(m):
                f1(100)

        f(m)    # warm up the cache

        # elapsed_noprofile <- time f(m) takes without profiling.
        t0 = get_time()
        f(m)
        t1 = get_time()
        elapsed_noprofile = t1 - t0
        if verbose:
            print("elapsed time without profiling = %s"
                  % (elapsed_noprofile,))

        # elapsed_profile <- time f(m) takes with profiling.  The difference
        # is profiling overhead, only some of which the profiler subtracts
        # out on its own.
        p = Profile()
        t0 = get_time()
        p.runctx('f(m)', globals(), locals())
        t1 = get_time()
        elapsed_profile = t1 - t0
        if verbose:
            print("elapsed time with profiling = %s" % (elapsed_profile,))

        # reported_time <- "CPU seconds" the profiler charged to f and f1.
        total_calls = 0.0
        reported_time = 0.0
        for key, entry in p.timings.items():
            # key is (filename, line, funcname); entry is (cc, ns, tt, ct,
            # callers).
            if key[2] in ("f", "f1"):
                total_calls += entry[0]
                reported_time += entry[2]

        if verbose:
            print("'CPU seconds' profiler reported = %s" % (reported_time,))
            print("total # calls = %s" % (total_calls,))
        if total_calls != m + 1:
            raise ValueError("internal error: total calls = %d" % total_calls)

        # reported_time - elapsed_noprofile = overhead the profiler wasn't
        # able to measure.  Divide by twice the number of calls (since there
        # are two profiler events per call in this test) to get the hidden
        # overhead per event.
        mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
        if verbose:
            print("mean stopwatch overhead per profile event = %s" % (mean,))
        return mean
589
590	#****************************************************************************
def Stats(*args):
    """Tell the caller that reporting lives in pstats; arguments are ignored."""
    # A single parenthesised string prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('Report generating functions are in the "pstats" module\a')
593
def main():
    """Command-line entry point: profile a script.

    Reads sys.argv.  With no arguments at all, prints the usage and exits
    with status 2.  Otherwise parses -o/--outfile and -s/--sort, replaces
    sys.argv with the script and its arguments, puts the script's directory
    first on sys.path and profiles the script through run().  Returns the
    OptionParser that was used.
    """
    usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
    parser = OptionParser(usage=usage)
    # Stop option parsing at the script name so the script's own options
    # are left alone.
    parser.allow_interspersed_args = False
    parser.add_option('-o', '--outfile', dest="outfile",
        help="Save stats to <outfile>", default=None)
    parser.add_option('-s', '--sort', dest="sort",
        help="Sort order when printing to stdout, based on pstats.Stats class",
        default=-1)

    if not sys.argv[1:]:
        parser.print_usage()
        sys.exit(2)

    (options, args) = parser.parse_args()

    if args:
        sys.argv[:] = args
        sys.path.insert(0, os.path.dirname(sys.argv[0]))
        run('execfile(%r)' % (sys.argv[0],), options.outfile, options.sort)
    else:
        parser.print_usage()
    return parser

# When invoked as main program, invoke the profiler on a script
if __name__ == '__main__':
    main()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/profile.py@ 20

Download in other formats: