Context Navigation

profile.py@ 608

Last change on this file since 608 was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 22.2 KB

Line
1	#! /usr/bin/env python
2	#
3	# Class for profiling python code. rev 1.0 6/2/94
4	#
5	# Written by James Roskind
6	# Based on prior profile module by Sjoerd Mullender...
7	# which was hacked somewhat by: Guido van Rossum
8
9	"""Class for profiling Python code."""
10
11	# Copyright Disney Enterprises, Inc. All Rights Reserved.
12	# Licensed to PSF under a Contributor Agreement
13	#
14	# Licensed under the Apache License, Version 2.0 (the "License");
15	# you may not use this file except in compliance with the License.
16	# You may obtain a copy of the License at
17	#
18	# http://www.apache.org/licenses/LICENSE-2.0
19	#
20	# Unless required by applicable law or agreed to in writing, software
21	# distributed under the License is distributed on an "AS IS" BASIS,
22	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
23	# either express or implied. See the License for the specific language
24	# governing permissions and limitations under the License.
25
26
27	import sys
28	import os
29	import time
30	import marshal
31	from optparse import OptionParser
32
33	__all__ = ["run", "runctx", "help", "Profile"]
34
35	# Sample timer for use with
36	#i_count = 0
37	#def integer_timer():
38	# global i_count
39	# i_count = i_count + 1
40	# return i_count
41	#itimes = integer_timer # replace with C coded timer returning integers
42
43	#**************************************************************************
44	# The following are the static member functions for the profiler class
45	# Note that an instance of Profile() is not needed to call them.
46	#**************************************************************************
47
48	def run(statement, filename=None, sort=-1):
49	"""Run statement under profiler optionally saving results in filename
50
51	This function takes a single argument that can be passed to the
52	"exec" statement, and an optional file name. In all cases this
53	routine attempts to "exec" its first argument and gather profiling
54	statistics from the execution. If no file name is present, then this
55	function automatically prints a simple profiling report, sorted by the
56	standard name string (file/line/function-name) that is presented in
57	each line.
58	"""
59	prof = Profile()
60	try:
61	prof = prof.run(statement)
62	except SystemExit:
63	pass
64	if filename is not None:
65	prof.dump_stats(filename)
66	else:
67	return prof.print_stats(sort)
68
69	def runctx(statement, globals, locals, filename=None, sort=-1):
70	"""Run statement under profiler, supplying your own globals and locals,
71	optionally saving results in filename.
72
73	statement and filename have the same semantics as profile.run
74	"""
75	prof = Profile()
76	try:
77	prof = prof.runctx(statement, globals, locals)
78	except SystemExit:
79	pass
80
81	if filename is not None:
82	prof.dump_stats(filename)
83	else:
84	return prof.print_stats(sort)
85
86	# Backwards compatibility.
87	def help():
88	print "Documentation for the profile module can be found "
89	print "in the Python Library Reference, section 'The Python Profiler'."
90
91	if hasattr(os, "times"):
92	def _get_time_times(timer=os.times):
93	t = timer()
94	return t[0] + t[1]
95
96	# Using getrusage(3) is better than clock(3) if available:
97	# on some systems (e.g. FreeBSD), getrusage has a higher resolution
98	# Furthermore, on a POSIX system, returns microseconds, which
99	# wrap around after 36min.
100	_has_res = 0
101	try:
102	import resource
103	resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF)
104	def _get_time_resource(timer=resgetrusage):
105	t = timer()
106	return t[0] + t[1]
107	_has_res = 1
108	except ImportError:
109	pass
110
111	class Profile:
112	"""Profiler class.
113
114	self.cur is always a tuple. Each such tuple corresponds to a stack
115	frame that is currently active (self.cur[-2]). The following are the
116	definitions of its members. We use this external "parallel stack" to
117	avoid contaminating the program that we are profiling. (old profiler
118	used to write into the frames local dictionary!!) Derived classes
119	can change the definition of some entries, as long as they leave
120	[-2:] intact (frame and previous tuple). In case an internal error is
121	detected, the -3 element is used as the function name.
122
123	[ 0] = Time that needs to be charged to the parent frame's function.
124	It is used so that a function call will not have to access the
125	timing data for the parent frame.
126	[ 1] = Total time spent in this frame's function, excluding time in
127	subfunctions (this latter is tallied in cur[2]).
128	[ 2] = Total time spent in subfunctions, excluding time executing the
129	frame's function (this latter is tallied in cur[1]).
130	[-3] = Name of the function that corresponds to this frame.
131	[-2] = Actual frame that we correspond to (used to sync exception handling).
132	[-1] = Our parent 6-tuple (corresponds to frame.f_back).
133
134	Timing data for each function is stored as a 5-tuple in the dictionary
135	self.timings[]. The index is always the name stored in self.cur[-3].
136	The following are the definitions of the members:
137
138	[0] = The number of times this function was called, not counting direct
139	or indirect recursion,
140	[1] = Number of times this function appears on the stack, minus one
141	[2] = Total time spent internal to this function
142	[3] = Cumulative time that this function was present on the stack. In
143	non-recursive functions, this is the total execution time from start
144	to finish of each invocation of a function, including time spent in
145	all subfunctions.
146	[4] = A dictionary indicating for each function name, the number of times
147	it was called by us.
148	"""
149
150	bias = 0 # calibration constant
151
152	def __init__(self, timer=None, bias=None):
153	self.timings = {}
154	self.cur = None
155	self.cmd = ""
156	self.c_func_name = ""
157
158	if bias is None:
159	bias = self.bias
160	self.bias = bias # Materialize in local dict for lookup speed.
161
162	if not timer:
163	if _has_res:
164	self.timer = resgetrusage
165	self.dispatcher = self.trace_dispatch
166	self.get_time = _get_time_resource
167	elif hasattr(time, 'clock'):
168	self.timer = self.get_time = time.clock
169	self.dispatcher = self.trace_dispatch_i
170	elif hasattr(os, 'times'):
171	self.timer = os.times
172	self.dispatcher = self.trace_dispatch
173	self.get_time = _get_time_times
174	else:
175	self.timer = self.get_time = time.time
176	self.dispatcher = self.trace_dispatch_i
177	else:
178	self.timer = timer
179	t = self.timer() # test out timer function
180	try:
181	length = len(t)
182	except TypeError:
183	self.get_time = timer
184	self.dispatcher = self.trace_dispatch_i
185	else:
186	if length == 2:
187	self.dispatcher = self.trace_dispatch
188	else:
189	self.dispatcher = self.trace_dispatch_l
190	# This get_time() implementation needs to be defined
191	# here to capture the passed-in timer in the parameter
192	# list (for performance). Note that we can't assume
193	# the timer() result contains two values in all
194	# cases.
195	def get_time_timer(timer=timer, sum=sum):
196	return sum(timer())
197	self.get_time = get_time_timer
198	self.t = self.get_time()
199	self.simulate_call('profiler')
200
201	# Heavily optimized dispatch routine for os.times() timer
202
203	def trace_dispatch(self, frame, event, arg):
204	timer = self.timer
205	t = timer()
206	t = t[0] + t[1] - self.t - self.bias
207
208	if event == "c_call":
209	self.c_func_name = arg.__name__
210
211	if self.dispatch[event](self, frame,t):
212	t = timer()
213	self.t = t[0] + t[1]
214	else:
215	r = timer()
216	self.t = r[0] + r[1] - t # put back unrecorded delta
217
218	# Dispatch routine for best timer program (return = scalar, fastest if
219	# an integer but float works too -- and time.clock() relies on that).
220
221	def trace_dispatch_i(self, frame, event, arg):
222	timer = self.timer
223	t = timer() - self.t - self.bias
224
225	if event == "c_call":
226	self.c_func_name = arg.__name__
227
228	if self.dispatch[event](self, frame, t):
229	self.t = timer()
230	else:
231	self.t = timer() - t # put back unrecorded delta
232
233	# Dispatch routine for macintosh (timer returns time in ticks of
234	# 1/60th second)
235
236	def trace_dispatch_mac(self, frame, event, arg):
237	timer = self.timer
238	t = timer()/60.0 - self.t - self.bias
239
240	if event == "c_call":
241	self.c_func_name = arg.__name__
242
243	if self.dispatch[event](self, frame, t):
244	self.t = timer()/60.0
245	else:
246	self.t = timer()/60.0 - t # put back unrecorded delta
247
248	# SLOW generic dispatch routine for timer returning lists of numbers
249
250	def trace_dispatch_l(self, frame, event, arg):
251	get_time = self.get_time
252	t = get_time() - self.t - self.bias
253
254	if event == "c_call":
255	self.c_func_name = arg.__name__
256
257	if self.dispatch[event](self, frame, t):
258	self.t = get_time()
259	else:
260	self.t = get_time() - t # put back unrecorded delta
261
262	# In the event handlers, the first 3 elements of self.cur are unpacked
263	# into vrbls w/ 3-letter names. The last two characters are meant to be
264	# mnemonic:
265	# _pt self.cur[0] "parent time" time to be charged to parent frame
266	# _it self.cur[1] "internal time" time spent directly in the function
267	# _et self.cur[2] "external time" time spent in subfunctions
268
269	def trace_dispatch_exception(self, frame, t):
270	rpt, rit, ret, rfn, rframe, rcur = self.cur
271	if (rframe is not frame) and rcur:
272	return self.trace_dispatch_return(rframe, t)
273	self.cur = rpt, rit+t, ret, rfn, rframe, rcur
274	return 1
275
276
277	def trace_dispatch_call(self, frame, t):
278	if self.cur and frame.f_back is not self.cur[-2]:
279	rpt, rit, ret, rfn, rframe, rcur = self.cur
280	if not isinstance(rframe, Profile.fake_frame):
281	assert rframe.f_back is frame.f_back, ("Bad call", rfn,
282	rframe, rframe.f_back,
283	frame, frame.f_back)
284	self.trace_dispatch_return(rframe, 0)
285	assert (self.cur is None or \
286	frame.f_back is self.cur[-2]), ("Bad call",
287	self.cur[-3])
288	fcode = frame.f_code
289	fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
290	self.cur = (t, 0, 0, fn, frame, self.cur)
291	timings = self.timings
292	if fn in timings:
293	cc, ns, tt, ct, callers = timings[fn]
294	timings[fn] = cc, ns + 1, tt, ct, callers
295	else:
296	timings[fn] = 0, 0, 0, 0, {}
297	return 1
298
299	def trace_dispatch_c_call (self, frame, t):
300	fn = ("", 0, self.c_func_name)
301	self.cur = (t, 0, 0, fn, frame, self.cur)
302	timings = self.timings
303	if fn in timings:
304	cc, ns, tt, ct, callers = timings[fn]
305	timings[fn] = cc, ns+1, tt, ct, callers
306	else:
307	timings[fn] = 0, 0, 0, 0, {}
308	return 1
309
310	def trace_dispatch_return(self, frame, t):
311	if frame is not self.cur[-2]:
312	assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
313	self.trace_dispatch_return(self.cur[-2], 0)
314
315	# Prefix "r" means part of the Returning or exiting frame.
316	# Prefix "p" means part of the Previous or Parent or older frame.
317
318	rpt, rit, ret, rfn, frame, rcur = self.cur
319	rit = rit + t
320	frame_total = rit + ret
321
322	ppt, pit, pet, pfn, pframe, pcur = rcur
323	self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
324
325	timings = self.timings
326	cc, ns, tt, ct, callers = timings[rfn]
327	if not ns:
328	# This is the only occurrence of the function on the stack.
329	# Else this is a (directly or indirectly) recursive call, and
330	# its cumulative time will get updated when the topmost call to
331	# it returns.
332	ct = ct + frame_total
333	cc = cc + 1
334
335	if pfn in callers:
336	callers[pfn] = callers[pfn] + 1 # hack: gather more
337	# stats such as the amount of time added to ct courtesy
338	# of this specific call, and the contribution to cc
339	# courtesy of this call.
340	else:
341	callers[pfn] = 1
342
343	timings[rfn] = cc, ns - 1, tt + rit, ct, callers
344
345	return 1
346
347
348	dispatch = {
349	"call": trace_dispatch_call,
350	"exception": trace_dispatch_exception,
351	"return": trace_dispatch_return,
352	"c_call": trace_dispatch_c_call,
353	"c_exception": trace_dispatch_return, # the C function returned
354	"c_return": trace_dispatch_return,
355	}
356
357
358	# The next few functions play with self.cmd. By carefully preloading
359	# our parallel stack, we can force the profiled result to include
360	# an arbitrary string as the name of the calling function.
361	# We use self.cmd as that string, and the resulting stats look
362	# very nice :-).
363
364	def set_cmd(self, cmd):
365	if self.cur[-1]: return # already set
366	self.cmd = cmd
367	self.simulate_call(cmd)
368
369	class fake_code:
370	def __init__(self, filename, line, name):
371	self.co_filename = filename
372	self.co_line = line
373	self.co_name = name
374	self.co_firstlineno = 0
375
376	def __repr__(self):
377	return repr((self.co_filename, self.co_line, self.co_name))
378
379	class fake_frame:
380	def __init__(self, code, prior):
381	self.f_code = code
382	self.f_back = prior
383
384	def simulate_call(self, name):
385	code = self.fake_code('profile', 0, name)
386	if self.cur:
387	pframe = self.cur[-2]
388	else:
389	pframe = None
390	frame = self.fake_frame(code, pframe)
391	self.dispatch['call'](self, frame, 0)
392
393	# collect stats from pending stack, including getting final
394	# timings for self.cmd frame.
395
396	def simulate_cmd_complete(self):
397	get_time = self.get_time
398	t = get_time() - self.t
399	while self.cur[-1]:
400	# We can cause assertion errors here if
401	# dispatch_trace_return checks for a frame match!
402	self.dispatch['return'](self, self.cur[-2], t)
403	t = 0
404	self.t = get_time() - t
405
406
407	def print_stats(self, sort=-1):
408	import pstats
409	pstats.Stats(self).strip_dirs().sort_stats(sort). \
410	print_stats()
411
412	def dump_stats(self, file):
413	f = open(file, 'wb')
414	self.create_stats()
415	marshal.dump(self.stats, f)
416	f.close()
417
418	def create_stats(self):
419	self.simulate_cmd_complete()
420	self.snapshot_stats()
421
422	def snapshot_stats(self):
423	self.stats = {}
424	for func, (cc, ns, tt, ct, callers) in self.timings.iteritems():
425	callers = callers.copy()
426	nc = 0
427	for callcnt in callers.itervalues():
428	nc += callcnt
429	self.stats[func] = cc, nc, tt, ct, callers
430
431
432	# The following two methods can be called by clients to use
433	# a profiler to profile a statement, given as a string.
434
435	def run(self, cmd):
436	import __main__
437	dict = __main__.__dict__
438	return self.runctx(cmd, dict, dict)
439
440	def runctx(self, cmd, globals, locals):
441	self.set_cmd(cmd)
442	sys.setprofile(self.dispatcher)
443	try:
444	exec cmd in globals, locals
445	finally:
446	sys.setprofile(None)
447	return self
448
449	# This method is more useful to profile a single function call.
450	def runcall(self, func, args, *kw):
451	self.set_cmd(repr(func))
452	sys.setprofile(self.dispatcher)
453	try:
454	return func(args, *kw)
455	finally:
456	sys.setprofile(None)
457
458
459	#******************************************************************
460	# The following calculates the overhead for using a profiler. The
461	# problem is that it takes a fair amount of time for the profiler
462	# to stop the stopwatch (from the time it receives an event).
463	# Similarly, there is a delay from the time that the profiler
464	# re-starts the stopwatch before the user's code really gets to
465	# continue. The following code tries to measure the difference on
466	# a per-event basis.
467	#
468	# Note that this difference is only significant if there are a lot of
469	# events, and relatively little user code per event. For example,
470	# code with small functions will typically benefit from having the
471	# profiler calibrated for the current platform. This could be
472	# done on the fly during init() time, but it is not worth the
473	# effort. Also note that if too large a value specified, then
474	# execution time on some functions will actually appear as a
475	# negative number. It is normal for some functions (with very
476	# low call counts) to have such negative stats, even if the
477	# calibration figure is "correct."
478	#
479	# One alternative to profile-time calibration adjustments (i.e.,
480	# adding in the magic little delta during each event) is to track
481	# more carefully the number of events (and cumulatively, the number
482	# of events during sub functions) that are seen. If this were
483	# done, then the arithmetic could be done after the fact (i.e., at
484	# display time). Currently, we track only call/return events.
485	# These values can be deduced by examining the callees and callers
486	# vectors for each functions. Hence we can almost correct the
487	# internal time figure at print time (note that we currently don't
488	# track exception event processing counts). Unfortunately, there
489	# is currently no similar information for cumulative sub-function
490	# time. It would not be hard to "get all this info" at profiler
491	# time. Specifically, we would have to extend the tuples to keep
492	# counts of this in each frame, and then extend the defs of timing
493	# tuples to include the significant two figures. I'm a bit fearful
494	# that this additional feature will slow the heavily optimized
495	# event/time ratio (i.e., the profiler would run slower, fur a very
496	# low "value added" feature.)
497	#**************************************************************
498
499	def calibrate(self, m, verbose=0):
500	if self.__class__ is not Profile:
501	raise TypeError("Subclasses must override .calibrate().")
502
503	saved_bias = self.bias
504	self.bias = 0
505	try:
506	return self._calibrate_inner(m, verbose)
507	finally:
508	self.bias = saved_bias
509
510	def _calibrate_inner(self, m, verbose):
511	get_time = self.get_time
512
513	# Set up a test case to be run with and without profiling. Include
514	# lots of calls, because we're trying to quantify stopwatch overhead.
515	# Do not raise any exceptions, though, because we want to know
516	# exactly how many profile events are generated (one call event, +
517	# one return event, per Python-level call).
518
519	def f1(n):
520	for i in range(n):
521	x = 1
522
523	def f(m, f1=f1):
524	for i in range(m):
525	f1(100)
526
527	f(m) # warm up the cache
528
529	# elapsed_noprofile <- time f(m) takes without profiling.
530	t0 = get_time()
531	f(m)
532	t1 = get_time()
533	elapsed_noprofile = t1 - t0
534	if verbose:
535	print "elapsed time without profiling =", elapsed_noprofile
536
537	# elapsed_profile <- time f(m) takes with profiling. The difference
538	# is profiling overhead, only some of which the profiler subtracts
539	# out on its own.
540	p = Profile()
541	t0 = get_time()
542	p.runctx('f(m)', globals(), locals())
543	t1 = get_time()
544	elapsed_profile = t1 - t0
545	if verbose:
546	print "elapsed time with profiling =", elapsed_profile
547
548	# reported_time <- "CPU seconds" the profiler charged to f and f1.
549	total_calls = 0.0
550	reported_time = 0.0
551	for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
552	p.timings.items():
553	if funcname in ("f", "f1"):
554	total_calls += cc
555	reported_time += tt
556
557	if verbose:
558	print "'CPU seconds' profiler reported =", reported_time
559	print "total # calls =", total_calls
560	if total_calls != m + 1:
561	raise ValueError("internal error: total calls = %d" % total_calls)
562
563	# reported_time - elapsed_noprofile = overhead the profiler wasn't
564	# able to measure. Divide by twice the number of calls (since there
565	# are two profiler events per call in this test) to get the hidden
566	# overhead per event.
567	mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
568	if verbose:
569	print "mean stopwatch overhead per profile event =", mean
570	return mean
571
572	#****************************************************************************
573	def Stats(*args):
574	print 'Report generating functions are in the "pstats" module\a'
575
576	def main():
577	usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
578	parser = OptionParser(usage=usage)
579	parser.allow_interspersed_args = False
580	parser.add_option('-o', '--outfile', dest="outfile",
581	help="Save stats to <outfile>", default=None)
582	parser.add_option('-s', '--sort', dest="sort",
583	help="Sort order when printing to stdout, based on pstats.Stats class",
584	default=-1)
585
586	if not sys.argv[1:]:
587	parser.print_usage()
588	sys.exit(2)
589
590	(options, args) = parser.parse_args()
591	sys.argv[:] = args
592
593	if len(args) > 0:
594	progname = args[0]
595	sys.path.insert(0, os.path.dirname(progname))
596	with open(progname, 'rb') as fp:
597	code = compile(fp.read(), progname, 'exec')
598	globs = {
599	'__file__': progname,
600	'__name__': '__main__',
601	'__package__': None,
602	}
603	runctx(code, globs, None, options.outfile, options.sort)
604	else:
605	parser.print_usage()
606	return parser
607
608	# When invoked as main program, invoke the profiler on a script
609	if __name__ == '__main__':
610	main()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/profile.py@ 608

Download in other formats: