Context Navigation

← Previous Revision
Next Revision →
Normal
Revision Log

profile.py

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 22.2 KB

Rev	Line
[2]	1	#! /usr/bin/env python
	2	#
	3	# Class for profiling python code. rev 1.0 6/2/94
	4	#
[391]	5	# Written by James Roskind
[2]	6	# Based on prior profile module by Sjoerd Mullender...
	7	# which was hacked somewhat by: Guido van Rossum
	8
	9	"""Class for profiling Python code."""
	10
[391]	11	# Copyright Disney Enterprises, Inc. All Rights Reserved.
	12	# Licensed to PSF under a Contributor Agreement
[2]	13	#
[391]	14	# Licensed under the Apache License, Version 2.0 (the "License");
	15	# you may not use this file except in compliance with the License.
	16	# You may obtain a copy of the License at
[2]	17	#
[391]	18	# http://www.apache.org/licenses/LICENSE-2.0
	19	#
	20	# Unless required by applicable law or agreed to in writing, software
	21	# distributed under the License is distributed on an "AS IS" BASIS,
	22	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
	23	# either express or implied. See the License for the specific language
	24	# governing permissions and limitations under the License.
[2]	25
	26
	27	import sys
	28	import os
	29	import time
	30	import marshal
	31	from optparse import OptionParser
	32
	33	__all__ = ["run", "runctx", "help", "Profile"]
	34
	35	# Sample timer for use with
	36	#i_count = 0
	37	#def integer_timer():
	38	# global i_count
	39	# i_count = i_count + 1
	40	# return i_count
	41	#itimes = integer_timer # replace with C coded timer returning integers
	42
	43	#**************************************************************************
	44	# The following are the static member functions for the profiler class
	45	# Note that an instance of Profile() is not needed to call them.
	46	#**************************************************************************
	47
	48	def run(statement, filename=None, sort=-1):
	49	"""Run statement under profiler optionally saving results in filename
	50
	51	This function takes a single argument that can be passed to the
	52	"exec" statement, and an optional file name. In all cases this
	53	routine attempts to "exec" its first argument and gather profiling
	54	statistics from the execution. If no file name is present, then this
	55	function automatically prints a simple profiling report, sorted by the
	56	standard name string (file/line/function-name) that is presented in
	57	each line.
	58	"""
	59	prof = Profile()
	60	try:
	61	prof = prof.run(statement)
	62	except SystemExit:
	63	pass
	64	if filename is not None:
	65	prof.dump_stats(filename)
	66	else:
	67	return prof.print_stats(sort)
	68
[391]	69	def runctx(statement, globals, locals, filename=None, sort=-1):
[2]	70	"""Run statement under profiler, supplying your own globals and locals,
	71	optionally saving results in filename.
	72
	73	statement and filename have the same semantics as profile.run
	74	"""
	75	prof = Profile()
	76	try:
	77	prof = prof.runctx(statement, globals, locals)
	78	except SystemExit:
	79	pass
	80
	81	if filename is not None:
	82	prof.dump_stats(filename)
	83	else:
[391]	84	return prof.print_stats(sort)
[2]	85
	86	# Backwards compatibility.
	87	def help():
	88	print "Documentation for the profile module can be found "
	89	print "in the Python Library Reference, section 'The Python Profiler'."
	90
	91	if hasattr(os, "times"):
	92	def _get_time_times(timer=os.times):
	93	t = timer()
	94	return t[0] + t[1]
	95
	96	# Using getrusage(3) is better than clock(3) if available:
	97	# on some systems (e.g. FreeBSD), getrusage has a higher resolution
	98	# Furthermore, on a POSIX system, returns microseconds, which
	99	# wrap around after 36min.
	100	_has_res = 0
	101	try:
	102	import resource
	103	resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF)
	104	def _get_time_resource(timer=resgetrusage):
	105	t = timer()
	106	return t[0] + t[1]
	107	_has_res = 1
	108	except ImportError:
	109	pass
	110
	111	class Profile:
	112	"""Profiler class.
	113
	114	self.cur is always a tuple. Each such tuple corresponds to a stack
	115	frame that is currently active (self.cur[-2]). The following are the
	116	definitions of its members. We use this external "parallel stack" to
	117	avoid contaminating the program that we are profiling. (old profiler
	118	used to write into the frames local dictionary!!) Derived classes
	119	can change the definition of some entries, as long as they leave
	120	[-2:] intact (frame and previous tuple). In case an internal error is
	121	detected, the -3 element is used as the function name.
	122
	123	[ 0] = Time that needs to be charged to the parent frame's function.
	124	It is used so that a function call will not have to access the
	125	timing data for the parent frame.
	126	[ 1] = Total time spent in this frame's function, excluding time in
	127	subfunctions (this latter is tallied in cur[2]).
	128	[ 2] = Total time spent in subfunctions, excluding time executing the
	129	frame's function (this latter is tallied in cur[1]).
	130	[-3] = Name of the function that corresponds to this frame.
	131	[-2] = Actual frame that we correspond to (used to sync exception handling).
	132	[-1] = Our parent 6-tuple (corresponds to frame.f_back).
	133
	134	Timing data for each function is stored as a 5-tuple in the dictionary
	135	self.timings[]. The index is always the name stored in self.cur[-3].
	136	The following are the definitions of the members:
	137
	138	[0] = The number of times this function was called, not counting direct
	139	or indirect recursion,
	140	[1] = Number of times this function appears on the stack, minus one
	141	[2] = Total time spent internal to this function
	142	[3] = Cumulative time that this function was present on the stack. In
	143	non-recursive functions, this is the total execution time from start
	144	to finish of each invocation of a function, including time spent in
	145	all subfunctions.
	146	[4] = A dictionary indicating for each function name, the number of times
	147	it was called by us.
	148	"""
	149
	150	bias = 0 # calibration constant
	151
	152	def __init__(self, timer=None, bias=None):
	153	self.timings = {}
	154	self.cur = None
	155	self.cmd = ""
	156	self.c_func_name = ""
	157
	158	if bias is None:
	159	bias = self.bias
	160	self.bias = bias # Materialize in local dict for lookup speed.
	161
	162	if not timer:
	163	if _has_res:
	164	self.timer = resgetrusage
	165	self.dispatcher = self.trace_dispatch
	166	self.get_time = _get_time_resource
	167	elif hasattr(time, 'clock'):
	168	self.timer = self.get_time = time.clock
	169	self.dispatcher = self.trace_dispatch_i
	170	elif hasattr(os, 'times'):
	171	self.timer = os.times
	172	self.dispatcher = self.trace_dispatch
	173	self.get_time = _get_time_times
	174	else:
	175	self.timer = self.get_time = time.time
	176	self.dispatcher = self.trace_dispatch_i
	177	else:
	178	self.timer = timer
	179	t = self.timer() # test out timer function
	180	try:
	181	length = len(t)
	182	except TypeError:
	183	self.get_time = timer
	184	self.dispatcher = self.trace_dispatch_i
	185	else:
	186	if length == 2:
	187	self.dispatcher = self.trace_dispatch
	188	else:
	189	self.dispatcher = self.trace_dispatch_l
	190	# This get_time() implementation needs to be defined
	191	# here to capture the passed-in timer in the parameter
	192	# list (for performance). Note that we can't assume
	193	# the timer() result contains two values in all
	194	# cases.
	195	def get_time_timer(timer=timer, sum=sum):
	196	return sum(timer())
	197	self.get_time = get_time_timer
	198	self.t = self.get_time()
	199	self.simulate_call('profiler')
	200
	201	# Heavily optimized dispatch routine for os.times() timer
	202
	203	def trace_dispatch(self, frame, event, arg):
	204	timer = self.timer
	205	t = timer()
	206	t = t[0] + t[1] - self.t - self.bias
	207
	208	if event == "c_call":
	209	self.c_func_name = arg.__name__
	210
	211	if self.dispatch[event](self, frame,t):
	212	t = timer()
	213	self.t = t[0] + t[1]
	214	else:
	215	r = timer()
	216	self.t = r[0] + r[1] - t # put back unrecorded delta
	217
	218	# Dispatch routine for best timer program (return = scalar, fastest if
	219	# an integer but float works too -- and time.clock() relies on that).
	220
	221	def trace_dispatch_i(self, frame, event, arg):
	222	timer = self.timer
	223	t = timer() - self.t - self.bias
	224
	225	if event == "c_call":
	226	self.c_func_name = arg.__name__
	227
	228	if self.dispatch[event](self, frame, t):
	229	self.t = timer()
	230	else:
	231	self.t = timer() - t # put back unrecorded delta
	232
	233	# Dispatch routine for macintosh (timer returns time in ticks of
	234	# 1/60th second)
	235
	236	def trace_dispatch_mac(self, frame, event, arg):
	237	timer = self.timer
	238	t = timer()/60.0 - self.t - self.bias
	239
	240	if event == "c_call":
	241	self.c_func_name = arg.__name__
	242
	243	if self.dispatch[event](self, frame, t):
	244	self.t = timer()/60.0
	245	else:
	246	self.t = timer()/60.0 - t # put back unrecorded delta
	247
	248	# SLOW generic dispatch routine for timer returning lists of numbers
	249
	250	def trace_dispatch_l(self, frame, event, arg):
	251	get_time = self.get_time
	252	t = get_time() - self.t - self.bias
	253
	254	if event == "c_call":
	255	self.c_func_name = arg.__name__
	256
	257	if self.dispatch[event](self, frame, t):
	258	self.t = get_time()
	259	else:
	260	self.t = get_time() - t # put back unrecorded delta
	261
	262	# In the event handlers, the first 3 elements of self.cur are unpacked
	263	# into vrbls w/ 3-letter names. The last two characters are meant to be
	264	# mnemonic:
	265	# _pt self.cur[0] "parent time" time to be charged to parent frame
	266	# _it self.cur[1] "internal time" time spent directly in the function
	267	# _et self.cur[2] "external time" time spent in subfunctions
	268
	269	def trace_dispatch_exception(self, frame, t):
	270	rpt, rit, ret, rfn, rframe, rcur = self.cur
	271	if (rframe is not frame) and rcur:
	272	return self.trace_dispatch_return(rframe, t)
	273	self.cur = rpt, rit+t, ret, rfn, rframe, rcur
	274	return 1
	275
	276
	277	def trace_dispatch_call(self, frame, t):
	278	if self.cur and frame.f_back is not self.cur[-2]:
	279	rpt, rit, ret, rfn, rframe, rcur = self.cur
	280	if not isinstance(rframe, Profile.fake_frame):
	281	assert rframe.f_back is frame.f_back, ("Bad call", rfn,
	282	rframe, rframe.f_back,
	283	frame, frame.f_back)
	284	self.trace_dispatch_return(rframe, 0)
	285	assert (self.cur is None or \
	286	frame.f_back is self.cur[-2]), ("Bad call",
	287	self.cur[-3])
	288	fcode = frame.f_code
	289	fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
	290	self.cur = (t, 0, 0, fn, frame, self.cur)
	291	timings = self.timings
	292	if fn in timings:
	293	cc, ns, tt, ct, callers = timings[fn]
	294	timings[fn] = cc, ns + 1, tt, ct, callers
	295	else:
	296	timings[fn] = 0, 0, 0, 0, {}
	297	return 1
	298
	299	def trace_dispatch_c_call (self, frame, t):
	300	fn = ("", 0, self.c_func_name)
	301	self.cur = (t, 0, 0, fn, frame, self.cur)
	302	timings = self.timings
	303	if fn in timings:
	304	cc, ns, tt, ct, callers = timings[fn]
	305	timings[fn] = cc, ns+1, tt, ct, callers
	306	else:
	307	timings[fn] = 0, 0, 0, 0, {}
	308	return 1
	309
	310	def trace_dispatch_return(self, frame, t):
	311	if frame is not self.cur[-2]:
	312	assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
	313	self.trace_dispatch_return(self.cur[-2], 0)
	314
	315	# Prefix "r" means part of the Returning or exiting frame.
	316	# Prefix "p" means part of the Previous or Parent or older frame.
	317
	318	rpt, rit, ret, rfn, frame, rcur = self.cur
	319	rit = rit + t
	320	frame_total = rit + ret
	321
	322	ppt, pit, pet, pfn, pframe, pcur = rcur
	323	self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
	324
	325	timings = self.timings
	326	cc, ns, tt, ct, callers = timings[rfn]
	327	if not ns:
	328	# This is the only occurrence of the function on the stack.
	329	# Else this is a (directly or indirectly) recursive call, and
	330	# its cumulative time will get updated when the topmost call to
	331	# it returns.
	332	ct = ct + frame_total
	333	cc = cc + 1
	334
	335	if pfn in callers:
	336	callers[pfn] = callers[pfn] + 1 # hack: gather more
	337	# stats such as the amount of time added to ct courtesy
	338	# of this specific call, and the contribution to cc
	339	# courtesy of this call.
	340	else:
	341	callers[pfn] = 1
	342
	343	timings[rfn] = cc, ns - 1, tt + rit, ct, callers
	344
	345	return 1
	346
	347
	348	dispatch = {
	349	"call": trace_dispatch_call,
	350	"exception": trace_dispatch_exception,
	351	"return": trace_dispatch_return,
	352	"c_call": trace_dispatch_c_call,
	353	"c_exception": trace_dispatch_return, # the C function returned
	354	"c_return": trace_dispatch_return,
	355	}
	356
	357
	358	# The next few functions play with self.cmd. By carefully preloading
	359	# our parallel stack, we can force the profiled result to include
	360	# an arbitrary string as the name of the calling function.
	361	# We use self.cmd as that string, and the resulting stats look
	362	# very nice :-).
	363
	364	def set_cmd(self, cmd):
	365	if self.cur[-1]: return # already set
	366	self.cmd = cmd
	367	self.simulate_call(cmd)
	368
	369	class fake_code:
	370	def __init__(self, filename, line, name):
	371	self.co_filename = filename
	372	self.co_line = line
	373	self.co_name = name
	374	self.co_firstlineno = 0
	375
	376	def __repr__(self):
	377	return repr((self.co_filename, self.co_line, self.co_name))
	378
	379	class fake_frame:
	380	def __init__(self, code, prior):
	381	self.f_code = code
	382	self.f_back = prior
	383
	384	def simulate_call(self, name):
	385	code = self.fake_code('profile', 0, name)
	386	if self.cur:
	387	pframe = self.cur[-2]
	388	else:
	389	pframe = None
	390	frame = self.fake_frame(code, pframe)
	391	self.dispatch['call'](self, frame, 0)
	392
	393	# collect stats from pending stack, including getting final
	394	# timings for self.cmd frame.
	395
	396	def simulate_cmd_complete(self):
	397	get_time = self.get_time
	398	t = get_time() - self.t
	399	while self.cur[-1]:
	400	# We can cause assertion errors here if
	401	# dispatch_trace_return checks for a frame match!
	402	self.dispatch['return'](self, self.cur[-2], t)
	403	t = 0
	404	self.t = get_time() - t
	405
	406
	407	def print_stats(self, sort=-1):
	408	import pstats
	409	pstats.Stats(self).strip_dirs().sort_stats(sort). \
	410	print_stats()
	411
	412	def dump_stats(self, file):
	413	f = open(file, 'wb')
	414	self.create_stats()
	415	marshal.dump(self.stats, f)
	416	f.close()
	417
	418	def create_stats(self):
	419	self.simulate_cmd_complete()
	420	self.snapshot_stats()
	421
	422	def snapshot_stats(self):
	423	self.stats = {}
	424	for func, (cc, ns, tt, ct, callers) in self.timings.iteritems():
	425	callers = callers.copy()
	426	nc = 0
	427	for callcnt in callers.itervalues():
	428	nc += callcnt
	429	self.stats[func] = cc, nc, tt, ct, callers
	430
	431
	432	# The following two methods can be called by clients to use
	433	# a profiler to profile a statement, given as a string.
	434
	435	def run(self, cmd):
	436	import __main__
	437	dict = __main__.__dict__
	438	return self.runctx(cmd, dict, dict)
	439
	440	def runctx(self, cmd, globals, locals):
	441	self.set_cmd(cmd)
	442	sys.setprofile(self.dispatcher)
	443	try:
	444	exec cmd in globals, locals
	445	finally:
	446	sys.setprofile(None)
	447	return self
	448
	449	# This method is more useful to profile a single function call.
	450	def runcall(self, func, args, *kw):
	451	self.set_cmd(repr(func))
	452	sys.setprofile(self.dispatcher)
	453	try:
	454	return func(args, *kw)
	455	finally:
	456	sys.setprofile(None)
	457
	458
	459	#******************************************************************
	460	# The following calculates the overhead for using a profiler. The
	461	# problem is that it takes a fair amount of time for the profiler
	462	# to stop the stopwatch (from the time it receives an event).
	463	# Similarly, there is a delay from the time that the profiler
	464	# re-starts the stopwatch before the user's code really gets to
	465	# continue. The following code tries to measure the difference on
	466	# a per-event basis.
	467	#
	468	# Note that this difference is only significant if there are a lot of
	469	# events, and relatively little user code per event. For example,
	470	# code with small functions will typically benefit from having the
	471	# profiler calibrated for the current platform. This could be
	472	# done on the fly during init() time, but it is not worth the
	473	# effort. Also note that if too large a value specified, then
	474	# execution time on some functions will actually appear as a
	475	# negative number. It is normal for some functions (with very
	476	# low call counts) to have such negative stats, even if the
	477	# calibration figure is "correct."
	478	#
	479	# One alternative to profile-time calibration adjustments (i.e.,
	480	# adding in the magic little delta during each event) is to track
	481	# more carefully the number of events (and cumulatively, the number
	482	# of events during sub functions) that are seen. If this were
	483	# done, then the arithmetic could be done after the fact (i.e., at
	484	# display time). Currently, we track only call/return events.
	485	# These values can be deduced by examining the callees and callers
	486	# vectors for each functions. Hence we can almost correct the
	487	# internal time figure at print time (note that we currently don't
	488	# track exception event processing counts). Unfortunately, there
	489	# is currently no similar information for cumulative sub-function
	490	# time. It would not be hard to "get all this info" at profiler
	491	# time. Specifically, we would have to extend the tuples to keep
	492	# counts of this in each frame, and then extend the defs of timing
	493	# tuples to include the significant two figures. I'm a bit fearful
	494	# that this additional feature will slow the heavily optimized
	495	# event/time ratio (i.e., the profiler would run slower, fur a very
	496	# low "value added" feature.)
	497	#**************************************************************
	498
	499	def calibrate(self, m, verbose=0):
	500	if self.__class__ is not Profile:
	501	raise TypeError("Subclasses must override .calibrate().")
	502
	503	saved_bias = self.bias
	504	self.bias = 0
	505	try:
	506	return self._calibrate_inner(m, verbose)
	507	finally:
	508	self.bias = saved_bias
	509
	510	def _calibrate_inner(self, m, verbose):
	511	get_time = self.get_time
	512
	513	# Set up a test case to be run with and without profiling. Include
	514	# lots of calls, because we're trying to quantify stopwatch overhead.
	515	# Do not raise any exceptions, though, because we want to know
	516	# exactly how many profile events are generated (one call event, +
	517	# one return event, per Python-level call).
	518
	519	def f1(n):
	520	for i in range(n):
	521	x = 1
	522
	523	def f(m, f1=f1):
	524	for i in range(m):
	525	f1(100)
	526
	527	f(m) # warm up the cache
	528
	529	# elapsed_noprofile <- time f(m) takes without profiling.
	530	t0 = get_time()
	531	f(m)
	532	t1 = get_time()
	533	elapsed_noprofile = t1 - t0
	534	if verbose:
	535	print "elapsed time without profiling =", elapsed_noprofile
	536
	537	# elapsed_profile <- time f(m) takes with profiling. The difference
	538	# is profiling overhead, only some of which the profiler subtracts
	539	# out on its own.
	540	p = Profile()
	541	t0 = get_time()
	542	p.runctx('f(m)', globals(), locals())
	543	t1 = get_time()
	544	elapsed_profile = t1 - t0
	545	if verbose:
	546	print "elapsed time with profiling =", elapsed_profile
	547
	548	# reported_time <- "CPU seconds" the profiler charged to f and f1.
	549	total_calls = 0.0
	550	reported_time = 0.0
	551	for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
	552	p.timings.items():
	553	if funcname in ("f", "f1"):
	554	total_calls += cc
	555	reported_time += tt
	556
	557	if verbose:
	558	print "'CPU seconds' profiler reported =", reported_time
	559	print "total # calls =", total_calls
	560	if total_calls != m + 1:
	561	raise ValueError("internal error: total calls = %d" % total_calls)
	562
	563	# reported_time - elapsed_noprofile = overhead the profiler wasn't
	564	# able to measure. Divide by twice the number of calls (since there
	565	# are two profiler events per call in this test) to get the hidden
	566	# overhead per event.
	567	mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
	568	if verbose:
	569	print "mean stopwatch overhead per profile event =", mean
	570	return mean
	571
	572	#****************************************************************************
	573	def Stats(*args):
	574	print 'Report generating functions are in the "pstats" module\a'
	575
	576	def main():
	577	usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
	578	parser = OptionParser(usage=usage)
	579	parser.allow_interspersed_args = False
	580	parser.add_option('-o', '--outfile', dest="outfile",
	581	help="Save stats to <outfile>", default=None)
	582	parser.add_option('-s', '--sort', dest="sort",
[391]	583	help="Sort order when printing to stdout, based on pstats.Stats class",
	584	default=-1)
[2]	585
	586	if not sys.argv[1:]:
	587	parser.print_usage()
	588	sys.exit(2)
	589
	590	(options, args) = parser.parse_args()
[391]	591	sys.argv[:] = args
[2]	592
[391]	593	if len(args) > 0:
	594	progname = args[0]
	595	sys.path.insert(0, os.path.dirname(progname))
	596	with open(progname, 'rb') as fp:
	597	code = compile(fp.read(), progname, 'exec')
	598	globs = {
	599	'__file__': progname,
	600	'__name__': '__main__',
	601	'__package__': None,
	602	}
	603	runctx(code, globs, None, options.outfile, options.sort)
[2]	604	else:
	605	parser.print_usage()
	606	return parser
	607
	608	# When invoked as main program, invoke the profiler on a script
	609	if __name__ == '__main__':
	610	main()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/profile.py

Download in other formats: