source: trunk/binutils/gprof/hist.c@ 3830

Last change on this file since 3830 was 610, checked in by bird, 22 years ago

This commit was generated by cvs2svn to compensate for changes in r609,
which included commits to RCS files with non-trunk default branches.

  • Property cvs2svn:cvs-rev set to 1.1.1.2
  • Property svn:eol-style set to native
  • Property svn:executable set to *
File size: 15.4 KB
Line 
1/* hist.c - Histogram related operations.
2
3 Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU Binutils.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22
23#include "libiberty.h"
24#include "gprof.h"
25#include "search_list.h"
26#include "source.h"
27#include "symtab.h"
28#include "corefile.h"
29#include "gmon_io.h"
30#include "gmon_out.h"
31#include "hist.h"
32#include "sym_ids.h"
33#include "utils.h"
34
35#define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
36
37static void scale_and_align_entries PARAMS ((void));
38static void print_header PARAMS ((int));
39static void print_line PARAMS ((Sym *, double));
40static int cmp_time PARAMS ((const PTR, const PTR));
41
42/* Declarations of automatically generated functions to output blurbs. */
43extern void flat_blurb PARAMS ((FILE * fp));
44
45bfd_vma s_lowpc; /* Lowest address in .text. */
46bfd_vma s_highpc = 0; /* Highest address in .text. */
47bfd_vma lowpc, highpc; /* Same, but expressed in UNITs. */
48int hist_num_bins = 0; /* Number of histogram samples. */
49int *hist_sample = 0; /* Histogram samples (shorts in the file!). */
50double hist_scale;
51char hist_dimension[16] = "seconds";
52char hist_dimension_abbrev = 's';
53
54static double accum_time; /* Accumulated time so far for print_line(). */
55static double total_time; /* Total time for all routines. */
56
57/* Table of SI prefixes for powers of 10 (used to automatically
58 scale some of the values in the flat profile). */
59const struct
60 {
61 char prefix;
62 double scale;
63 }
64SItab[] =
65{
66 { 'T', 1e-12 }, /* tera */
67 { 'G', 1e-09 }, /* giga */
68 { 'M', 1e-06 }, /* mega */
69 { 'K', 1e-03 }, /* kilo */
70 { ' ', 1e-00 },
71 { 'm', 1e+03 }, /* milli */
72 { 'u', 1e+06 }, /* micro */
73 { 'n', 1e+09 }, /* nano */
74 { 'p', 1e+12 }, /* pico */
75 { 'f', 1e+15 }, /* femto */
76 { 'a', 1e+18 } /* ato */
77};
78
79
80/* Read the histogram from file IFP. FILENAME is the name of IFP and
81 is provided for formatting error messages only. */
82
83void
84hist_read_rec (ifp, filename)
85 FILE * ifp;
86 const char *filename;
87{
88 bfd_vma n_lowpc, n_highpc;
89 int i, ncnt, profrate;
90 UNIT count;
91
92 if (gmon_io_read_vma (ifp, &n_lowpc)
93 || gmon_io_read_vma (ifp, &n_highpc)
94 || gmon_io_read_32 (ifp, &ncnt)
95 || gmon_io_read_32 (ifp, &profrate)
96 || gmon_io_read (ifp, hist_dimension, 15)
97 || gmon_io_read (ifp, &hist_dimension_abbrev, 1))
98 {
99 fprintf (stderr, _("%s: %s: unexpected end of file\n"),
100 whoami, filename);
101
102 done (1);
103 }
104
105 if (!s_highpc)
106 {
107 /* This is the first histogram record. */
108 s_lowpc = n_lowpc;
109 s_highpc = n_highpc;
110 lowpc = (bfd_vma) n_lowpc / sizeof (UNIT);
111 highpc = (bfd_vma) n_highpc / sizeof (UNIT);
112 hist_num_bins = ncnt;
113 hz = profrate;
114 }
115
116 DBG (SAMPLEDEBUG,
117 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %d\n",
118 (unsigned long) n_lowpc, (unsigned long) n_highpc, ncnt);
119 printf ("[hist_read_rec] s_lowpc 0x%lx s_highpc 0x%lx nsamples %d\n",
120 (unsigned long) s_lowpc, (unsigned long) s_highpc,
121 hist_num_bins);
122 printf ("[hist_read_rec] lowpc 0x%lx highpc 0x%lx\n",
123 (unsigned long) lowpc, (unsigned long) highpc));
124
125 if (n_lowpc != s_lowpc || n_highpc != s_highpc
126 || ncnt != hist_num_bins || hz != profrate)
127 {
128 fprintf (stderr, _("%s: `%s' is incompatible with first gmon file\n"),
129 whoami, filename);
130 done (1);
131 }
132
133 if (!hist_sample)
134 {
135 hist_sample = (int *) xmalloc (hist_num_bins * sizeof (hist_sample[0]));
136 memset (hist_sample, 0, hist_num_bins * sizeof (hist_sample[0]));
137 }
138
139 for (i = 0; i < hist_num_bins; ++i)
140 {
141 if (fread (&count[0], sizeof (count), 1, ifp) != 1)
142 {
143 fprintf (stderr,
144 _("%s: %s: unexpected EOF after reading %d of %d samples\n"),
145 whoami, filename, i, hist_num_bins);
146 done (1);
147 }
148 hist_sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]);
149 DBG (SAMPLEDEBUG,
150 printf ("[hist_read_rec] 0x%lx: %u\n",
151 (unsigned long) (n_lowpc + i * (n_highpc - n_lowpc) / ncnt),
152 hist_sample[i]));
153 }
154}
155
156
157/* Write execution histogram to file OFP. FILENAME is the name
158 of OFP and is provided for formatting error-messages only. */
159
160void
161hist_write_hist (ofp, filename)
162 FILE * ofp;
163 const char *filename;
164{
165 UNIT count;
166 int i;
167
168 /* Write header. */
169
170 if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST)
171 || gmon_io_write_vma (ofp, s_lowpc)
172 || gmon_io_write_vma (ofp, s_highpc)
173 || gmon_io_write_32 (ofp, hist_num_bins)
174 || gmon_io_write_32 (ofp, hz)
175 || gmon_io_write (ofp, hist_dimension, 15)
176 || gmon_io_write (ofp, &hist_dimension_abbrev, 1))
177 {
178 perror (filename);
179 done (1);
180 }
181
182 for (i = 0; i < hist_num_bins; ++i)
183 {
184 bfd_put_16 (core_bfd, (bfd_vma) hist_sample[i], (bfd_byte *) &count[0]);
185
186 if (fwrite (&count[0], sizeof (count), 1, ofp) != 1)
187 {
188 perror (filename);
189 done (1);
190 }
191 }
192}
193
194
195/* Calculate scaled entry point addresses (to save time in
196 hist_assign_samples), and, on architectures that have procedure
197 entry masks at the start of a function, possibly push the scaled
198 entry points over the procedure entry mask, if it turns out that
199 the entry point is in one bin and the code for a routine is in the
200 next bin. */
201
202static void
203scale_and_align_entries ()
204{
205 Sym *sym;
206 bfd_vma bin_of_entry;
207 bfd_vma bin_of_code;
208
209 for (sym = symtab.base; sym < symtab.limit; sym++)
210 {
211 sym->hist.scaled_addr = sym->addr / sizeof (UNIT);
212 bin_of_entry = (sym->hist.scaled_addr - lowpc) / hist_scale;
213 bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - lowpc)
214 / hist_scale);
215 if (bin_of_entry < bin_of_code)
216 {
217 DBG (SAMPLEDEBUG,
218 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
219 (unsigned long) sym->hist.scaled_addr,
220 (unsigned long) (sym->hist.scaled_addr
221 + UNITS_TO_CODE)));
222 sym->hist.scaled_addr += UNITS_TO_CODE;
223 }
224 }
225}
226
227
228/* Assign samples to the symbol to which they belong.
229
230 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
231 which may overlap one more symbol address ranges. If a symbol
232 overlaps with the bin's address range by O percent, then O percent
233 of the bin's count is credited to that symbol.
234
235 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
236 with respect to the symbol's address range [SYM_LOW_PC,
237 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
238 the distance (in UNITs) between the arrows, the fraction of the
239 sample that is to be credited to the symbol which starts at
240 SYM_LOW_PC.
241
242 sym_low_pc sym_high_pc
243 | |
244 v v
245
246 +-----------------------------------------------+
247 | |
248 | ->| |<- ->| |<- ->| |<- |
249 | | | | | |
250 +---------+ +---------+ +---------+
251
252 ^ ^ ^ ^ ^ ^
253 | | | | | |
254 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
255
256 For the VAX we assert that samples will never fall in the first two
257 bytes of any routine, since that is the entry mask, thus we call
258 scale_and_align_entries() to adjust the entry points if the entry
259 mask falls in one bin but the code for the routine doesn't start
260 until the next bin. In conjunction with the alignment of routine
261 addresses, this should allow us to have only one sample for every
262 four bytes of text space and never have any overlap (the two end
263 cases, above). */
264
265void
266hist_assign_samples ()
267{
268 bfd_vma bin_low_pc, bin_high_pc;
269 bfd_vma sym_low_pc, sym_high_pc;
270 bfd_vma overlap, addr;
271 int bin_count, i;
272 unsigned int j;
273 double time, credit;
274
275 /* Read samples and assign to symbols. */
276 hist_scale = highpc - lowpc;
277 hist_scale /= hist_num_bins;
278 scale_and_align_entries ();
279
280 /* Iterate over all sample bins. */
281 for (i = 0, j = 1; i < hist_num_bins; ++i)
282 {
283 bin_count = hist_sample[i];
284 if (! bin_count)
285 continue;
286
287 bin_low_pc = lowpc + (bfd_vma) (hist_scale * i);
288 bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1));
289 time = bin_count;
290
291 DBG (SAMPLEDEBUG,
292 printf (
293 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%d\n",
294 (unsigned long) (sizeof (UNIT) * bin_low_pc),
295 (unsigned long) (sizeof (UNIT) * bin_high_pc),
296 bin_count));
297 total_time += time;
298
299 /* Credit all symbols that are covered by bin I. */
300 for (j = j - 1; j < symtab.len; ++j)
301 {
302 sym_low_pc = symtab.base[j].hist.scaled_addr;
303 sym_high_pc = symtab.base[j + 1].hist.scaled_addr;
304
305 /* If high end of bin is below entry address,
306 go for next bin. */
307 if (bin_high_pc < sym_low_pc)
308 break;
309
310 /* If low end of bin is above high end of symbol,
311 go for next symbol. */
312 if (bin_low_pc >= sym_high_pc)
313 continue;
314
315 overlap =
316 MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc);
317 if (overlap > 0)
318 {
319 DBG (SAMPLEDEBUG,
320 printf (
321 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
322 (unsigned long) symtab.base[j].addr,
323 (unsigned long) (sizeof (UNIT) * sym_high_pc),
324 symtab.base[j].name, overlap * time / hist_scale,
325 (long) overlap));
326
327 addr = symtab.base[j].addr;
328 credit = overlap * time / hist_scale;
329
330 /* Credit symbol if it appears in INCL_FLAT or that
331 table is empty and it does not appear it in
332 EXCL_FLAT. */
333 if (sym_lookup (&syms[INCL_FLAT], addr)
334 || (syms[INCL_FLAT].len == 0
335 && !sym_lookup (&syms[EXCL_FLAT], addr)))
336 {
337 symtab.base[j].hist.time += credit;
338 }
339 else
340 {
341 total_time -= credit;
342 }
343 }
344 }
345 }
346
347 DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n",
348 total_time));
349}
350
351
352/* Print header for flag histogram profile. */
353
354static void
355print_header (prefix)
356 int prefix;
357{
358 char unit[64];
359
360 sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev);
361
362 if (bsd_style_output)
363 {
364 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
365 (long) hist_scale * sizeof (UNIT));
366 if (total_time > 0.0)
367 {
368 printf (_(" for %.2f%% of %.2f %s\n\n"),
369 100.0 / total_time, total_time / hz, hist_dimension);
370 }
371 }
372 else
373 {
374 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension);
375 }
376
377 if (total_time <= 0.0)
378 {
379 printf (_(" no time accumulated\n\n"));
380
381 /* This doesn't hurt since all the numerators will be zero. */
382 total_time = 1.0;
383 }
384
385 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
386 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
387 "");
388 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
389 _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit,
390 _("name"));
391}
392
393
394static void
395print_line (sym, scale)
396 Sym *sym;
397 double scale;
398{
399 if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0)
400 return;
401
402 accum_time += sym->hist.time;
403
404 if (bsd_style_output)
405 printf ("%5.1f %10.2f %8.2f",
406 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
407 accum_time / hz, sym->hist.time / hz);
408 else
409 printf ("%6.2f %9.2f %8.2f",
410 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
411 accum_time / hz, sym->hist.time / hz);
412
413 if (sym->ncalls != 0)
414 printf (" %8lu %8.2f %8.2f ",
415 sym->ncalls, scale * sym->hist.time / hz / sym->ncalls,
416 scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls);
417 else
418 printf (" %8.8s %8.8s %8.8s ", "", "", "");
419
420 if (bsd_style_output)
421 print_name (sym);
422 else
423 print_name_only (sym);
424
425 printf ("\n");
426}
427
428
429/* Compare LP and RP. The primary comparison key is execution time,
430 the secondary is number of invocation, and the tertiary is the
431 lexicographic order of the function names. */
432
433static int
434cmp_time (lp, rp)
435 const PTR lp;
436 const PTR rp;
437{
438 const Sym *left = *(const Sym **) lp;
439 const Sym *right = *(const Sym **) rp;
440 double time_diff;
441
442 time_diff = right->hist.time - left->hist.time;
443
444 if (time_diff > 0.0)
445 return 1;
446
447 if (time_diff < 0.0)
448 return -1;
449
450 if (right->ncalls > left->ncalls)
451 return 1;
452
453 if (right->ncalls < left->ncalls)
454 return -1;
455
456 return strcmp (left->name, right->name);
457}
458
459
460/* Print the flat histogram profile. */
461
462void
463hist_print ()
464{
465 Sym **time_sorted_syms, *top_dog, *sym;
466 unsigned int index;
467 unsigned log_scale;
468 double top_time, time;
469 bfd_vma addr;
470
471 if (first_output)
472 first_output = FALSE;
473 else
474 printf ("\f\n");
475
476 accum_time = 0.0;
477
478 if (bsd_style_output)
479 {
480 if (print_descriptions)
481 {
482 printf (_("\n\n\nflat profile:\n"));
483 flat_blurb (stdout);
484 }
485 }
486 else
487 {
488 printf (_("Flat profile:\n"));
489 }
490
491 /* Sort the symbol table by time (call-count and name as secondary
492 and tertiary keys). */
493 time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
494
495 for (index = 0; index < symtab.len; ++index)
496 time_sorted_syms[index] = &symtab.base[index];
497
498 qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time);
499
500 if (bsd_style_output)
501 {
502 log_scale = 5; /* Milli-seconds is BSD-default. */
503 }
504 else
505 {
506 /* Search for symbol with highest per-call
507 execution time and scale accordingly. */
508 log_scale = 0;
509 top_dog = 0;
510 top_time = 0.0;
511
512 for (index = 0; index < symtab.len; ++index)
513 {
514 sym = time_sorted_syms[index];
515
516 if (sym->ncalls != 0)
517 {
518 time = (sym->hist.time + sym->cg.child_time) / sym->ncalls;
519
520 if (time > top_time)
521 {
522 top_dog = sym;
523 top_time = time;
524 }
525 }
526 }
527
528 if (top_dog && top_dog->ncalls != 0 && top_time > 0.0)
529 {
530 top_time /= hz;
531
532 for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++)
533 {
534 double scaled_value = SItab[log_scale].scale * top_time;
535
536 if (scaled_value >= 1.0 && scaled_value < 1000.0)
537 break;
538 }
539 }
540 }
541
542 /* For now, the dimension is always seconds. In the future, we
543 may also want to support other (pseudo-)dimensions (such as
544 I-cache misses etc.). */
545 print_header (SItab[log_scale].prefix);
546
547 for (index = 0; index < symtab.len; ++index)
548 {
549 addr = time_sorted_syms[index]->addr;
550
551 /* Print symbol if its in INCL_FLAT table or that table
552 is empty and the symbol is not in EXCL_FLAT. */
553 if (sym_lookup (&syms[INCL_FLAT], addr)
554 || (syms[INCL_FLAT].len == 0
555 && !sym_lookup (&syms[EXCL_FLAT], addr)))
556 print_line (time_sorted_syms[index], SItab[log_scale].scale);
557 }
558
559 free (time_sorted_syms);
560
561 if (print_descriptions && !bsd_style_output)
562 flat_blurb (stdout);
563}
Note: See TracBrowser for help on using the repository browser.