source: trunk/binutils/gprof/cg_print.c@ 3020

Last change on this file since 3020 was 610, checked in by bird, 22 years ago

This commit was generated by cvs2svn to compensate for changes in r609,
which included commits to RCS files with non-trunk default branches.

  • Property cvs2svn:cvs-rev set to 1.1.1.2
  • Property svn:eol-style set to native
  • Property svn:executable set to *
File size: 33.6 KB
Line 
1/* cg_print.c - Print routines for displaying call graphs.
2
3 Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU Binutils.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22
23#include "libiberty.h"
24#include "gprof.h"
25#include "search_list.h"
26#include "source.h"
27#include "symtab.h"
28#include "cg_arcs.h"
29#include "cg_print.h"
30#include "hist.h"
31#include "utils.h"
32#include "corefile.h"
33
34/* Return value of comparison functions used to sort tables. */
35#define LESSTHAN -1
36#define EQUALTO 0
37#define GREATERTHAN 1
38
39static void print_header PARAMS ((void));
40static void print_cycle PARAMS ((Sym *));
41static int cmp_member PARAMS ((Sym *, Sym *));
42static void sort_members PARAMS ((Sym *));
43static void print_members PARAMS ((Sym *));
44static int cmp_arc PARAMS ((Arc *, Arc *));
45static void sort_parents PARAMS ((Sym *));
46static void print_parents PARAMS ((Sym *));
47static void sort_children PARAMS ((Sym *));
48static void print_children PARAMS ((Sym *));
49static void print_line PARAMS ((Sym *));
50static int cmp_name PARAMS ((const PTR, const PTR));
51static int cmp_arc_count PARAMS ((const PTR, const PTR));
52static int cmp_fun_nuses PARAMS ((const PTR, const PTR));
53static void order_and_dump_functions_by_arcs
54 PARAMS ((Arc **, unsigned long, int, Arc **, unsigned long *));
55
56/* Declarations of automatically generated functions to output blurbs. */
57extern void bsd_callg_blurb PARAMS ((FILE * fp));
58extern void fsf_callg_blurb PARAMS ((FILE * fp));
59
60double print_time = 0.0;
61
62
63static void
64print_header ()
65{
66 if (first_output)
67 first_output = FALSE;
68 else
69 printf ("\f\n");
70
71 if (!bsd_style_output)
72 {
73 if (print_descriptions)
74 printf (_("\t\t Call graph (explanation follows)\n\n"));
75 else
76 printf (_("\t\t\tCall graph\n\n"));
77 }
78
79 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
80 (long) hist_scale * sizeof (UNIT));
81
82 if (print_time > 0.0)
83 printf (_(" for %.2f%% of %.2f seconds\n\n"),
84 100.0 / print_time, print_time / hz);
85 else
86 {
87 printf (_(" no time propagated\n\n"));
88
89 /* This doesn't hurt, since all the numerators will be 0.0. */
90 print_time = 1.0;
91 }
92
93 if (bsd_style_output)
94 {
95 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
96 "", "", "", "", _("called"), _("total"), _("parents"));
97 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
98 _("index"), _("%time"), _("self"), _("descendants"),
99 _("called"), _("self"), _("name"), _("index"));
100 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
101 "", "", "", "", _("called"), _("total"), _("children"));
102 printf ("\n");
103 }
104 else
105 {
106 printf (_("index %% time self children called name\n"));
107 }
108}
109
110/* Print a cycle header. */
111
112static void
113print_cycle (cyc)
114 Sym *cyc;
115{
116 char buf[BUFSIZ];
117
118 sprintf (buf, "[%d]", cyc->cg.index);
119 printf (bsd_style_output
120 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
121 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
122 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
123 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
124
125 if (cyc->cg.self_calls != 0)
126 printf ("+%-7lu", cyc->cg.self_calls);
127 else
128 printf (" %7.7s", "");
129
130 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
131}
132
133/* Compare LEFT and RIGHT membmer. Major comparison key is
134 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
135
136static int
137cmp_member (left, right)
138 Sym *left;
139 Sym *right;
140{
141 double left_time = left->cg.prop.self + left->cg.prop.child;
142 double right_time = right->cg.prop.self + right->cg.prop.child;
143 unsigned long left_calls = left->ncalls + left->cg.self_calls;
144 unsigned long right_calls = right->ncalls + right->cg.self_calls;
145
146 if (left_time > right_time)
147 return GREATERTHAN;
148
149 if (left_time < right_time)
150 return LESSTHAN;
151
152 if (left_calls > right_calls)
153 return GREATERTHAN;
154
155 if (left_calls < right_calls)
156 return LESSTHAN;
157
158 return EQUALTO;
159}
160
161/* Sort members of a cycle. */
162
163static void
164sort_members (cyc)
165 Sym *cyc;
166{
167 Sym *todo, *doing, *prev;
168
169 /* Detach cycle members from cyclehead,
170 and insertion sort them back on. */
171 todo = cyc->cg.cyc.next;
172 cyc->cg.cyc.next = 0;
173
174 for (doing = todo; doing && doing->cg.cyc.next; doing = todo)
175 {
176 todo = doing->cg.cyc.next;
177
178 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
179 {
180 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
181 break;
182 }
183
184 doing->cg.cyc.next = prev->cg.cyc.next;
185 prev->cg.cyc.next = doing;
186 }
187}
188
189/* Print the members of a cycle. */
190
191static void
192print_members (cyc)
193 Sym *cyc;
194{
195 Sym *member;
196
197 sort_members (cyc);
198
199 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
200 {
201 printf (bsd_style_output
202 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
203 : "%6.6s %5.5s %7.2f %7.2f %7lu",
204 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
205 member->ncalls);
206
207 if (member->cg.self_calls != 0)
208 printf ("+%-7lu", member->cg.self_calls);
209 else
210 printf (" %7.7s", "");
211
212 printf (" ");
213 print_name (member);
214 printf ("\n");
215 }
216}
217
218/* Compare two arcs to/from the same child/parent.
219 - if one arc is a self arc, it's least.
220 - if one arc is within a cycle, it's less than.
221 - if both arcs are within a cycle, compare arc counts.
222 - if neither arc is within a cycle, compare with
223 time + child_time as major key
224 arc count as minor key. */
225
226static int
227cmp_arc (left, right)
228 Arc *left;
229 Arc *right;
230{
231 Sym *left_parent = left->parent;
232 Sym *left_child = left->child;
233 Sym *right_parent = right->parent;
234 Sym *right_child = right->child;
235 double left_time, right_time;
236
237 DBG (TIMEDEBUG,
238 printf ("[cmp_arc] ");
239 print_name (left_parent);
240 printf (" calls ");
241 print_name (left_child);
242 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
243 left->count, left_child->ncalls);
244 printf ("[cmp_arc] ");
245 print_name (right_parent);
246 printf (" calls ");
247 print_name (right_child);
248 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
249 right->count, right_child->ncalls);
250 printf ("\n");
251 );
252
253 if (left_parent == left_child)
254 return LESSTHAN; /* Left is a self call. */
255
256 if (right_parent == right_child)
257 return GREATERTHAN; /* Right is a self call. */
258
259 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
260 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
261 {
262 /* Left is a call within a cycle. */
263 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
264 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
265 {
266 /* Right is a call within the cycle, too. */
267 if (left->count < right->count)
268 return LESSTHAN;
269
270 if (left->count > right->count)
271 return GREATERTHAN;
272
273 return EQUALTO;
274 }
275 else
276 {
277 /* Right isn't a call within the cycle. */
278 return LESSTHAN;
279 }
280 }
281 else
282 {
283 /* Left isn't a call within a cycle. */
284 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
285 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
286 {
287 /* Right is a call within a cycle. */
288 return GREATERTHAN;
289 }
290 else
291 {
292 /* Neither is a call within a cycle. */
293 left_time = left->time + left->child_time;
294 right_time = right->time + right->child_time;
295
296 if (left_time < right_time)
297 return LESSTHAN;
298
299 if (left_time > right_time)
300 return GREATERTHAN;
301
302 if (left->count < right->count)
303 return LESSTHAN;
304
305 if (left->count > right->count)
306 return GREATERTHAN;
307
308 return EQUALTO;
309 }
310 }
311}
312
313
314static void
315sort_parents (child)
316 Sym * child;
317{
318 Arc *arc, *detached, sorted, *prev;
319
320 /* Unlink parents from child, then insertion sort back on to
321 sorted's parents.
322 *arc the arc you have detached and are inserting.
323 *detached the rest of the arcs to be sorted.
324 sorted arc list onto which you insertion sort.
325 *prev arc before the arc you are comparing. */
326 sorted.next_parent = 0;
327
328 for (arc = child->cg.parents; arc; arc = detached)
329 {
330 detached = arc->next_parent;
331
332 /* Consider *arc as disconnected; insert it into sorted. */
333 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
334 {
335 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
336 break;
337 }
338
339 arc->next_parent = prev->next_parent;
340 prev->next_parent = arc;
341 }
342
343 /* Reattach sorted arcs to child. */
344 child->cg.parents = sorted.next_parent;
345}
346
347
348static void
349print_parents (child)
350 Sym *child;
351{
352 Sym *parent;
353 Arc *arc;
354 Sym *cycle_head;
355
356 if (child->cg.cyc.head != 0)
357 cycle_head = child->cg.cyc.head;
358 else
359 cycle_head = child;
360
361 if (!child->cg.parents)
362 {
363 printf (bsd_style_output
364 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
365 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
366 "", "", "", "", "", "");
367 return;
368 }
369
370 sort_parents (child);
371
372 for (arc = child->cg.parents; arc; arc = arc->next_parent)
373 {
374 parent = arc->parent;
375 if (child == parent || (child->cg.cyc.num != 0
376 && parent->cg.cyc.num == child->cg.cyc.num))
377 {
378 /* Selfcall or call among siblings. */
379 printf (bsd_style_output
380 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
381 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
382 "", "", "", "",
383 arc->count, "");
384 print_name (parent);
385 printf ("\n");
386 }
387 else
388 {
389 /* Regular parent of child. */
390 printf (bsd_style_output
391 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
392 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
393 "", "",
394 arc->time / hz, arc->child_time / hz,
395 arc->count, cycle_head->ncalls);
396 print_name (parent);
397 printf ("\n");
398 }
399 }
400}
401
402
403static void
404sort_children (parent)
405 Sym *parent;
406{
407 Arc *arc, *detached, sorted, *prev;
408
409 /* Unlink children from parent, then insertion sort back on to
410 sorted's children.
411 *arc the arc you have detached and are inserting.
412 *detached the rest of the arcs to be sorted.
413 sorted arc list onto which you insertion sort.
414 *prev arc before the arc you are comparing. */
415 sorted.next_child = 0;
416
417 for (arc = parent->cg.children; arc; arc = detached)
418 {
419 detached = arc->next_child;
420
421 /* Consider *arc as disconnected; insert it into sorted. */
422 for (prev = &sorted; prev->next_child; prev = prev->next_child)
423 {
424 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
425 break;
426 }
427
428 arc->next_child = prev->next_child;
429 prev->next_child = arc;
430 }
431
432 /* Reattach sorted children to parent. */
433 parent->cg.children = sorted.next_child;
434}
435
436
437static void
438print_children (parent)
439 Sym *parent;
440{
441 Sym *child;
442 Arc *arc;
443
444 sort_children (parent);
445 arc = parent->cg.children;
446
447 for (arc = parent->cg.children; arc; arc = arc->next_child)
448 {
449 child = arc->child;
450 if (child == parent || (child->cg.cyc.num != 0
451 && child->cg.cyc.num == parent->cg.cyc.num))
452 {
453 /* Self call or call to sibling. */
454 printf (bsd_style_output
455 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
456 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
457 "", "", "", "", arc->count, "");
458 print_name (child);
459 printf ("\n");
460 }
461 else
462 {
463 /* Regular child of parent. */
464 printf (bsd_style_output
465 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
466 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
467 "", "",
468 arc->time / hz, arc->child_time / hz,
469 arc->count, child->cg.cyc.head->ncalls);
470 print_name (child);
471 printf ("\n");
472 }
473 }
474}
475
476
477static void
478print_line (np)
479 Sym *np;
480{
481 char buf[BUFSIZ];
482
483 sprintf (buf, "[%d]", np->cg.index);
484 printf (bsd_style_output
485 ? "%-6.6s %5.1f %7.2f %11.2f"
486 : "%-6.6s %5.1f %7.2f %7.2f", buf,
487 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
488 np->cg.prop.self / hz, np->cg.prop.child / hz);
489
490 if ((np->ncalls + np->cg.self_calls) != 0)
491 {
492 printf (" %7lu", np->ncalls);
493
494 if (np->cg.self_calls != 0)
495 printf ("+%-7lu ", np->cg.self_calls);
496 else
497 printf (" %7.7s ", "");
498 }
499 else
500 {
501 printf (" %7.7s %7.7s ", "", "");
502 }
503
504 print_name (np);
505 printf ("\n");
506}
507
508
509/* Print dynamic call graph. */
510
511void
512cg_print (timesortsym)
513 Sym ** timesortsym;
514{
515 unsigned int index;
516 Sym *parent;
517
518 if (print_descriptions && bsd_style_output)
519 bsd_callg_blurb (stdout);
520
521 print_header ();
522
523 for (index = 0; index < symtab.len + num_cycles; ++index)
524 {
525 parent = timesortsym[index];
526
527 if ((ignore_zeros && parent->ncalls == 0
528 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
529 && parent->cg.prop.child == 0)
530 || !parent->cg.print_flag
531 || (line_granularity && ! parent->is_func))
532 continue;
533
534 if (!parent->name && parent->cg.cyc.num != 0)
535 {
536 /* Cycle header. */
537 print_cycle (parent);
538 print_members (parent);
539 }
540 else
541 {
542 print_parents (parent);
543 print_line (parent);
544 print_children (parent);
545 }
546
547 if (bsd_style_output)
548 printf ("\n");
549
550 printf ("-----------------------------------------------\n");
551
552 if (bsd_style_output)
553 printf ("\n");
554 }
555
556 free (timesortsym);
557
558 if (print_descriptions && !bsd_style_output)
559 fsf_callg_blurb (stdout);
560}
561
562
563static int
564cmp_name (left, right)
565 const PTR left;
566 const PTR right;
567{
568 const Sym **npp1 = (const Sym **) left;
569 const Sym **npp2 = (const Sym **) right;
570
571 return strcmp ((*npp1)->name, (*npp2)->name);
572}
573
574
575void
576cg_print_index ()
577{
578 unsigned int index;
579 unsigned int nnames, todo, i, j;
580 int col, starting_col;
581 Sym **name_sorted_syms, *sym;
582 const char *filename;
583 char buf[20];
584 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
585
586 /* Now, sort regular function name
587 alphabetically to create an index. */
588 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
589
590 for (index = 0, nnames = 0; index < symtab.len; index++)
591 {
592 if (ignore_zeros && symtab.base[index].ncalls == 0
593 && symtab.base[index].hist.time == 0)
594 continue;
595
596 name_sorted_syms[nnames++] = &symtab.base[index];
597 }
598
599 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
600
601 for (index = 1, todo = nnames; index <= num_cycles; index++)
602 name_sorted_syms[todo++] = &cycle_header[index];
603
604 printf ("\f\n");
605 printf (_("Index by function name\n\n"));
606 index = (todo + 2) / 3;
607
608 for (i = 0; i < index; i++)
609 {
610 col = 0;
611 starting_col = 0;
612
613 for (j = i; j < todo; j += index)
614 {
615 sym = name_sorted_syms[j];
616
617 if (sym->cg.print_flag)
618 sprintf (buf, "[%d]", sym->cg.index);
619 else
620 sprintf (buf, "(%d)", sym->cg.index);
621
622 if (j < nnames)
623 {
624 if (bsd_style_output)
625 {
626 printf ("%6.6s %-19.19s", buf, sym->name);
627 }
628 else
629 {
630 col += strlen (buf);
631
632 for (; col < starting_col + 5; ++col)
633 putchar (' ');
634
635 printf (" %s ", buf);
636 col += print_name_only (sym);
637
638 if (!line_granularity && sym->is_static && sym->file)
639 {
640 filename = sym->file->name;
641
642 if (!print_path)
643 {
644 filename = strrchr (filename, '/');
645
646 if (filename)
647 ++filename;
648 else
649 filename = sym->file->name;
650 }
651
652 printf (" (%s)", filename);
653 col += strlen (filename) + 3;
654 }
655 }
656 }
657 else
658 {
659 if (bsd_style_output)
660 {
661 printf ("%6.6s ", buf);
662 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
663 printf ("%-19.19s", buf);
664 }
665 else
666 {
667 col += strlen (buf);
668 for (; col < starting_col + 5; ++col)
669 putchar (' ');
670 printf (" %s ", buf);
671 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
672 printf ("%s", buf);
673 col += strlen (buf);
674 }
675 }
676
677 starting_col += column_width;
678 }
679
680 printf ("\n");
681 }
682
683 free (name_sorted_syms);
684}
685
686/* Compare two arcs based on their usage counts.
687 We want to sort in descending order. */
688
689static int
690cmp_arc_count (left, right)
691 const PTR left;
692 const PTR right;
693{
694 const Arc **npp1 = (const Arc **) left;
695 const Arc **npp2 = (const Arc **) right;
696
697 if ((*npp1)->count > (*npp2)->count)
698 return -1;
699 else if ((*npp1)->count < (*npp2)->count)
700 return 1;
701 else
702 return 0;
703}
704
705/* Compare two funtions based on their usage counts.
706 We want to sort in descending order. */
707
708static int
709cmp_fun_nuses (left, right)
710 const PTR left;
711 const PTR right;
712{
713 const Sym **npp1 = (const Sym **) left;
714 const Sym **npp2 = (const Sym **) right;
715
716 if ((*npp1)->nuses > (*npp2)->nuses)
717 return -1;
718 else if ((*npp1)->nuses < (*npp2)->nuses)
719 return 1;
720 else
721 return 0;
722}
723
724/* Print a suggested function ordering based on the profiling data.
725
726 We perform 4 major steps when ordering functions:
727
728 * Group unused functions together and place them at the
729 end of the function order.
730
731 * Search the highest use arcs (those which account for 90% of
732 the total arc count) for functions which have several parents.
733
734 Group those with the most call sites together (currently the
735 top 1.25% which have at least five different call sites).
736
737 These are emitted at the start of the function order.
738
739 * Use a greedy placement algorithm to place functions which
740 occur in the top 99% of the arcs in the profile. Some provisions
741 are made to handle high usage arcs where the parent and/or
742 child has already been placed.
743
744 * Run the same greedy placement algorithm on the remaining
745 arcs to place the leftover functions.
746
747
748 The various "magic numbers" should (one day) be tuneable by command
749 line options. They were arrived at by benchmarking a few applications
750 with various values to see which values produced better overall function
751 orderings.
752
753 Of course, profiling errors, machine limitations (PA long calls), and
754 poor cutoff values for the placement algorithm may limit the usefullness
755 of the resulting function order. Improvements would be greatly appreciated.
756
757 Suggestions:
758
759 * Place the functions with many callers near the middle of the
760 list to reduce long calls.
761
762 * Propagate arc usage changes as functions are placed. Ie if
763 func1 and func2 are placed together, arcs to/from those arcs
764 to the same parent/child should be combined, then resort the
765 arcs to choose the next one.
766
767 * Implement some global positioning algorithm to place the
768 chains made by the greedy local positioning algorithm. Probably
769 by examining arcs which haven't been placed yet to tie two
770 chains together.
771
772 * Take a function's size and time into account in the algorithm;
773 size in particular is important on the PA (long calls). Placing
774 many small functions onto their own page may be wise.
775
776 * Use better profiling information; many published algorithms
777 are based on call sequences through time, rather than just
778 arc counts.
779
780 * Prodecure cloning could improve performance when a small number
781 of arcs account for most of the calls to a particular function.
782
783 * Use relocation information to avoid moving unused functions
784 completely out of the code stream; this would avoid severe lossage
785 when the profile data bears little resemblance to actual runs.
786
787 * Propagation of arc usages should also improve .o link line
788 ordering which shares the same arc placement algorithm with
789 the function ordering code (in fact it is a degenerate case
790 of function ordering). */
791
792void
793cg_print_function_ordering ()
794{
795 unsigned long index, used, unused, scratch_index;
796 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
797#ifdef __GNUC__
798 unsigned long long total_arcs, tmp_arcs_count;
799#else
800 unsigned long total_arcs, tmp_arcs_count;
801#endif
802 Sym **unused_syms, **used_syms, **scratch_syms;
803 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
804
805 index = 0;
806 used = 0;
807 unused = 0;
808 scratch_index = 0;
809 unplaced_arc_count = 0;
810 high_arc_count = 0;
811 scratch_arc_count = 0;
812
813 /* First group all the unused functions together. */
814 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
815 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
816 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
817 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
818 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
819 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
820
821 /* Walk through all the functions; mark those which are never
822 called as placed (we'll emit them as a group later). */
823 for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
824 {
825 if (symtab.base[index].ncalls == 0)
826 {
827 /* Filter out gprof generated names. */
828 if (strcmp (symtab.base[index].name, "<locore>")
829 && strcmp (symtab.base[index].name, "<hicore>"))
830 {
831 unused_syms[unused++] = &symtab.base[index];
832 symtab.base[index].has_been_placed = 1;
833 }
834 }
835 else
836 {
837 used_syms[used++] = &symtab.base[index];
838 symtab.base[index].has_been_placed = 0;
839 symtab.base[index].next = 0;
840 symtab.base[index].prev = 0;
841 symtab.base[index].nuses = 0;
842 }
843 }
844
845 /* Sort the arcs from most used to least used. */
846 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
847
848 /* Compute the total arc count. Also mark arcs as unplaced.
849
850 Note we don't compensate for overflow if that happens!
851 Overflow is much less likely when this file is compiled
852 with GCC as it can double-wide integers via long long. */
853 total_arcs = 0;
854 for (index = 0; index < numarcs; index++)
855 {
856 total_arcs += arcs[index]->count;
857 arcs[index]->has_been_placed = 0;
858 }
859
860 /* We want to pull out those functions which are referenced
861 by many highly used arcs and emit them as a group. This
862 could probably use some tuning. */
863 tmp_arcs_count = 0;
864 for (index = 0; index < numarcs; index++)
865 {
866 tmp_arcs_count += arcs[index]->count;
867
868 /* Count how many times each parent and child are used up
869 to our threshhold of arcs (90%). */
870 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
871 break;
872
873 arcs[index]->child->nuses++;
874 }
875
876 /* Now sort a temporary symbol table based on the number of
877 times each function was used in the highest used arcs. */
878 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
879 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
880
881 /* Now pick out those symbols we're going to emit as
882 a group. We take up to 1.25% of the used symbols. */
883 for (index = 0; index < used / 80; index++)
884 {
885 Sym *sym = scratch_syms[index];
886 Arc *arc;
887
888 /* If we hit symbols that aren't used from many call sites,
889 then we can quit. We choose five as the low limit for
890 no particular reason. */
891 if (sym->nuses == 5)
892 break;
893
894 /* We're going to need the arcs between these functions.
895 Unfortunately, we don't know all these functions
896 until we're done. So we keep track of all the arcs
897 to the functions we care about, then prune out those
898 which are uninteresting.
899
900 An interesting variation would be to quit when we found
901 multi-call site functions which account for some percentage
902 of the arcs. */
903 arc = sym->cg.children;
904
905 while (arc)
906 {
907 if (arc->parent != arc->child)
908 scratch_arcs[scratch_arc_count++] = arc;
909 arc->has_been_placed = 1;
910 arc = arc->next_child;
911 }
912
913 arc = sym->cg.parents;
914
915 while (arc)
916 {
917 if (arc->parent != arc->child)
918 scratch_arcs[scratch_arc_count++] = arc;
919 arc->has_been_placed = 1;
920 arc = arc->next_parent;
921 }
922
923 /* Keep track of how many symbols we're going to place. */
924 scratch_index = index;
925
926 /* A lie, but it makes identifying
927 these functions easier later. */
928 sym->has_been_placed = 1;
929 }
930
931 /* Now walk through the temporary arcs and copy
932 those we care about into the high arcs array. */
933 for (index = 0; index < scratch_arc_count; index++)
934 {
935 Arc *arc = scratch_arcs[index];
936
937 /* If this arc refers to highly used functions, then
938 then we want to keep it. */
939 if (arc->child->has_been_placed
940 && arc->parent->has_been_placed)
941 {
942 high_arcs[high_arc_count++] = scratch_arcs[index];
943
944 /* We need to turn of has_been_placed since we're going to
945 use the main arc placement algorithm on these arcs. */
946 arc->child->has_been_placed = 0;
947 arc->parent->has_been_placed = 0;
948 }
949 }
950
951 /* Dump the multi-site high usage functions which are not
952 going to be ordered by the main ordering algorithm. */
953 for (index = 0; index < scratch_index; index++)
954 {
955 if (scratch_syms[index]->has_been_placed)
956 printf ("%s\n", scratch_syms[index]->name);
957 }
958
959 /* Now we can order the multi-site high use
960 functions based on the arcs between them. */
961 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
962 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
963 unplaced_arcs, &unplaced_arc_count);
964
965 /* Order and dump the high use functions left,
966 these typically have only a few call sites. */
967 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
968 unplaced_arcs, &unplaced_arc_count);
969
970 /* Now place the rarely used functions. */
971 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
972 scratch_arcs, &scratch_arc_count);
973
974 /* Output any functions not emitted by the order_and_dump calls. */
975 for (index = 0; index < used; index++)
976 if (used_syms[index]->has_been_placed == 0)
977 printf("%s\n", used_syms[index]->name);
978
979 /* Output the unused functions. */
980 for (index = 0; index < unused; index++)
981 printf("%s\n", unused_syms[index]->name);
982
983 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
984 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
985 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
986 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
987 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
988 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
989
990 free (unused_syms);
991 free (used_syms);
992 free (scratch_syms);
993 free (high_arcs);
994 free (scratch_arcs);
995 free (unplaced_arcs);
996}
997
998/* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
999 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
1000
1001 If ALL is nonzero, then place all functions referenced by THE_ARCS,
1002 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
1003
1004#define MOST 0.99
1005static void
1006order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
1007 unplaced_arcs, unplaced_arc_count)
1008 Arc **the_arcs;
1009 unsigned long arc_count;
1010 int all;
1011 Arc **unplaced_arcs;
1012 unsigned long *unplaced_arc_count;
1013{
1014#ifdef __GNUC__
1015 unsigned long long tmp_arcs, total_arcs;
1016#else
1017 unsigned long tmp_arcs, total_arcs;
1018#endif
1019 unsigned int index;
1020
1021 /* If needed, compute the total arc count.
1022
1023 Note we don't compensate for overflow if that happens! */
1024 if (! all)
1025 {
1026 total_arcs = 0;
1027 for (index = 0; index < arc_count; index++)
1028 total_arcs += the_arcs[index]->count;
1029 }
1030 else
1031 total_arcs = 0;
1032
1033 tmp_arcs = 0;
1034
1035 for (index = 0; index < arc_count; index++)
1036 {
1037 Sym *sym1, *sym2;
1038 Sym *child, *parent;
1039
1040 tmp_arcs += the_arcs[index]->count;
1041
1042 /* Ignore this arc if it's already been placed. */
1043 if (the_arcs[index]->has_been_placed)
1044 continue;
1045
1046 child = the_arcs[index]->child;
1047 parent = the_arcs[index]->parent;
1048
1049 /* If we're not using all arcs, and this is a rarely used
1050 arc, then put it on the unplaced_arc list. Similarly
1051 if both the parent and child of this arc have been placed. */
1052 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1053 || child->has_been_placed || parent->has_been_placed)
1054 {
1055 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1056 continue;
1057 }
1058
1059 /* If all slots in the parent and child are full, then there isn't
1060 anything we can do right now. We'll place this arc on the
1061 unplaced arc list in the hope that a global positioning
1062 algorithm can use it to place function chains. */
1063 if (parent->next && parent->prev && child->next && child->prev)
1064 {
1065 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1066 continue;
1067 }
1068
1069 /* If the parent is unattached, then find the closest
1070 place to attach it onto child's chain. Similarly
1071 for the opposite case. */
1072 if (!parent->next && !parent->prev)
1073 {
1074 int next_count = 0;
1075 int prev_count = 0;
1076 Sym *prev = child;
1077 Sym *next = child;
1078
1079 /* Walk to the beginning and end of the child's chain. */
1080 while (next->next)
1081 {
1082 next = next->next;
1083 next_count++;
1084 }
1085
1086 while (prev->prev)
1087 {
1088 prev = prev->prev;
1089 prev_count++;
1090 }
1091
1092 /* Choose the closest. */
1093 child = next_count < prev_count ? next : prev;
1094 }
1095 else if (! child->next && !child->prev)
1096 {
1097 int next_count = 0;
1098 int prev_count = 0;
1099 Sym *prev = parent;
1100 Sym *next = parent;
1101
1102 while (next->next)
1103 {
1104 next = next->next;
1105 next_count++;
1106 }
1107
1108 while (prev->prev)
1109 {
1110 prev = prev->prev;
1111 prev_count++;
1112 }
1113
1114 parent = prev_count < next_count ? prev : next;
1115 }
1116 else
1117 {
1118 /* Couldn't find anywhere to attach the functions,
1119 put the arc on the unplaced arc list. */
1120 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1121 continue;
1122 }
1123
1124 /* Make sure we don't tie two ends together. */
1125 sym1 = parent;
1126 if (sym1->next)
1127 while (sym1->next)
1128 sym1 = sym1->next;
1129 else
1130 while (sym1->prev)
1131 sym1 = sym1->prev;
1132
1133 sym2 = child;
1134 if (sym2->next)
1135 while (sym2->next)
1136 sym2 = sym2->next;
1137 else
1138 while (sym2->prev)
1139 sym2 = sym2->prev;
1140
1141 if (sym1 == child
1142 && sym2 == parent)
1143 {
1144 /* This would tie two ends together. */
1145 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1146 continue;
1147 }
1148
1149 if (parent->next)
1150 {
1151 /* Must attach to the parent's prev field. */
1152 if (! child->next)
1153 {
1154 /* parent-prev and child-next */
1155 parent->prev = child;
1156 child->next = parent;
1157 the_arcs[index]->has_been_placed = 1;
1158 }
1159 }
1160 else if (parent->prev)
1161 {
1162 /* Must attach to the parent's next field. */
1163 if (! child->prev)
1164 {
1165 /* parent-next and child-prev */
1166 parent->next = child;
1167 child->prev = parent;
1168 the_arcs[index]->has_been_placed = 1;
1169 }
1170 }
1171 else
1172 {
1173 /* Can attach to either field in the parent, depends
1174 on where we've got space in the child. */
1175 if (child->prev)
1176 {
1177 /* parent-prev and child-next. */
1178 parent->prev = child;
1179 child->next = parent;
1180 the_arcs[index]->has_been_placed = 1;
1181 }
1182 else
1183 {
1184 /* parent-next and child-prev. */
1185 parent->next = child;
1186 child->prev = parent;
1187 the_arcs[index]->has_been_placed = 1;
1188 }
1189 }
1190 }
1191
1192 /* Dump the chains of functions we've made. */
1193 for (index = 0; index < arc_count; index++)
1194 {
1195 Sym *sym;
1196 if (the_arcs[index]->parent->has_been_placed
1197 || the_arcs[index]->child->has_been_placed)
1198 continue;
1199
1200 sym = the_arcs[index]->parent;
1201
1202 /* If this symbol isn't attached to any other
1203 symbols, then we've got a rarely used arc.
1204
1205 Skip it for now, we'll deal with them later. */
1206 if (sym->next == NULL
1207 && sym->prev == NULL)
1208 continue;
1209
1210 /* Get to the start of this chain. */
1211 while (sym->prev)
1212 sym = sym->prev;
1213
1214 while (sym)
1215 {
1216 /* Mark it as placed. */
1217 sym->has_been_placed = 1;
1218 printf ("%s\n", sym->name);
1219 sym = sym->next;
1220 }
1221 }
1222
1223 /* If we want to place all the arcs, then output
1224 those which weren't placed by the main algorithm. */
1225 if (all)
1226 for (index = 0; index < arc_count; index++)
1227 {
1228 Sym *sym;
1229 if (the_arcs[index]->parent->has_been_placed
1230 || the_arcs[index]->child->has_been_placed)
1231 continue;
1232
1233 sym = the_arcs[index]->parent;
1234
1235 sym->has_been_placed = 1;
1236 printf ("%s\n", sym->name);
1237 }
1238}
1239
1240/* Print a suggested .o ordering for files on a link line based
1241 on profiling information. This uses the function placement
1242 code for the bulk of its work. */
1243
1244void
1245cg_print_file_ordering ()
1246{
1247 unsigned long scratch_arc_count, index;
1248 Arc **scratch_arcs;
1249 char *last;
1250
1251 scratch_arc_count = 0;
1252
1253 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1254 for (index = 0; index < numarcs; index++)
1255 {
1256 if (! arcs[index]->parent->mapped
1257 || ! arcs[index]->child->mapped)
1258 arcs[index]->has_been_placed = 1;
1259 }
1260
1261 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1262 scratch_arcs, &scratch_arc_count);
1263
1264 /* Output .o's not handled by the main placement algorithm. */
1265 for (index = 0; index < symtab.len; index++)
1266 {
1267 if (symtab.base[index].mapped
1268 && ! symtab.base[index].has_been_placed)
1269 printf ("%s\n", symtab.base[index].name);
1270 }
1271
1272 /* Now output any .o's that didn't have any text symbols. */
1273 last = NULL;
1274 for (index = 0; index < symbol_map_count; index++)
1275 {
1276 unsigned int index2;
1277
1278 /* Don't bother searching if this symbol
1279 is the same as the previous one. */
1280 if (last && !strcmp (last, symbol_map[index].file_name))
1281 continue;
1282
1283 for (index2 = 0; index2 < symtab.len; index2++)
1284 {
1285 if (! symtab.base[index2].mapped)
1286 continue;
1287
1288 if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
1289 break;
1290 }
1291
1292 /* If we didn't find it in the symbol table, then it must
1293 be a .o with no text symbols. Output it last. */
1294 if (index2 == symtab.len)
1295 printf ("%s\n", symbol_map[index].file_name);
1296 last = symbol_map[index].file_name;
1297 }
1298}
Note: See TracBrowser for help on using the repository browser.