Context Navigation

funstack.awk

Visit:

Last change on this file was 3076, checked in by bird, 18 years ago
gawk 3.1.5
File size: 25.9 KB

Line
1	### ====================================================================
2	### @Awk-file{
3	### author = "Nelson H. F. Beebe",
4	### version = "1.00",
5	### date = "09 October 1996",
6	### time = "15:57:06 MDT",
7	### filename = "journal-toc.awk",
8	### address = "Center for Scientific Computing
9	### Department of Mathematics
10	### University of Utah
11	### Salt Lake City, UT 84112
12	### USA",
13	### telephone = "+1 801 581 5254",
14	### FAX = "+1 801 581 4148",
15	### URL = "http://www.math.utah.edu/~beebe",
16	### checksum = "25092 977 3357 26493",
17	### email = "beebe@math.utah.edu (Internet)",
18	### codetable = "ISO/ASCII",
19	### keywords = "BibTeX, bibliography, HTML, journal table of
20	### contents",
21	### supported = "yes",
22	### docstring = "Create a journal cover table of contents from
23	### <at>Article{...} entries in a journal BibTeX
24	### .bib file for checking the bibliography
25	### database against the actual journal covers.
26	### The output can be either plain text, or HTML.
27	###
28	### Usage:
29	### bibclean -max-width 0 BibTeX-file(s) | \
30	### bibsort -byvolume | \
31	### awk -f journal-toc.awk \
32	### [-v HTML=nnn] [-v INDENT=nnn] \
33	### [-v BIBFILEURL=url] >foo.toc
34	###
35	### or if the bibliography is already sorted
36	### by volume,
37	###
38	### bibclean -max-width 0 BibTeX-file(s) | \
39	### awk -f journal-toc.awk \
40	### [-v HTML=nnn] [-v INDENT=nnn] \
41	### [-v BIBFILEURL=url] >foo.toc
42	###
43	### A non-zero value of the command-line option,
44	### HTML=nnn, results in HTML output instead of
45	### the default plain ASCII text (corresponding
46	### to HTML=0).
47	###
48	### The INDENT=nnn command-line option specifies
49	### the number of blanks to indent each logical
50	### level of HTML. The default is INDENT=4.
51	### INDENT=0 suppresses indentation. The INDENT
52	### option has no effect when the default HTML=0
53	### (plain text output) option is in effect.
54	###
55	### When HTML output is selected, the
56	### BIBFILEURL=url command-line option provides a
57	### way to request hypertext links from table of
58	### contents page numbers to the complete BibTeX
59	### entry for the article. These links are
60	### created by appending a sharp (#) and the
61	### citation label to the BIBFILEURL value, which
62	### conforms with the practice of
63	### bibtex-to-html.awk.
64	###
65	### The HTML output form may be useful as a more
66	### compact representation of journal article
67	### bibliography data than the original BibTeX
68	### file provides. Of course, the
69	### table-of-contents format provides less
70	### information, and is considerably more
71	### troublesome for a computer program to parse.
72	###
73	### When URL key values are provided, they will
74	### be used to create hypertext links around
75	### article titles. This supports journals that
76	### provide article contents on the World-Wide
77	### Web.
78	###
79	### For parsing simplicity, this program requires
80	### that BibTeX
81	###
82	### key = "value"
83	###
84	### and
85	###
86	### @String{name = "value"}
87	###
88	### specifications be entirely contained on
89	### single lines, which is readily provided by
90	### the `bibclean -max-width 0' filter. It also
91	### requires that bibliography entries begin and
92	### end at the start of a line, and that
93	### quotation marks, rather than balanced braces,
94	### delimit string values. This is a
95	### conventional format that again can be
96	### guaranteed by bibclean.
97	###
98	### This program requires `new' awk, as described
99	### in the book
100	###
101	### Alfred V. Aho, Brian W. Kernighan, and
102	### Peter J. Weinberger,
103	### ``The AWK Programming Language'',
104	### Addison-Wesley (1988), ISBN
105	### 0-201-07981-X,
106	###
107	### such as provided by programs named (GNU)
108	### gawk, nawk, and recent AT&T awk.
109	###
110	### The checksum field above contains a CRC-16
111	### checksum as the first value, followed by the
112	### equivalent of the standard UNIX wc (word
113	### count) utility output of lines, words, and
114	### characters. This is produced by Robert
115	### Solovay's checksum utility.",
116	### }
117	### ====================================================================
118
# Main pattern-action rules: each input line is dispatched to the
# handler for the BibTeX construct it starts.  Leading blanks and the
# blanks around "@" and "=" are optional, so entries that begin in
# column one (the format guaranteed by bibclean) are recognized.
# Every rule ends with "next" so that at most one handler sees a line.

BEGIN						{ initialize() }

/^ *@ *[Ss][Tt][Rr][Ii][Nn][Gg] *\{/		{ do_String(); next }

/^ *@ *[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee]/	{ next }

/^ *@ *[Aa][Rr][Tt][Ii][Cc][Ll][Ee]/		{ do_Article(); next }

/^ *@/						{ do_Other(); next }

/^ *author *= *\"/				{ do_author(); next }

/^ *journal *= */				{ do_journal(); next }

/^ *volume *= *\"/				{ do_volume(); next }

/^ *number *= *\"/				{ do_number(); next }

/^ *year *= *\"/				{ do_year(); next }

/^ *month *= */					{ do_month(); next }

/^ *title *= *\"/				{ do_title(); next }

/^ *pages *= *\"/				{ do_pages(); next }

/^ *URL *= *\"/					{ do_URL(); next }

# A line holding only a close brace ends the current entry; only
# @Article entries produce output.
/^ *\} *$/					{ if (In_Article) do_end_entry(); next }

END						{ terminate() }
150
151
152	########################################################################
153	# NB: The programming conventions for variables in this program are: #
154	# UPPERCASE global constants and user options #
155	# Initialuppercase global variables #
156	# lowercase local variables #
157	# Any deviation is an error! #
158	########################################################################
159
160
# Begin a new @Article entry: remember that we are inside an article,
# extract the citation label from the "@Article{label," line in $0,
# and clear every per-entry field so that values from the previous
# entry cannot leak into this one.
function do_Article()
{
    In_Article = 1

    Citation_label = $0
    sub(/^[^\{]*\{/,"",Citation_label)	# discard up to and including open brace
    sub(/ *, *$/,"",Citation_label)	# discard trailing comma and optional blanks

    Author = ""
    Title = ""
    Journal = ""
    Volume = ""
    Number = ""
    Month = ""
    Year = ""
    Pages = ""
    Url = ""
}
179
180
# Record the author list of the current entry.  The quoted value on
# the current input line is passed through TeX_to_HTML() before it is
# stored in the global Author.
function do_author(   raw)
{
    raw = get_value($0)
    Author = TeX_to_HTML(raw)
}
185
186
# Finish the current @Article entry and emit its table-of-contents
# lines.
#
# Authors are split on " and "; every author but the last is printed
# on a line of its own (followed by "and"), and the last author shares
# a line with the title and page number.  Titles too long for one
# line are handed to do_long_title().
#
# A new issue header is started whenever the issue number OR the
# volume differs from that of the previous entry.  Comparing the
# number alone would merge, for example, volume 1 number 1 with
# volume 2 number 1.  Last_issue_volume is a global private to this
# function, independent of the Last_volume used by do_new_issue().
function do_end_entry(   k,n,parts)
{
    n = split(Author,parts," and ")
    if ((Last_number != Number) || (Last_issue_volume != Volume))
    {
	Last_issue_volume = Volume
	do_new_issue()
    }
    for (k = 1; k < n; ++k)
	print_toc_line(parts[k] " and", "", "")

    # The hypertext link (if any) is consumed by the first title line
    # that print_toc_line() writes.
    Title_prefix = html_begin_title()
    Title_suffix = html_end_title()
    if (html_length(Title) <= (MAX_TITLE_CHARS + MIN_LEADERS)) # complete title fits on line
	print_toc_line(parts[n], Title, html_begin_pages() Pages html_end_pages())
    else			# need to split long title over multiple lines
	do_long_title(parts[n], Title, html_begin_pages() Pages html_end_pages())
}
201
202
# Record the journal name of the current entry in the global Journal.
# The value is either a quoted string or a bare abbreviation; an
# abbreviation that was defined by an earlier @String is replaced by
# its expansion.  TeX discretionary hyphens (\-) are removed.
function do_journal()
{
    if ($0 !~ /[=] *"/)
    {				# have  journal = journal-abbreviation,
	Journal = get_abbrev($0)
	if (Journal in String)	# abbreviation is known: expand it
	    Journal = String[Journal]
    }
    else			# have  journal = "quoted journal name",
	Journal = get_value($0)

    gsub(/\\-/,"",Journal)	# drop discretionary hyphens
}
215
216
# Print a title that does not fit on one line as several
# table-of-contents lines.  The author appears only on the first
# line and the page number only on the last one.
function do_long_title(author,title,pages,   chunk,cut)
{
    # The loop starts from the trimmed title; after each printed line
    # the author is blanked so that continuation lines carry no author.
    for (title = trim(title); length(title) > 0; author = "")
    {
	cut = html_breakpoint(title,MAX_TITLE_CHARS+MIN_LEADERS)
	chunk = substr(title,1,cut)
	title = substr(title,cut+1)
	sub(/^ +/,"",title)		# the next line must not start with blanks
	if (length(title) == 0)		# nothing left: this is the final line
	    print_toc_line(author, chunk, pages)
	else
	    print_toc_line(author, chunk, "")
    }
}
230
231
# Record the month of the current entry in the global Month.
#
# The value may be a quoted string, a standard abbreviation (jan,
# feb, ...), or a BibTeX concatenation such as
#     jul # "\slash " # aug      or      jul # "--" # aug
# Concatenations are reduced to "part / part", and every part that is
# a known abbreviation is replaced by the full month name from
# Month_expansion.  Any amount of blank space is accepted around the
# "#" concatenation operator.
function do_month(   k,n,parts)
{
    Month = ($0 ~ /[=] *"/) ? get_value($0) : get_abbrev($0)
    gsub(/[\"]/,"",Month)			# drop quotes left inside a concatenation
    gsub(/ *# *\\slash *# */," / ",Month)
    gsub(/ *# *-+ *# */," / ",Month)
    n = split(Month,parts," / ")
    Month = ""
    for (k = 1; k <= n; ++k)
	Month = Month ((k > 1) ? " / " : "") \
	    ((parts[k] in Month_expansion) ? Month_expansion[parts[k]] : parts[k])
}
244
245
# Emit the heading for a new journal issue.  In plain-text mode this
# is a blank line, the journal name, the volume/number/month/year
# line, and another blank line.  In HTML mode the previous <PRE> block
# is closed, an <H1> heading with a named anchor is written, an entry
# is added to the issue index, and a fresh <PRE> block is opened.
function do_new_issue()
{
    Last_number = Number

    if (!HTML)
    {
	print_line("")
	print_line(Journal)
    }
    else
    {
	if (Last_volume != Volume)	# extra break between volumes
	{
	    Last_volume = Volume
	    print_line(prefix(2) "<BR>")
	}
	html_end_toc()
	html_begin_issue()
	print_line(prefix(2) Journal "<BR>")
    }

    # The heading text itself is always written without markup.
    print_line(strip_html(vol_no_month_year()))

    if (!HTML)
    {
	print_line("")
	return
    }

    html_end_issue()
    html_toc_entry()
    html_begin_toc()
}
277
278
# Record the issue number of the current entry: the quoted value on
# the current input line is stored in the global Number.
function do_number(   v)
{
    v = get_value($0)
    Number = v
}
283
284
# Any @-entry other than @Article, @String, or @Preamble: clear the
# In_Article flag so that the closing brace of this entry produces no
# table-of-contents output.
function do_Other()
{
    In_Article = (1 == 0)	# i.e., 0
}
289
290
# Record the page range of the current entry in the global Pages.
# The first "--??" (unknown final page) is removed, so that only the
# initial page number is shown.
function do_pages(   v)
{
    v = get_value($0)
    sub(/--[?][?]/,"",v)
    Pages = v
}
296
297
# Handle an  @String{name = "value"}  line: strip the wrapper from $0
# and remember the value in the global array String, indexed by the
# abbreviation name.  NB: $0 is modified.
function do_String(   name,value)
{
    sub(/^[^\{]*\{/,"")		# discard up to and including open brace
    sub(/\} *$/,"")		# discard close brace and any blanks after it
    name = get_key($0)
    value = get_value($0)
    String[name] = value
}
304
305
# Record the title of the current entry.  The quoted value on the
# current input line is passed through TeX_to_HTML() before it is
# stored in the global Title.
function do_title(   raw)
{
    raw = get_value($0)
    Title = TeX_to_HTML(raw)
}
310
311
# Record the URL of the current entry in the global Url.  When the
# value holds several URLs separated by commas or semicolons, only
# the first one is kept, with surrounding blank space removed.
function do_URL(   parts)
{
    split(get_value($0),parts,"[,;]")
    Url = trim(parts[1])
}
318
319
# Record the volume of the current entry: the quoted value on the
# current input line is stored in the global Volume.
function do_volume(   v)
{
    v = get_value($0)
    Volume = v
}
324
325
# Record the year of the current entry: the quoted value on the
# current input line is stored in the global Year.
function do_year(   v)
{
    v = get_value($0)
    Year = v
}
330
331
# Return abbrev from a line of the form  ``key = abbrev,''
#
# Everything up to and including the first "=" and the blanks that
# follow it is removed, as are a trailing optional comma and any
# blanks around it.  Only the local copy of s is changed.
function get_abbrev(s)
{
    sub(/^[^=]*= */,"",s)	# discard text up to start of non-blank value
    sub(/ *,? *$/,"",s)		# discard trailing optional whitespace,
				# optional comma, and optional space
    return (s)
}
339
340
# Return key from a line of the form  ``key = "value",''
#
# Leading blanks are removed, and so is everything from the optional
# blanks before the first "=" to the end of the line.
function get_key(s)
{
    sub(/^ */,"",s)		# discard leading space
    sub(/ *=.*$/,"",s)		# discard everything after key

    return (s)
}
348
349
# Return value from a line of the form  ``key = "value",''
#
# Everything up to and including the first quotation mark (and any
# blanks that follow it) is removed, as are the closing quotation
# mark, an optional comma, and any surrounding blanks at end of line.
function get_value(s)
{
    sub(/^[^\"]*\" */,"",s)	# discard text up to start of non-blank value
    sub(/ *\",? *$/,"",s)	# discard trailing optional whitespace, quote,
				# optional comma, and optional space
    return (s)
}
357
358
# Return s with braced TeX accent control sequences, such as {\'e}
# and {\"U}, replaced by the corresponding ISO Latin 1 SGML entities
# (&eacute;, &Uuml;, ...).  Strings without a backslash are returned
# unchanged without running any of the substitutions.
#
# In each replacement string, "\\&" stands for a literal ampersand;
# a bare "&" would instead mean "the matched text".
function html_accents(s)
{
    if (index(s,"\\") > 0)			# important optimization
    {
	# Convert common lower-case accented letters according to the
	# table on p. 169 of Peter Flynn's ``The World Wide Web
	# Handbook'', International Thomson Computer Press, 1995, ISBN
	# 1-85032-205-8.  The official table of ISO Latin 1 SGML
	# entities used in HTML can be found in the file
	# /usr/local/lib/html-check/lib/ISOlat1.sgml (your path
	# may differ).

	gsub(/\{\\`a\}/,	"\\&agrave;",	s)
	gsub(/\{\\'a\}/,	"\\&aacute;",	s)
	gsub(/\{\\[\^]a\}/,	"\\&acirc;",	s)
	gsub(/\{\\~a\}/,	"\\&atilde;",	s)
	gsub(/\{\\\"a\}/,	"\\&auml;",	s)
	gsub(/\{\\aa\}/,	"\\&aring;",	s)
	gsub(/\{\\ae\}/,	"\\&aelig;",	s)

	gsub(/\{\\c\{c\}\}/,	"\\&ccedil;",	s)

	gsub(/\{\\`e\}/,	"\\&egrave;",	s)
	gsub(/\{\\'e\}/,	"\\&eacute;",	s)
	gsub(/\{\\[\^]e\}/,	"\\&ecirc;",	s)
	gsub(/\{\\\"e\}/,	"\\&euml;",	s)

	gsub(/\{\\`i\}/,	"\\&igrave;",	s)
	gsub(/\{\\'i\}/,	"\\&iacute;",	s)
	gsub(/\{\\[\^]i\}/,	"\\&icirc;",	s)
	gsub(/\{\\\"i\}/,	"\\&iuml;",	s)

	# ignore eth and thorn

	gsub(/\{\\~n\}/,	"\\&ntilde;",	s)

	gsub(/\{\\`o\}/,	"\\&ograve;",	s)
	gsub(/\{\\'o\}/,	"\\&oacute;",	s)
	gsub(/\{\\[\^]o\}/,	"\\&ocirc;",	s)
	gsub(/\{\\~o\}/,	"\\&otilde;",	s)
	gsub(/\{\\\"o\}/,	"\\&ouml;",	s)
	gsub(/\{\\o\}/,		"\\&oslash;",	s)

	gsub(/\{\\`u\}/,	"\\&ugrave;",	s)
	gsub(/\{\\'u\}/,	"\\&uacute;",	s)
	gsub(/\{\\[\^]u\}/,	"\\&ucirc;",	s)
	gsub(/\{\\\"u\}/,	"\\&uuml;",	s)

	gsub(/\{\\'y\}/,	"\\&yacute;",	s)
	gsub(/\{\\\"y\}/,	"\\&yuml;",	s)

	# Now do the same for upper-case accents

	gsub(/\{\\`A\}/,	"\\&Agrave;",	s)
	gsub(/\{\\'A\}/,	"\\&Aacute;",	s)
	gsub(/\{\\[\^]A\}/,	"\\&Acirc;",	s)
	gsub(/\{\\~A\}/,	"\\&Atilde;",	s)
	gsub(/\{\\\"A\}/,	"\\&Auml;",	s)
	gsub(/\{\\AA\}/,	"\\&Aring;",	s)
	gsub(/\{\\AE\}/,	"\\&AElig;",	s)

	gsub(/\{\\c\{C\}\}/,	"\\&Ccedil;",	s)

	gsub(/\{\\`E\}/,	"\\&Egrave;",	s)	# pattern must use upper-case E
	gsub(/\{\\'E\}/,	"\\&Eacute;",	s)
	gsub(/\{\\[\^]E\}/,	"\\&Ecirc;",	s)
	gsub(/\{\\\"E\}/,	"\\&Euml;",	s)

	gsub(/\{\\`I\}/,	"\\&Igrave;",	s)
	gsub(/\{\\'I\}/,	"\\&Iacute;",	s)
	gsub(/\{\\[\^]I\}/,	"\\&Icirc;",	s)
	gsub(/\{\\\"I\}/,	"\\&Iuml;",	s)

	# ignore eth and thorn

	gsub(/\{\\~N\}/,	"\\&Ntilde;",	s)

	gsub(/\{\\`O\}/,	"\\&Ograve;",	s)
	gsub(/\{\\'O\}/,	"\\&Oacute;",	s)
	gsub(/\{\\[\^]O\}/,	"\\&Ocirc;",	s)
	gsub(/\{\\~O\}/,	"\\&Otilde;",	s)
	gsub(/\{\\\"O\}/,	"\\&Ouml;",	s)
	gsub(/\{\\O\}/,		"\\&Oslash;",	s)

	gsub(/\{\\`U\}/,	"\\&Ugrave;",	s)
	gsub(/\{\\'U\}/,	"\\&Uacute;",	s)
	gsub(/\{\\[\^]U\}/,	"\\&Ucirc;",	s)
	gsub(/\{\\\"U\}/,	"\\&Uuml;",	s)

	gsub(/\{\\'Y\}/,	"\\&Yacute;",	s)

	gsub(/\{\\ss\}/,	"\\&szlig;",	s)

	# Others not mentioned in Flynn's book
	gsub(/\{\\'\\i\}/,	"\\&iacute;",	s)
	gsub(/\{\\'\\j\}/,	"j",		s)
    }
    return (s)
}
458
459
# Start the HTML heading of a new issue: a horizontal rule framed by
# blank lines, then the opening <H1> and a named anchor whose name
# comes from html_label().  html_end_issue() writes the closing tags.
function html_begin_issue(   anchor)
{
    anchor = "<A NAME=\"" html_label() "\">"

    print_line("")
    print_line(prefix(2) "<HR>")
    print_line("")
    print_line(prefix(2) "<H1>")
    print_line(prefix(3) anchor)
}
468
469
# Return the opening anchor tag that links a page number to the full
# BibTeX entry (BIBFILEURL followed by "#" and the citation label).
# The empty string is returned in plain-text mode, or when no
# BIBFILEURL was supplied.
function html_begin_pages()
{
    if (!(HTML && (BIBFILEURL != "")))
	return ("")
    return ("<A HREF=\"" BIBFILEURL "#" Citation_label "\">")
}
474
475
# Open a preformatted block.  The In_PRE flag is set BEFORE the tag is
# written, so that prefix() already returns no indentation.
function html_begin_pre()
{
    In_PRE = (0 == 0)		# i.e., 1
    print_line("<PRE>")
}
481
482
# Return the opening anchor tag that links an article title to its
# URL, or the empty string in plain-text mode or when the entry has
# no URL.
function html_begin_title()
{
    if (!(HTML && (Url != "")))
	return ("")
    return ("<A HREF=\"" Url "\">")
}
487
488
# Start the preformatted table-of-contents body of an issue, first
# closing any table of contents that is still open.
function html_begin_toc()
{
    html_end_toc()		# never nest <PRE> blocks
    html_begin_pre()
}
494
495
# Write the buffered HTML body, Body[1..BodyLines], to standard
# output in the order in which the lines were stored.
function html_body(   k)
{
    k = 0
    while (++k <= BodyLines)
	print Body[k]
}
501
# Return the largest character position in title AFTER which the
# title can be broken across lines without exceeding maxlength
# VISIBLE characters (HTML markup is not counted).  When the whole
# title fits, or when no blank can be found, the full length of title
# is returned.
function html_breakpoint(title,maxlength,   last_ok,pos)
{
    if (html_length(title) <= maxlength)	# title fits on one line
	return (length(title))

    # Forward scan.  With markup present, the raw position that
    # corresponds to maxlength visible characters is at or beyond
    # maxlength, so start there and advance from blank to blank while
    # the visible length still fits.  This keeps the number of calls
    # to the expensive html_length() function small.
    last_ok = 0
    pos = min(maxlength,length(title))
    while (pos < length(title))
    {
	if (substr(title,pos+1,1) == " ")
	{			# could break after position pos
	    if (html_length(substr(title,1,pos)) > maxlength)
		break		# advanced too far: keep the previous candidate
	    last_ok = pos
	}
	++pos
    }
    if (last_ok != 0)
	return (last_ok)

    # Backward scan: look for the nearest blank before the limit.
    pos = min(maxlength,length(title)) - 1
    while ((pos > 0) && (substr(title,pos+1,1) != " "))
	--pos
    if (pos < 1)		# no break point found
	pos = length(title)	# so must print entire string
    return (pos)
}
542
543
544
# Close the issue heading opened by html_begin_issue(): first the
# anchor, then the <H1> element.
function html_end_issue(   inner,outer)
{
    inner = prefix(3) "</A>"
    print_line(inner)
    outer = prefix(2) "</H1>"
    print_line(outer)
}
550
551
# Return the closing tag that matches html_begin_pages(): "</A>" when
# page numbers are hyperlinked, and the empty string otherwise.
function html_end_pages()
{
    if (HTML && (BIBFILEURL != ""))
	return ("</A>")
    return ("")
}
556
557
# Close the current preformatted block, if one is open.  Calling this
# function when no block is open does nothing.
function html_end_pre()
{
    if (!In_PRE)
	return
    print_line("</PRE>")	# written while In_PRE is still set
    In_PRE = 0
}
566
567
# Return the closing tag that matches html_begin_title(): "</A>" when
# the title is hyperlinked, and the empty string otherwise.
function html_end_title()
{
    if (HTML && (Url != ""))
	return ("</A>")
    return ("")
}
572
573
# End the table-of-contents body of an issue.  At present this only
# closes the preformatted block, if one is open.
function html_end_toc()
{
    html_end_pre()		# no-op when no <PRE> block is open
}
578
579
# Return s with TeX font changes on its brace groups converted to
# HTML markup.  The first brace group of s is located; if it has the
# form {\xxx ...} with \xxx in Font_decl_map, or \xxx{...} with \xxx
# in Font_cmd_map, its contents are upper-cased (map value "toupper"),
# wrapped in <TAG>...</TAG> (other non-empty map value), or left bare
# (empty map value), and the rest of the string is then processed
# recursively.  If the first brace group is not a recognized font
# group, s is returned unchanged.
function html_fonts(s,   body,word,after,depth,start,brace,ch,tag)
{
    brace = index(s,"{")
    if (brace <= 0)			# important optimization
	return (s)

    # Find the matching close brace: after the loop, "after" is one
    # past it (or one past the end of s if the braces are unbalanced).
    depth = 1
    after = brace + 1
    while ((depth != 0) && (after <= length(s)))
    {
	ch = substr(s,after,1)
	if (ch == "{")
	    depth++
	else if (ch == "}")
	    depth--
	++after
    }

    # {...} is now found at brace ... (after-1)
    for (word in Font_decl_map)		# look for {\xxx ...}
    {
	# The control word must be followed by a non-letter, so that
	# \it is not mistaken for a prefix of \itshape.
	if (substr(s,brace+1,length(word)+1) !~ ("\\" word "[^A-Za-z]"))
	    continue
	start = brace + 1 + length(word)
	body = trim(substr(s,start,after - start))
	tag = Font_decl_map[word]
	if (tag == "toupper")		# arg -> ARG
	    body = toupper(body)
	else if (tag != "")		# arg -> <TAG>arg</TAG>
	    body = "<" tag ">" body "</" tag ">"
	return (substr(s,1,brace-1) body html_fonts(substr(s,after)))
    }

    for (word in Font_cmd_map)		# look for \xxx{...}
    {
	if (substr(s,brace - length(word),length(word)) !~ ("\\" word))
	    continue
	start = brace + 1
	body = trim(substr(s,start,after - start))
	tag = Font_cmd_map[word]
	if (tag == "toupper")		# arg -> ARG
	    body = toupper(body)
	else if (tag != "")		# arg -> <TAG>arg</TAG>
	    body = "<" tag ">" body "</" tag ">"
	start = brace - length(word) - 1	# last position before \xxx
	return (substr(s,1,start) body html_fonts(substr(s,after)))
    }

    return (s)
}
627
628
# Print the HTML prologue directly to standard output: provenance
# comments (program version, date, user, host), the DOCTYPE line, and
# the <HEAD> section with the journal name as title, ending with the
# opening <BODY> tag.
#
# Sets the globals USER, HOSTNAME, DATE, and PERSONAL_NAME.  The host
# name, date, and personal name are obtained from external commands;
# each command pipe is closed after its single line has been read.
#
# NOTE(review): USER comes from the environment and is interpolated
# into shell command lines unquoted; confirm that this program is
# never run with an untrusted environment.
function html_header(   cmd)
{
    USER = ENVIRON["USER"]
    if (USER == "")
	USER = ENVIRON["LOGNAME"]
    if (USER == "")
	USER = "????"

    cmd = "hostname"
    cmd | getline HOSTNAME
    close(cmd)

    cmd = "date"
    cmd | getline DATE
    close(cmd)

    # Personal name: try the NIS password map first, then the local
    # password file.
    cmd = "ypcat passwd | grep '^" USER ":' | awk -F: '{print $5}'"
    cmd | getline PERSONAL_NAME
    close(cmd)
    if (PERSONAL_NAME == "")
    {
	cmd = "grep '^" USER ":' /etc/passwd | awk -F: '{print $5}'"
	cmd | getline PERSONAL_NAME
	close(cmd)
    }

    print "<!-- WARNING: Do NOT edit this file. It was converted from -->"
    print "<!-- BibTeX format to HTML by journal-toc.awk version " VERSION_NUMBER " " VERSION_DATE " -->"
    print "<!-- on " DATE " -->"
    print "<!-- for " PERSONAL_NAME " (" USER "@" HOSTNAME ") -->"
    print ""
    print ""
    print "<!DOCTYPE HTML public \"-//IETF//DTD HTML//EN\">"
    print ""
    print "<HTML>"
    print prefix(1) "<HEAD>"
    print prefix(2) "<TITLE>"
    print prefix(3) Journal
    print prefix(2) "</TITLE>"
    print prefix(2) "<LINK REV=\"made\" HREF=\"mailto:" USER "@" HOSTNAME "\">"
    print prefix(1) "</HEAD>"
    print ""
    print prefix(1) "<BODY>"
}
661
662
# Return the anchor name of the current issue, built from the globals
# as  Volume(Number):Month:Year , with every character removed that
# is not a letter, digit, or one of  ( ) : , ; . / -
function html_label(   label)
{
    label = sprintf("%s(%s):%s:%s", Volume, Number, Month, Year)
    gsub(/[^A-Za-z0-9():,;.\/\-]/,"",label)
    return (label)
}
669
670
# Return the visible length of s, ignoring any HTML markup.
#
# In HTML mode, tags contribute nothing to the length, and each SGML
# character entity (such as &eacute;) counts as ONE character, since
# that is what a browser displays.  In plain-text mode the result is
# simply length(s).
function html_length(s)
{
    if (HTML)
    {
	gsub(/<\/?[^>]*>/,"",s)		# remove SGML tags
	gsub(/&[A-Za-z0-9]+;/,"X",s)	# each SGML entity shows as one character
    }
    return (length(s))
}
680
681
# Print, directly to standard output, the heading of the issue index
# followed by the index itself, which html_toc_entry() accumulated in
# the global HTML_TOC.
function html_toc(   heading)
{
    heading = "Table of contents for issues of " Journal

    print prefix(2) "<H1>"
    print prefix(3) heading
    print prefix(2) "</H1>"
    print HTML_TOC
}
689
690
# Append one line to the issue index HTML_TOC: a link to the anchor
# of the current issue, with the volume/number/month/year text as
# the link text, followed by a line break.
function html_toc_entry(   entry)
{
    entry = " <A HREF=\"#" html_label() "\">" vol_no_month_year() "</A><BR>" "\n"
    HTML_TOC = HTML_TOC entry
}
697
698
# Finish the HTML document: close any preformatted block that is
# still open, then print the closing </BODY> and </HTML> tags.
function html_trailer(   body_end)
{
    html_end_pre()
    body_end = prefix(1) "</BODY>"
    print body_end
    print "</HTML>"
}
705
706
# Set up global constants, user-option defaults, and lookup tables.
# Called once from the BEGIN rule, after any -v command-line
# assignments (HTML, INDENT, BIBFILEURL) have taken effect.
function initialize()
{
    # NB: Update these when the program changes
    VERSION_DATE = "[09-Oct-1996]"
    VERSION_NUMBER = "1.00"

    HTML = (HTML == "") ? 0 : (0 + HTML)	# force a numeric value

    if (INDENT == "")
	INDENT = 4

    if (HTML == 0)
	INDENT = 0	# indentation suppressed in ASCII mode

    LEADERS = " . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ."

    MAX_TITLE_CHARS = 36	# 36 produces a 79-char output line when there is
				# just an initial page number.  If this is
				# increased, the LEADERS string may need to be
				# lengthened.

    MIN_LEADERS = 4		# Minimum number of characters from LEADERS
				# required when leaders are used.  The total
				# number of characters that can appear in a
				# title line is MAX_TITLE_CHARS + MIN_LEADERS.
				# Leaders are omitted when the title length is
				# between MAX_TITLE_CHARS and this sum.

    MIN_LEADERS_SPACE = "    "	# must be at least MIN_LEADERS characters long

    # Standard BibTeX month abbreviations and their expansions
    Month_expansion["jan"] = "January"
    Month_expansion["feb"] = "February"
    Month_expansion["mar"] = "March"
    Month_expansion["apr"] = "April"
    Month_expansion["may"] = "May"
    Month_expansion["jun"] = "June"
    Month_expansion["jul"] = "July"
    Month_expansion["aug"] = "August"
    Month_expansion["sep"] = "September"
    Month_expansion["oct"] = "October"
    Month_expansion["nov"] = "November"
    Month_expansion["dec"] = "December"

    # Font commands with an argument, \xxx{...}.  The value is the
    # HTML tag to use, "toupper" to upper-case the argument instead,
    # or "" to drop the font change.
    Font_cmd_map["\\emph"] = "EM"
    Font_cmd_map["\\textbf"] = "B"
    Font_cmd_map["\\textit"] = "I"
    Font_cmd_map["\\textmd"] = ""
    Font_cmd_map["\\textrm"] = ""
    Font_cmd_map["\\textsc"] = "toupper"
    Font_cmd_map["\\textsl"] = "I"
    Font_cmd_map["\\texttt"] = "TT"	# same tag as \tt and \ttfamily
    Font_cmd_map["\\textup"] = ""

    # Font declarations inside a group, {\xxx ...}; values as above.
    Font_decl_map["\\bf"] = "B"
    Font_decl_map["\\em"] = "EM"
    Font_decl_map["\\it"] = "I"
    Font_decl_map["\\rm"] = ""
    Font_decl_map["\\sc"] = "toupper"
    Font_decl_map["\\sf"] = ""
    Font_decl_map["\\tt"] = "TT"
    Font_decl_map["\\itshape"] = "I"
    Font_decl_map["\\upshape"] = ""
    Font_decl_map["\\slshape"] = "I"
    Font_decl_map["\\scshape"] = "toupper"
    Font_decl_map["\\mdseries"] = ""
    Font_decl_map["\\bfseries"] = "B"
    Font_decl_map["\\rmfamily"] = ""
    Font_decl_map["\\sffamily"] = ""
    Font_decl_map["\\ttfamily"] = "TT"
}
777
# Return the smaller of a and b; b is returned when the two compare
# equal.
function min(a,b)
{
    if (a < b)
	return a
    return b
}
782
783
# Return the indentation for the given logical nesting level: a
# string of INDENT * level blanks, limited to at most 60 blanks.
# Inside a <PRE> block (In_PRE set) no indentation is produced,
# because blanks are significant there.
#
# The blanks are generated with sprintf() rather than cut from a
# literal string, so the result does not depend on the exact length
# of a hand-typed run of spaces.
function prefix(level,   width)
{
    if (In_PRE)
	return ("")

    width = int(INDENT * level)
    if (width > 60)
	width = 60
    if (width <= 0)
	return ("")
    return (sprintf("%" width "s", ""))
}
794
795
# Output one line.  In plain-text mode the line is printed at once.
# In HTML mode it is appended to the Body array instead, because the
# issue index that precedes the body is known only at end of input.
function print_line(line)
{
    if (!HTML)
    {
	print line
	return
    }
    BodyLines++
    Body[BodyLines] = line
}
803
804
# Format and output one table-of-contents line: a right-justified
# author column, the title (wrapped in the pending hypertext link, if
# any), and, when pages is non-empty, leader dots and the page number.
#
# When we have a multiline title, the hypertext link goes only on the
# first line: Title_prefix and Title_suffix are cleared after every
# call.  A multiline hypertext link looks awful because of long
# underlines under the leading indentation.
function print_toc_line(author,title,pages,   odd,dots,width,text)
{
    if (pages != "")		# last title line, with page number
    {
	width = html_length(title)	# potentially expensive
	odd = width % 2			# extra space for aligned leader dots
	if (width > MAX_TITLE_CHARS)	# title (almost) fills line, so no leaders
	    dots = substr(MIN_LEADERS_SPACE,1, \
		(MAX_TITLE_CHARS + MIN_LEADERS - odd - width))
	else				# short title: need leaders
	    dots = substr(LEADERS, 1, MAX_TITLE_CHARS + MIN_LEADERS - odd - \
		min(MAX_TITLE_CHARS,width))
	text = sprintf("%31s %s%s%s%s%s %4s", \
	    author, Title_prefix, title, Title_suffix, \
	    (odd ? " " : ""), dots, pages)
    }
    else			# no leaders needed in title lines other than last one
	text = sprintf("%31s %s%s%s", author, Title_prefix, title, Title_suffix)

    Title_prefix = ""		# forget any hypertext
    Title_suffix = ""		# link material

    # Efficiency note: an earlier version accumulated the body in a
    # single scalar like this: "Body = Body t".  Profiling revealed
    # that statement as the major hot spot, and the change to array
    # storage made the program more than twice as fast.
    print_line(text)		# buffers in Body[] in HTML mode, else prints
}
842
843
# Return s with the four SGML reserved characters  & < > "  replaced
# by the entities  &amp; &lt; &gt; &quot;
#
# In each replacement string, "\\&" stands for a literal ampersand;
# a bare "&" would instead mean "the matched text".
function protect_SGML_characters(s)
{
    gsub(/&/,"\\&amp;",s)	# NB: this one MUST be first
    gsub(/</,"\\&lt;",s)
    gsub(/>/,"\\&gt;",s)
    gsub(/\"/,"\\&quot;",s)
    return (s)
}
852
853
# Strip non-backslashed braces from s and return the result: open
# braces are removed first, then close braces.
function strip_braces(s,   k)
{
    k = strip_char(s,"{")
    return (strip_char(k,"}"))
}
859
860
# Strip non-backslashed instances of the character c from s, and
# return the result.  An instance of c that immediately follows a
# backslash is preserved, together with its backslash.
#
# The string is consumed from left to right: the text up to each
# instance of c is moved to the result (including c itself only when
# it is backslashed), and the scan continues in the remainder.
function strip_char(s,c,   k,result)
{
    result = ""
    while ((k = index(s,c)) > 0)	# found the character
    {
	if (substr(s,k-1,1) != "\\")	# not backslashed: drop it
	    result = result substr(s,1,k-1)
	else				# backslashed: keep it
	    result = result substr(s,1,k)
	s = substr(s,k+1)
    }
    return (result s)
}
873
874
# Return s with all HTML tags (anything from "<" through the next
# ">") removed.  Character entities are left untouched.
function strip_html(s,   plain)
{
    plain = s
    gsub(/<\/?[^>]*>/,"",plain)
    return (plain)
}
880
881
# End-of-input processing.  Plain-text output has already been
# printed line by line, so there is nothing to do.  In HTML mode, the
# buffered document is now written in its final order: header, issue
# index, body, and trailer.
function terminate()
{
    if (!HTML)
	return

    html_end_pre()	# still buffered: the closing tag belongs to the body

    HTML = 0		# NB: stop line buffering
    html_header()
    html_toc()
    html_body()
    html_trailer()
}
895
896
# Convert a TeX string (an author list or a title) for output and
# return the result.
#
# In HTML mode the reserved characters  > < "  are first replaced by
# SGML entities.  Ampersands are not touched here; the TeX form \&
# is handled by TeX_to_HTML_math() and TeX_to_HTML_nonmath().
#
# The string is then split at dollar signs into alternating non-math
# (odd-numbered) and math (even-numbered) parts.  Non-math parts are
# converted by TeX_to_HTML_nonmath() and have their braces stripped;
# math parts are passed to TeX_to_HTML_math() with braces intact.
function TeX_to_HTML(s,   k,n,parts)
{
    if (HTML)
    {
	gsub(/>/, "\\&gt;", s)
	gsub(/</, "\\&lt;", s)
	gsub(/"/, "\\&quot;", s)
    }

    # Display math $$...$$ is changed to $$$...$$$, so that after the
    # split its contents land in an even-numbered (math) part.
    gsub(/[$][$]/,"$$$",s)
    n = split(s,parts,/[$]/)	# split into non-math (odd) and math (even) parts

    s = ""
    for (k = 1; k <= n; ++k)	# unbrace non-math part, leaving math mode intact
	s = s ((k > 1) ? "$" : "") \
	    ((k % 2) ? strip_braces(TeX_to_HTML_nonmath(parts[k])) : \
	    TeX_to_HTML_math(parts[k]))

    gsub(/[$][$][$]/,"$$",s)	# restore display math

    return (s)
}
920
921
# Convert the math-mode part of a TeX string and return the result.
# Mostly a dummy for now, but HTML 3 could support some math
# translation.  The only change made is that the TeX ampersand \& is
# replaced by the SGML entity &amp;.
function TeX_to_HTML_math(s)
{
    # "\\&amp;" is a literal "&amp;": the backslash keeps gsub() from
    # treating the ampersand as "the matched text".
    gsub(/\\&/,"\\&amp;",s)	# reduce TeX ampersands to SGML entities

    return (s)
}
930
931
# Convert the non-math part of a TeX string and return the result.
#
# Strings without a backslash are returned unchanged.  Otherwise a
# few TeX control sequences are reduced to plain characters; then, in
# HTML mode, \& becomes &amp; and accents and font changes are
# translated by html_accents() and html_fonts(), while in plain-text
# mode the remaining TeX markup is discarded.
function TeX_to_HTML_nonmath(s)
{
    if (index(s,"\\") > 0)			# important optimization
    {
	gsub(/\\slash +/,"/",s)		# replace TeX slashes with conventional ones
	gsub(/ *\\emdash +/," --- ",s)	# replace BibNet emdashes with conventional ones
	gsub(/\\%/,"%",s)		# reduce TeX percents to conventional ones
	gsub(/\\[$]/,"$",s)		# reduce TeX dollars to conventional ones
	gsub(/\\#/,"#",s)		# reduce TeX sharps to conventional ones

	if (HTML)			# translate TeX markup to HTML
	{
	    # "\\&amp;" is a literal "&amp;" in a gsub() replacement
	    gsub(/\\&/,"\\&amp;",s)	# reduce TeX ampersands to SGML entities
	    s = html_accents(s)
	    s = html_fonts(s)
	}
	else				# plain ASCII text output: discard all TeX markup
	{
	    gsub(/\\\&/, "\\&", s)	# reduce TeX ampersands to conventional ones

	    gsub(/\\[a-z][a-z] +/,"",s)	# remove TeX font changes
	    gsub(/\\[^A-Za-z]/,"",s)	# remove remaining TeX control symbols
	}
    }
    return (s)
}
958
959
# Return s without its leading and trailing blanks and tabs.
function trim(s)
{
    # Both patterns are anchored, so each can match at most once and
    # sub() is sufficient.
    sub(/^[ \t]+/,"",s)
    sub(/[ \t]+$/,"",s)
    return (s)
}
966
967
# Return the issue description line
#     Volume V, Number N, Month, Year
# with each of the four values passed through wrap().
function vol_no_month_year(   text)
{
    text = "Volume " wrap(Volume)
    text = text ", Number " wrap(Number)
    text = text ", " wrap(Month)
    text = text ", " wrap(Year)
    return (text)
}
972
973
# Return value enclosed in <STRONG>...</STRONG> in HTML mode, and
# unchanged in plain-text mode.
function wrap(value)
{
    if (HTML)
	return ("<STRONG>" value "</STRONG>")
    return (value)
}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/essentials/sys-apps/gawk/test/funstack.awk

Download in other formats: