source: trunk/src/riched32/textwriter.c@ 8659

Last change on this file since 8659 was 6588, checked in by sandervl, 24 years ago

updates for custombuild

File size: 5.5 KB
Line 
1/* $Id: textwriter.c,v 1.1 2001-08-25 13:37:13 sandervl Exp $ */
2/*
3 * text-writer -- RTF-to-text translation writer code.
4 *
5 * Read RTF input, write text of document (text extraction).
6 *
7 * Wrapper must call WriterInit() once before processing any files,
8 * then set up input and call BeginFile() for each input file.
9 *
10 * This installs callbacks for the text and control token classes.
11 * The control class is necessary so that special characters such as
12 * \par, \tab, \sect, etc. can be converted.
13 *
14 * It's problematic what to do with text in headers and footers, and
15 * what to do about tables.
16 *
17 * This really is quite a stupid program, for instance, it could keep
18 * track of the current leader character and dump that out when a tab
19 * is encountered.
20 *
21 * 04 Feb 91 Paul DuBois dubois@primate.wisc.edu
22 *
23 * This software may be redistributed without restriction and used for
24 * any purpose whatsoever.
25 *
26 * 04 Feb 91
27 * -Created.
28 * 27 Feb 91
29 * - Updated for distribution 1.05.
30 * 13 Jul 93
31 * - Updated to compile under THINK C 6.0.
32 * 31 Aug 93
33 * - Added Mike Sendall's entries for Macintosh char map.
34 * 07 Sep 93
35 * - Uses charset map and output sequence map for character translation.
36 * 11 Mar 94
37 * - Updated for 1.10 distribution.
38 */
39
40# include <stdio.h>
41
42# include "rtf.h"
43# include "rtf2text.h"
44# include "charlist.h"
45
46static void TextClass ();
47static void ControlClass ();
48static void Destination ();
49static void SpecialChar ();
50static void PutStdChar ();
51static void PutLitChar ();
52static void PutLitStr ();
53
54static char *outMap[rtfSC_MaxChar];
55
56static CHARLIST charlist = {0, NULL, NULL};
57
58int RTFToBuffer(char* pBuffer, int nBufferSize);
59int RTFToBuffer(char* pBuffer, int nBufferSize)
60{
61
62 /* check if the buffer is big enough to hold all characters */
63 /* we require one more for the '\0' */
64
65
66 if(nBufferSize < charlist.nCount + 1) {
67 return charlist.nCount + CHARLIST_CountChar(&charlist, '\n') + 1;
68 }
69
70 while(charlist.nCount)
71 {
72 *pBuffer = CHARLIST_Dequeue(&charlist);
73 if(*pBuffer=='\n')
74 {
75 *pBuffer = '\r';
76 pBuffer++;
77 *pBuffer = '\n';
78 }
79 pBuffer++;
80 }
81 *pBuffer = '\0';
82
83 return 0;
84}
85
86
87/*
88 * Initialize the writer.
89 */
90
91void
92WriterInit ()
93{
94 RTFReadOutputMap (outMap,1);
95}
96
97
98int
99BeginFile ()
100{
101 /* install class callbacks */
102
103 RTFSetClassCallback (rtfText, TextClass);
104 RTFSetClassCallback (rtfControl, ControlClass);
105
106 return (1);
107}
108
109
110/*
111 * Write out a character. rtfMajor contains the input character, rtfMinor
112 * contains the corresponding standard character code.
113 *
114 * If the input character isn't in the charset map, try to print some
115 * representation of it.
116 */
117
118static void
119TextClass ()
120{
121char buf[rtfBufSiz];
122
123 if (rtfMinor != rtfSC_nothing)
124 PutStdChar (rtfMinor);
125 else
126 {
127 if (rtfMajor < 128) /* in ASCII range */
128 sprintf (buf, "[[%c]]", rtfMajor);
129 else
130 sprintf (buf, "[[\\'%02x]]", rtfMajor);
131 PutLitStr (buf);
132 }
133}
134
135
136static void
137ControlClass ()
138{
139 switch (rtfMajor)
140 {
141 case rtfDestination:
142 Destination ();
143 break;
144 case rtfSpecialChar:
145 SpecialChar ();
146 break;
147 }
148}
149
150
151/*
152 * This function notices destinations that should be ignored
153 * and skips to their ends. This keeps, for instance, picture
154 * data from being considered as plain text.
155 */
156
157static void
158Destination ()
159{
160 switch (rtfMinor)
161 {
162 case rtfPict:
163 case rtfFNContSep:
164 case rtfFNContNotice:
165 case rtfInfo:
166 case rtfIndexRange:
167 case rtfITitle:
168 case rtfISubject:
169 case rtfIAuthor:
170 case rtfIOperator:
171 case rtfIKeywords:
172 case rtfIComment:
173 case rtfIVersion:
174 case rtfIDoccomm:
175 RTFSkipGroup ();
176 break;
177 }
178}
179
180
181/*
182 * The reason these use the rtfSC_xxx thingies instead of just writing
183 * out ' ', '-', '"', etc., is so that the mapping for these characters
184 * can be controlled by the text-map file.
185 */
186
187void SpecialChar ()
188{
189 switch (rtfMinor)
190 {
191 case rtfPage:
192 case rtfSect:
193 case rtfRow:
194 case rtfLine:
195 case rtfPar:
196 PutLitChar ('\n');
197 break;
198 case rtfCell:
199 PutStdChar (rtfSC_space); /* make sure cells are separated */
200 break;
201 case rtfNoBrkSpace:
202 PutStdChar (rtfSC_nobrkspace);
203 break;
204 case rtfTab:
205 PutLitChar ('\t');
206 break;
207 case rtfNoBrkHyphen:
208 PutStdChar (rtfSC_nobrkhyphen);
209 break;
210 case rtfBullet:
211 PutStdChar (rtfSC_bullet);
212 break;
213 case rtfEmDash:
214 PutStdChar (rtfSC_emdash);
215 break;
216 case rtfEnDash:
217 PutStdChar (rtfSC_endash);
218 break;
219 case rtfLQuote:
220 PutStdChar (rtfSC_quoteleft);
221 break;
222 case rtfRQuote:
223 PutStdChar (rtfSC_quoteright);
224 break;
225 case rtfLDblQuote:
226 PutStdChar (rtfSC_quotedblleft);
227 break;
228 case rtfRDblQuote:
229 PutStdChar (rtfSC_quotedblright);
230 break;
231 }
232}
233
234
235/*
236 * Eventually this should keep track of the destination of the
237 * current state and only write text when in the initial state.
238 *
239 * If the output sequence is unspecified in the output map, write
240 * the character's standard name instead. This makes map deficiencies
241 * obvious and provides incentive to fix it. :-)
242 */
243
244void PutStdChar (int stdCode)
245{
246
247 char *oStr = (char *) NULL;
248 char buf[rtfBufSiz];
249
250/* if (stdCode == rtfSC_nothing)
251 RTFPanic ("Unknown character code, logic error\n");
252*/
253 oStr = outMap[stdCode];
254 if (oStr == (char *) NULL) /* no output sequence in map */
255 {
256 sprintf (buf, "[[%s]]", RTFStdCharName (stdCode));
257 oStr = buf;
258 }
259 PutLitStr (oStr);
260}
261
262
263void PutLitChar (int c)
264{
265 CHARLIST_Enqueue(&charlist, (char) c);
266 /* fputc (c, ostream); */
267}
268
269
270static void PutLitStr (char *s)
271{
272 for(;*s;s++)
273 {
274 CHARLIST_Enqueue(&charlist, *s);
275 }
276 /* fputs (s, ostream); */
277}
Note: See TracBrowser for help on using the repository browser.