source: trunk/src/riched32/text-writer.c@ 3515

Last change on this file since 3515 was 3515, checked in by sandervl, 25 years ago

created; wine port

File size: 5.4 KB
Line 
1/*
2 * text-writer -- RTF-to-text translation writer code.
3 *
4 * Read RTF input, write text of document (text extraction).
5 *
6 * Wrapper must call WriterInit() once before processing any files,
7 * then set up input and call BeginFile() for each input file.
8 *
9 * This installs callbacks for the text and control token classes.
10 * The control class is necessary so that special characters such as
11 * \par, \tab, \sect, etc. can be converted.
12 *
13 * It's problematic what to do with text in headers and footers, and
14 * what to do about tables.
15 *
16 * This really is quite a stupid program, for instance, it could keep
17 * track of the current leader character and dump that out when a tab
18 * is encountered.
19 *
20 * 04 Feb 91 Paul DuBois dubois@primate.wisc.edu
21 *
22 * This software may be redistributed without restriction and used for
23 * any purpose whatsoever.
24 *
25 * 04 Feb 91
26 * -Created.
27 * 27 Feb 91
28 * - Updated for distribution 1.05.
29 * 13 Jul 93
30 * - Updated to compile under THINK C 6.0.
31 * 31 Aug 93
32 * - Added Mike Sendall's entries for Macintosh char map.
33 * 07 Sep 93
34 * - Uses charset map and output sequence map for character translation.
35 * 11 Mar 94
36 * - Updated for 1.10 distribution.
37 */
38
39# include <stdio.h>
40
41# include "rtf.h"
42# include "rtf2text.h"
43# include "charlist.h"
44
45static void TextClass ();
46static void ControlClass ();
47static void Destination ();
48static void SpecialChar ();
49static void PutStdChar ();
50static void PutLitChar ();
51static void PutLitStr ();
52
53static char *outMap[rtfSC_MaxChar];
54
55static CHARLIST charlist = {0, NULL, NULL};
56
57int RTFToBuffer(char* pBuffer, int nBufferSize);
58int RTFToBuffer(char* pBuffer, int nBufferSize)
59{
60
61 /* check if the buffer is big enough to hold all characters */
62 /* we require one more for the '\0' */
63
64
65 if(nBufferSize < charlist.nCount + 1) {
66 return charlist.nCount + CHARLIST_CountChar(&charlist, '\n') + 1;
67 }
68
69 while(charlist.nCount)
70 {
71 *pBuffer = CHARLIST_Dequeue(&charlist);
72 if(*pBuffer=='\n')
73 {
74 *pBuffer = '\r';
75 pBuffer++;
76 *pBuffer = '\n';
77 }
78 pBuffer++;
79 }
80 *pBuffer = '\0';
81
82 return 0;
83}
84
85
86/*
87 * Initialize the writer.
88 */
89
90void
91WriterInit ()
92{
93 RTFReadOutputMap (outMap,1);
94}
95
96
97int
98BeginFile ()
99{
100 /* install class callbacks */
101
102 RTFSetClassCallback (rtfText, TextClass);
103 RTFSetClassCallback (rtfControl, ControlClass);
104
105 return (1);
106}
107
108
109/*
110 * Write out a character. rtfMajor contains the input character, rtfMinor
111 * contains the corresponding standard character code.
112 *
113 * If the input character isn't in the charset map, try to print some
114 * representation of it.
115 */
116
117static void
118TextClass ()
119{
120char buf[rtfBufSiz];
121
122 if (rtfMinor != rtfSC_nothing)
123 PutStdChar (rtfMinor);
124 else
125 {
126 if (rtfMajor < 128) /* in ASCII range */
127 sprintf (buf, "[[%c]]", rtfMajor);
128 else
129 sprintf (buf, "[[\\'%02x]]", rtfMajor);
130 PutLitStr (buf);
131 }
132}
133
134
135static void
136ControlClass ()
137{
138 switch (rtfMajor)
139 {
140 case rtfDestination:
141 Destination ();
142 break;
143 case rtfSpecialChar:
144 SpecialChar ();
145 break;
146 }
147}
148
149
150/*
151 * This function notices destinations that should be ignored
152 * and skips to their ends. This keeps, for instance, picture
153 * data from being considered as plain text.
154 */
155
156static void
157Destination ()
158{
159 switch (rtfMinor)
160 {
161 case rtfPict:
162 case rtfFNContSep:
163 case rtfFNContNotice:
164 case rtfInfo:
165 case rtfIndexRange:
166 case rtfITitle:
167 case rtfISubject:
168 case rtfIAuthor:
169 case rtfIOperator:
170 case rtfIKeywords:
171 case rtfIComment:
172 case rtfIVersion:
173 case rtfIDoccomm:
174 RTFSkipGroup ();
175 break;
176 }
177}
178
179
180/*
181 * The reason these use the rtfSC_xxx thingies instead of just writing
182 * out ' ', '-', '"', etc., is so that the mapping for these characters
183 * can be controlled by the text-map file.
184 */
185
186void SpecialChar ()
187{
188 switch (rtfMinor)
189 {
190 case rtfPage:
191 case rtfSect:
192 case rtfRow:
193 case rtfLine:
194 case rtfPar:
195 PutLitChar ('\n');
196 break;
197 case rtfCell:
198 PutStdChar (rtfSC_space); /* make sure cells are separated */
199 break;
200 case rtfNoBrkSpace:
201 PutStdChar (rtfSC_nobrkspace);
202 break;
203 case rtfTab:
204 PutLitChar ('\t');
205 break;
206 case rtfNoBrkHyphen:
207 PutStdChar (rtfSC_nobrkhyphen);
208 break;
209 case rtfBullet:
210 PutStdChar (rtfSC_bullet);
211 break;
212 case rtfEmDash:
213 PutStdChar (rtfSC_emdash);
214 break;
215 case rtfEnDash:
216 PutStdChar (rtfSC_endash);
217 break;
218 case rtfLQuote:
219 PutStdChar (rtfSC_quoteleft);
220 break;
221 case rtfRQuote:
222 PutStdChar (rtfSC_quoteright);
223 break;
224 case rtfLDblQuote:
225 PutStdChar (rtfSC_quotedblleft);
226 break;
227 case rtfRDblQuote:
228 PutStdChar (rtfSC_quotedblright);
229 break;
230 }
231}
232
233
234/*
235 * Eventually this should keep track of the destination of the
236 * current state and only write text when in the initial state.
237 *
238 * If the output sequence is unspecified in the output map, write
239 * the character's standard name instead. This makes map deficiencies
240 * obvious and provides incentive to fix it. :-)
241 */
242
243void PutStdChar (int stdCode)
244{
245
246 char *oStr = (char *) NULL;
247 char buf[rtfBufSiz];
248
249/* if (stdCode == rtfSC_nothing)
250 RTFPanic ("Unknown character code, logic error\n");
251*/
252 oStr = outMap[stdCode];
253 if (oStr == (char *) NULL) /* no output sequence in map */
254 {
255 sprintf (buf, "[[%s]]", RTFStdCharName (stdCode));
256 oStr = buf;
257 }
258 PutLitStr (oStr);
259}
260
261
262void PutLitChar (int c)
263{
264 CHARLIST_Enqueue(&charlist, (char) c);
265 /* fputc (c, ostream); */
266}
267
268
269static void PutLitStr (char *s)
270{
271 for(;*s;s++)
272 {
273 CHARLIST_Enqueue(&charlist, *s);
274 }
275 /* fputs (s, ostream); */
276}
Note: See TracBrowser for help on using the repository browser.