source: branches/samba-3.0/source/utils/debugparse.c@ 1071

Last change on this file since 1071 was 1, checked in by Paul Smedley, 19 years ago

Initial code import

File size: 10.2 KB
Line 
1/* ========================================================================== **
2 * debugparse.c
3 *
4 * Copyright (C) 1998 by Christopher R. Hertel
5 *
6 * Email: crh@ubiqx.mn.org
7 *
8 * -------------------------------------------------------------------------- **
9 * This module is a very simple parser for Samba debug log files.
10 * -------------------------------------------------------------------------- **
11 *
12 * This library is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Library General Public
14 * License as published by the Free Software Foundation; either
15 * version 2 of the License, or (at your option) any later version.
16 *
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with this library; if not, write to the Free
24 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 * -------------------------------------------------------------------------- **
27 * The important function in this module is dbg_char2token(). The rest is
28 * basically fluff. (Potentially useful fluff, but still fluff.)
29 * ========================================================================== **
30 */
31
32#include "debugparse.h"
33
34/* -------------------------------------------------------------------------- **
35 * Constants...
36 *
37 * DBG_BSIZE - This internal constant is used only by dbg_test(). It is the
38 * size of the read buffer. I've tested the function using a
39 * DBG_BSIZE value of 2.
40 */
41
42#define DBG_BSIZE 128
43
44/* -------------------------------------------------------------------------- **
45 * Functions...
46 */
47
48const char *dbg_token2string( dbg_Token tok )
49 /* ------------------------------------------------------------------------ **
50 * Given a token, return a string describing the token.
51 *
52 * Input: tok - One of the set of dbg_Tokens defined in debugparse.h.
53 *
54 * Output: A string identifying the token. This is useful for debugging,
55 * etc.
56 *
57 * Note: If the token is not known, this function will return the
58 * string "<unknown>".
59 *
60 * ------------------------------------------------------------------------ **
61 */
62 {
63 switch( tok )
64 {
65 case dbg_null:
66 return( "null" );
67 case dbg_ignore:
68 return( "ignore" );
69 case dbg_header:
70 return( "header" );
71 case dbg_timestamp:
72 return( "time stamp" );
73 case dbg_level:
74 return( "level" );
75 case dbg_sourcefile:
76 return( "source file" );
77 case dbg_function:
78 return( "function" );
79 case dbg_lineno:
80 return( "line number" );
81 case dbg_message:
82 return( "message" );
83 case dbg_eof:
84 return( "[EOF]" );
85 }
86 return( "<unknown>" );
87 } /* dbg_token2string */
88
89dbg_Token dbg_char2token( dbg_Token *state, int c )
90 /* ------------------------------------------------------------------------ **
91 * Parse input one character at a time.
92 *
93 * Input: state - A pointer to a token variable. This is used to
94 * maintain the parser state between calls. For
95 * each input stream, you should set up a separate
96 * state variable and initialize it to dbg_null.
97 * Pass a pointer to it into this function with each
98 * character in the input stream. See dbg_test()
99 * for an example.
100 * c - The "current" character in the input stream.
101 *
102 * Output: A token.
103 * The token value will change when delimiters are found,
104 * which indicate a transition between syntactical objects.
105 * Possible return values are:
106 *
107 * dbg_null - The input character was an end-of-line.
108 * This resets the parser to its initial state
109 * in preparation for parsing the next line.
110 * dbg_eof - Same as dbg_null, except that the character
111 * was an end-of-file.
112 * dbg_ignore - Returned for whitespace and delimiters.
113 * These lexical tokens are only of interest
114 * to the parser.
115 * dbg_header - Indicates the start of a header line. The
116 * input character was '[' and was the first on
117 * the line.
118 * dbg_timestamp - Indicates that the input character was part
119 * of a header timestamp.
120 * dbg_level - Indicates that the input character was part
121 * of the debug-level value in the header.
122 * dbg_sourcefile - Indicates that the input character was part
123 * of the sourcefile name in the header.
124 * dbg_function - Indicates that the input character was part
125 * of the function name in the header.
126 * dbg_lineno - Indicates that the input character was part
127 * of the DEBUG call line number in the header.
128 * dbg_message - Indicates that the input character was part
129 * of the DEBUG message text.
130 *
131 * ------------------------------------------------------------------------ **
132 */
133 {
134 /* The terminating characters that we see will greatly depend upon
135 * how they are read. For example, if gets() is used instead of
136 * fgets(), then we will not see newline characters. A lot also
137 * depends on the calling function, which may handle terminators
138 * itself.
139 *
140 * '\n', '\0', and EOF are all considered line terminators. The
141 * dbg_eof token is sent back if an EOF is encountered.
142 *
143 * Warning: only allow the '\0' character to be sent if you are
144 * using gets() to read whole lines (thus replacing '\n'
145 * with '\0'). Sending '\0' at the wrong time will mess
146 * up the parsing.
147 */
148 switch( c )
149 {
150 case EOF:
151 *state = dbg_null; /* Set state to null (initial state) so */
152 return( dbg_eof ); /* that we can restart with new input. */
153 case '\n':
154 case '\0':
155 *state = dbg_null; /* A newline or eoln resets to the null state. */
156 return( dbg_null );
157 }
158
159 /* When within the body of the message, only a line terminator
160 * can cause a change of state. We've already checked for line
161 * terminators, so if the current state is dbg_msgtxt, simply
162 * return that as our current token.
163 */
164 if( dbg_message == *state )
165 return( dbg_message );
166
167 /* If we are at the start of a new line, and the input character
168 * is an opening bracket, then the line is a header line, otherwise
169 * it's a message body line.
170 */
171 if( dbg_null == *state )
172 {
173 if( '[' == c )
174 {
175 *state = dbg_timestamp;
176 return( dbg_header );
177 }
178 *state = dbg_message;
179 return( dbg_message );
180 }
181
182 /* We've taken care of terminators, text blocks and new lines.
183 * The remaining possibilities are all within the header line
184 * itself.
185 */
186
187 /* Within the header line, whitespace can be ignored *except*
188 * within the timestamp.
189 */
190 if( isspace( c ) )
191 {
192 /* Fudge. The timestamp may contain space characters. */
193 if( (' ' == c) && (dbg_timestamp == *state) )
194 return( dbg_timestamp );
195 /* Otherwise, ignore whitespace. */
196 return( dbg_ignore );
197 }
198
199 /* Okay, at this point we know we're somewhere in the header.
200 * Valid header *states* are: dbg_timestamp, dbg_level,
201 * dbg_sourcefile, dbg_function, and dbg_lineno.
202 */
203 switch( c )
204 {
205 case ',':
206 if( dbg_timestamp == *state )
207 {
208 *state = dbg_level;
209 return( dbg_ignore );
210 }
211 break;
212 case ']':
213 if( dbg_level == *state )
214 {
215 *state = dbg_sourcefile;
216 return( dbg_ignore );
217 }
218 break;
219 case ':':
220 if( dbg_sourcefile == *state )
221 {
222 *state = dbg_function;
223 return( dbg_ignore );
224 }
225 break;
226 case '(':
227 if( dbg_function == *state )
228 {
229 *state = dbg_lineno;
230 return( dbg_ignore );
231 }
232 break;
233 case ')':
234 if( dbg_lineno == *state )
235 {
236 *state = dbg_null;
237 return( dbg_ignore );
238 }
239 break;
240 }
241
242 /* If the previous block did not result in a state change, then
243 * return the current state as the current token.
244 */
245 return( *state );
246 } /* dbg_char2token */
247
248void dbg_test( void );
249void dbg_test( void )
250 /* ------------------------------------------------------------------------ **
251 * Simple test function.
252 *
253 * Input: none.
254 * Output: none.
255 * Notes: This function was used to test dbg_char2token(). It reads a
256 * Samba log file from stdin and prints parsing info to stdout.
257 * It also serves as a simple example.
258 *
259 * ------------------------------------------------------------------------ **
260 */
261 {
262 char bufr[DBG_BSIZE];
263 int i;
264 int linecount = 1;
265 dbg_Token old = dbg_null,
266 newtok= dbg_null,
267 state = dbg_null;
268
269 while( fgets( bufr, DBG_BSIZE, stdin ) )
270 {
271 for( i = 0; bufr[i]; i++ )
272 {
273 old = newtok;
274 newtok = dbg_char2token( &state, bufr[i] );
275 switch( newtok )
276 {
277 case dbg_header:
278 if( linecount > 1 )
279 (void)putchar( '\n' );
280 break;
281 case dbg_null:
282 linecount++;
283 break;
284 case dbg_ignore:
285 break;
286 default:
287 if( old != newtok )
288 (void)printf( "\n[%05d]%12s: ", linecount, dbg_token2string(newtok) );
289 (void)putchar( bufr[i] );
290 }
291 }
292 }
293 (void)putchar( '\n' );
294 } /* dbg_test */
295
296
297/* -------------------------------------------------------------------------- **
298 * This simple main line can be uncommented and used to test the parser.
299 */
300
301/*
302 * int main( void )
303 * {
304 * dbg_test();
305 * return( 0 );
306 * }
307 */
308
309/* ========================================================================== */
Note: See TracBrowser for help on using the repository browser.