| 1 | /* ========================================================================== ** | 
|---|
| 2 | *                                debugparse.c | 
|---|
| 3 | * | 
|---|
| 4 | * Copyright (C) 1998 by Christopher R. Hertel | 
|---|
| 5 | * | 
|---|
| 6 | * Email: crh@ubiqx.mn.org | 
|---|
| 7 | * | 
|---|
| 8 | * -------------------------------------------------------------------------- ** | 
|---|
| 9 | * This module is a very simple parser for Samba debug log files. | 
|---|
| 10 | * -------------------------------------------------------------------------- ** | 
|---|
| 11 | * | 
|---|
| 12 | *  This library is free software; you can redistribute it and/or | 
|---|
| 13 | *  modify it under the terms of the GNU Lesser General Public | 
|---|
| 14 | *  License as published by the Free Software Foundation; either | 
|---|
| 15 | *  version 3 of the License, or (at your option) any later version. | 
|---|
| 16 | * | 
|---|
| 17 | *  This library is distributed in the hope that it will be useful, | 
|---|
| 18 | *  but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 19 | *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|---|
| 20 | *  Library General Public License for more details. | 
|---|
| 21 | * | 
|---|
| 22 | *  You should have received a copy of the GNU Lesser General Public | 
|---|
| 23 | *  License along with this library; if not, see <http://www.gnu.org/licenses/>. | 
|---|
| 24 | * | 
|---|
| 25 | * -------------------------------------------------------------------------- ** | 
|---|
| 26 | * The important function in this module is dbg_char2token().  The rest is | 
|---|
| 27 | * basically fluff.  (Potentially useful fluff, but still fluff.) | 
|---|
| 28 | * ========================================================================== ** | 
|---|
| 29 | */ | 
|---|
| 30 |  | 
|---|
| 31 | #include "debugparse.h" | 
|---|
| 32 |  | 
|---|
/* -------------------------------------------------------------------------- **
 * Constants...
 *
 *  DBG_BSIZE - Internal constant used only by dbg_test().  It is the size,
 *          in bytes, of the buffer that dbg_test() hands to fgets().
 *          fgets() stores at most DBG_BSIZE - 1 characters per call, so a
 *          log line longer than that is simply consumed in several
 *          pieces.  That is harmless because the parser state is kept
 *          between calls to dbg_char2token(); the original author notes
 *          having tested dbg_test() with a DBG_BSIZE value as small as 2.
 */

#define DBG_BSIZE 128
|---|
| 42 |  | 
|---|
| 43 | /* -------------------------------------------------------------------------- ** | 
|---|
| 44 | * Functions... | 
|---|
| 45 | */ | 
|---|
| 46 |  | 
|---|
| 47 | const char *dbg_token2string( dbg_Token tok ) | 
|---|
| 48 | /* ------------------------------------------------------------------------ ** | 
|---|
| 49 | * Given a token, return a string describing the token. | 
|---|
| 50 | * | 
|---|
| 51 | *  Input:  tok - One of the set of dbg_Tokens defined in debugparse.h. | 
|---|
| 52 | * | 
|---|
| 53 | *  Output: A string identifying the token.  This is useful for debugging, | 
|---|
| 54 | *          etc. | 
|---|
| 55 | * | 
|---|
| 56 | *  Note:   If the token is not known, this function will return the | 
|---|
| 57 | *          string "<unknown>". | 
|---|
| 58 | * | 
|---|
| 59 | * ------------------------------------------------------------------------ ** | 
|---|
| 60 | */ | 
|---|
| 61 | { | 
|---|
| 62 | switch( tok ) | 
|---|
| 63 | { | 
|---|
| 64 | case dbg_null: | 
|---|
| 65 | return( "null" ); | 
|---|
| 66 | case dbg_ignore: | 
|---|
| 67 | return( "ignore" ); | 
|---|
| 68 | case dbg_header: | 
|---|
| 69 | return( "header" ); | 
|---|
| 70 | case dbg_timestamp: | 
|---|
| 71 | return( "time stamp" ); | 
|---|
| 72 | case dbg_level: | 
|---|
| 73 | return( "level" ); | 
|---|
| 74 | case dbg_sourcefile: | 
|---|
| 75 | return( "source file" ); | 
|---|
| 76 | case dbg_function: | 
|---|
| 77 | return( "function" ); | 
|---|
| 78 | case dbg_lineno: | 
|---|
| 79 | return( "line number" ); | 
|---|
| 80 | case dbg_message: | 
|---|
| 81 | return( "message" ); | 
|---|
| 82 | case dbg_eof: | 
|---|
| 83 | return( "[EOF]" ); | 
|---|
| 84 | } | 
|---|
| 85 | return( "<unknown>" ); | 
|---|
| 86 | } /* dbg_token2string */ | 
|---|
| 87 |  | 
|---|
| 88 | dbg_Token dbg_char2token( dbg_Token *state, int c ) | 
|---|
| 89 | /* ------------------------------------------------------------------------ ** | 
|---|
| 90 | * Parse input one character at a time. | 
|---|
| 91 | * | 
|---|
| 92 | *  Input:  state - A pointer to a token variable.  This is used to | 
|---|
| 93 | *                  maintain the parser state between calls.  For | 
|---|
| 94 | *                  each input stream, you should set up a separate | 
|---|
| 95 | *                  state variable and initialize it to dbg_null. | 
|---|
| 96 | *                  Pass a pointer to it into this function with each | 
|---|
| 97 | *                  character in the input stream.  See dbg_test() | 
|---|
| 98 | *                  for an example. | 
|---|
| 99 | *          c     - The "current" character in the input stream. | 
|---|
| 100 | * | 
|---|
| 101 | *  Output: A token. | 
|---|
| 102 | *          The token value will change when delimiters are found, | 
|---|
| 103 | *          which indicate a transition between syntactical objects. | 
|---|
| 104 | *          Possible return values are: | 
|---|
| 105 | * | 
|---|
| 106 | *          dbg_null        - The input character was an end-of-line. | 
|---|
| 107 | *                            This resets the parser to its initial state | 
|---|
| 108 | *                            in preparation for parsing the next line. | 
|---|
| 109 | *          dbg_eof         - Same as dbg_null, except that the character | 
|---|
| 110 | *                            was an end-of-file. | 
|---|
| 111 | *          dbg_ignore      - Returned for whitespace and delimiters. | 
|---|
| 112 | *                            These lexical tokens are only of interest | 
|---|
| 113 | *                            to the parser. | 
|---|
| 114 | *          dbg_header      - Indicates the start of a header line.  The | 
|---|
| 115 | *                            input character was '[' and was the first on | 
|---|
| 116 | *                            the line. | 
|---|
| 117 | *          dbg_timestamp   - Indicates that the input character was part | 
|---|
| 118 | *                            of a header timestamp. | 
|---|
| 119 | *          dbg_level       - Indicates that the input character was part | 
|---|
| 120 | *                            of the debug-level value in the header. | 
|---|
| 121 | *          dbg_sourcefile  - Indicates that the input character was part | 
|---|
| 122 | *                            of the sourcefile name in the header. | 
|---|
| 123 | *          dbg_function    - Indicates that the input character was part | 
|---|
| 124 | *                            of the function name in the header. | 
|---|
| 125 | *          dbg_lineno      - Indicates that the input character was part | 
|---|
| 126 | *                            of the DEBUG call line number in the header. | 
|---|
| 127 | *          dbg_message     - Indicates that the input character was part | 
|---|
| 128 | *                            of the DEBUG message text. | 
|---|
| 129 | * | 
|---|
| 130 | * ------------------------------------------------------------------------ ** | 
|---|
| 131 | */ | 
|---|
| 132 | { | 
|---|
| 133 | /* The terminating characters that we see will greatly depend upon | 
|---|
| 134 | * how they are read.  For example, if gets() is used instead of | 
|---|
| 135 | * fgets(), then we will not see newline characters.  A lot also | 
|---|
| 136 | * depends on the calling function, which may handle terminators | 
|---|
| 137 | * itself. | 
|---|
| 138 | * | 
|---|
| 139 | * '\n', '\0', and EOF are all considered line terminators.  The | 
|---|
| 140 | * dbg_eof token is sent back if an EOF is encountered. | 
|---|
| 141 | * | 
|---|
| 142 | * Warning:  only allow the '\0' character to be sent if you are | 
|---|
| 143 | *           using gets() to read whole lines (thus replacing '\n' | 
|---|
| 144 | *           with '\0').  Sending '\0' at the wrong time will mess | 
|---|
| 145 | *           up the parsing. | 
|---|
| 146 | */ | 
|---|
| 147 | switch( c ) | 
|---|
| 148 | { | 
|---|
| 149 | case EOF: | 
|---|
| 150 | *state = dbg_null;   /* Set state to null (initial state) so */ | 
|---|
| 151 | return( dbg_eof );   /* that we can restart with new input.  */ | 
|---|
| 152 | case '\n': | 
|---|
| 153 | case '\0': | 
|---|
| 154 | *state = dbg_null;   /* A newline or eoln resets to the null state. */ | 
|---|
| 155 | return( dbg_null ); | 
|---|
| 156 | } | 
|---|
| 157 |  | 
|---|
| 158 | /* When within the body of the message, only a line terminator | 
|---|
| 159 | * can cause a change of state.  We've already checked for line | 
|---|
| 160 | * terminators, so if the current state is dbg_msgtxt, simply | 
|---|
| 161 | * return that as our current token. | 
|---|
| 162 | */ | 
|---|
| 163 | if( dbg_message == *state ) | 
|---|
| 164 | return( dbg_message ); | 
|---|
| 165 |  | 
|---|
| 166 | /* If we are at the start of a new line, and the input character | 
|---|
| 167 | * is an opening bracket, then the line is a header line, otherwise | 
|---|
| 168 | * it's a message body line. | 
|---|
| 169 | */ | 
|---|
| 170 | if( dbg_null == *state ) | 
|---|
| 171 | { | 
|---|
| 172 | if( '[' == c ) | 
|---|
| 173 | { | 
|---|
| 174 | *state = dbg_timestamp; | 
|---|
| 175 | return( dbg_header ); | 
|---|
| 176 | } | 
|---|
| 177 | *state = dbg_message; | 
|---|
| 178 | return( dbg_message ); | 
|---|
| 179 | } | 
|---|
| 180 |  | 
|---|
| 181 | /* We've taken care of terminators, text blocks and new lines. | 
|---|
| 182 | * The remaining possibilities are all within the header line | 
|---|
| 183 | * itself. | 
|---|
| 184 | */ | 
|---|
| 185 |  | 
|---|
| 186 | /* Within the header line, whitespace can be ignored *except* | 
|---|
| 187 | * within the timestamp. | 
|---|
| 188 | */ | 
|---|
| 189 | if( isspace( c ) ) | 
|---|
| 190 | { | 
|---|
| 191 | /* Fudge.  The timestamp may contain space characters. */ | 
|---|
| 192 | if( (' ' == c) && (dbg_timestamp == *state) ) | 
|---|
| 193 | return( dbg_timestamp ); | 
|---|
| 194 | /* Otherwise, ignore whitespace. */ | 
|---|
| 195 | return( dbg_ignore ); | 
|---|
| 196 | } | 
|---|
| 197 |  | 
|---|
| 198 | /* Okay, at this point we know we're somewhere in the header. | 
|---|
| 199 | * Valid header *states* are: dbg_timestamp, dbg_level, | 
|---|
| 200 | * dbg_sourcefile, dbg_function, and dbg_lineno. | 
|---|
| 201 | */ | 
|---|
| 202 | switch( c ) | 
|---|
| 203 | { | 
|---|
| 204 | case ',': | 
|---|
| 205 | if( dbg_timestamp == *state ) | 
|---|
| 206 | { | 
|---|
| 207 | *state = dbg_level; | 
|---|
| 208 | return( dbg_ignore ); | 
|---|
| 209 | } | 
|---|
| 210 | break; | 
|---|
| 211 | case ']': | 
|---|
| 212 | if( dbg_level == *state ) | 
|---|
| 213 | { | 
|---|
| 214 | *state = dbg_sourcefile; | 
|---|
| 215 | return( dbg_ignore ); | 
|---|
| 216 | } | 
|---|
| 217 | break; | 
|---|
| 218 | case ':': | 
|---|
| 219 | if( dbg_sourcefile == *state ) | 
|---|
| 220 | { | 
|---|
| 221 | *state = dbg_function; | 
|---|
| 222 | return( dbg_ignore ); | 
|---|
| 223 | } | 
|---|
| 224 | break; | 
|---|
| 225 | case '(': | 
|---|
| 226 | if( dbg_function == *state ) | 
|---|
| 227 | { | 
|---|
| 228 | *state = dbg_lineno; | 
|---|
| 229 | return( dbg_ignore ); | 
|---|
| 230 | } | 
|---|
| 231 | break; | 
|---|
| 232 | case ')': | 
|---|
| 233 | if( dbg_lineno == *state ) | 
|---|
| 234 | { | 
|---|
| 235 | *state = dbg_null; | 
|---|
| 236 | return( dbg_ignore ); | 
|---|
| 237 | } | 
|---|
| 238 | break; | 
|---|
| 239 | } | 
|---|
| 240 |  | 
|---|
| 241 | /* If the previous block did not result in a state change, then | 
|---|
| 242 | * return the current state as the current token. | 
|---|
| 243 | */ | 
|---|
| 244 | return( *state ); | 
|---|
| 245 | } /* dbg_char2token */ | 
|---|
| 246 |  | 
|---|
| 247 | void dbg_test( void ); | 
|---|
| 248 | void dbg_test( void ) | 
|---|
| 249 | /* ------------------------------------------------------------------------ ** | 
|---|
| 250 | * Simple test function. | 
|---|
| 251 | * | 
|---|
| 252 | *  Input:  none. | 
|---|
| 253 | *  Output: none. | 
|---|
| 254 | *  Notes:  This function was used to test dbg_char2token().  It reads a | 
|---|
| 255 | *          Samba log file from stdin and prints parsing info to stdout. | 
|---|
| 256 | *          It also serves as a simple example. | 
|---|
| 257 | * | 
|---|
| 258 | * ------------------------------------------------------------------------ ** | 
|---|
| 259 | */ | 
|---|
| 260 | { | 
|---|
| 261 | char bufr[DBG_BSIZE]; | 
|---|
| 262 | int  i; | 
|---|
| 263 | int  linecount  = 1; | 
|---|
| 264 | dbg_Token old   = dbg_null, | 
|---|
| 265 | newtok= dbg_null, | 
|---|
| 266 | state = dbg_null; | 
|---|
| 267 |  | 
|---|
| 268 | while( fgets( bufr, DBG_BSIZE, stdin ) ) | 
|---|
| 269 | { | 
|---|
| 270 | for( i = 0; bufr[i]; i++ ) | 
|---|
| 271 | { | 
|---|
| 272 | old = newtok; | 
|---|
| 273 | newtok = dbg_char2token( &state, bufr[i] ); | 
|---|
| 274 | switch( newtok ) | 
|---|
| 275 | { | 
|---|
| 276 | case dbg_header: | 
|---|
| 277 | if( linecount > 1 ) | 
|---|
| 278 | (void)putchar( '\n' ); | 
|---|
| 279 | break; | 
|---|
| 280 | case dbg_null: | 
|---|
| 281 | linecount++; | 
|---|
| 282 | break; | 
|---|
| 283 | case dbg_ignore: | 
|---|
| 284 | break; | 
|---|
| 285 | default: | 
|---|
| 286 | if( old != newtok ) | 
|---|
| 287 | (void)printf( "\n[%05d]%12s: ", linecount, dbg_token2string(newtok) ); | 
|---|
| 288 | (void)putchar( bufr[i] ); | 
|---|
| 289 | } | 
|---|
| 290 | } | 
|---|
| 291 | } | 
|---|
| 292 | (void)putchar( '\n' ); | 
|---|
| 293 | } /* dbg_test */ | 
|---|
| 294 |  | 
|---|
| 295 |  | 
|---|
| 296 | /* -------------------------------------------------------------------------- ** | 
|---|
| 297 | * This simple main line can be uncommented and used to test the parser. | 
|---|
| 298 | */ | 
|---|
| 299 |  | 
|---|
| 300 | /* | 
|---|
| 301 | * int main( void ) | 
|---|
| 302 | *  { | 
|---|
| 303 | *  dbg_test(); | 
|---|
| 304 | *  return( 0 ); | 
|---|
| 305 | *  } | 
|---|
| 306 | */ | 
|---|
| 307 |  | 
|---|
| 308 | /* ========================================================================== */ | 
|---|