| [18] | 1 | Unit SearchTable;
 | 
|---|
 | 2 | 
 | 
|---|
 | 3 | // NewView - a new OS/2 Help Viewer
 | 
|---|
 | 4 | // Copyright 2003 Aaron Lawrence (aaronl at consultant dot com)
 | 
|---|
 | 5 | // This software is released under the Gnu Public License - see readme.txt
 | 
|---|
 | 6 | 
 | 
|---|
 | 7 | Interface
 | 
|---|
 | 8 | 
 | 
|---|
 | 9 | uses
 | 
|---|
| [33] | 10 |   Classes,
 | 
|---|
 | 11 |   IPFFileFormatUnit,
 | 
|---|
 | 12 |   OS2Def;
 | 
|---|
| [18] | 13 | 
 | 
|---|
 | 14 | // Code to read and use IPF search tables
 | 
|---|
 | 15 | // NB The RLE decompression was arrived at by trial and error
 | 
|---|
 | 16 | // it seems to be correct but it's difficult to test.
 | 
|---|
 | 17 | 
 | 
|---|
 | 18 | type
 | 
|---|
 | 19 |   TSearchTable = class
 | 
|---|
 | 20 |   protected
 | 
|---|
 | 21 |     _Data: pointer;
 | 
|---|
 | 22 |     _Entries: TList; // pointers to panel flag records
 | 
|---|
 | 23 |     _RecordLengthIs16Bit: boolean;
 | 
|---|
 | 24 |     _DictionaryCount: longint;
 | 
|---|
 | 25 |     _TopicCount: longint;
 | 
|---|
 | 26 | 
 | 
|---|
 | 27 |     procedure ReadEntries;
 | 
|---|
 | 28 | 
 | 
|---|
 | 29 |     Procedure Check1ByteOfFlags( b: byte;
 | 
|---|
 | 30 |                                  StartingIndex: longint;
 | 
|---|
 | 31 |                                  Results: UInt32ArrayPointer );
 | 
|---|
 | 32 | 
 | 
|---|
 | 33 |     procedure DoRLESearch( p: pbyte;
 | 
|---|
 | 34 |                            pDataEnd: pointer;
 | 
|---|
 | 35 |                            Results: UInt32ArrayPointer );
 | 
|---|
 | 36 | 
 | 
|---|
 | 37 |   public
 | 
|---|
 | 38 |     constructor Create( Data: pointer;
 | 
|---|
 | 39 |                         RecordLengthIs16Bit: boolean;
 | 
|---|
 | 40 |                         DictionaryCount: longint;
 | 
|---|
 | 41 |                         TopicCount: longint );
 | 
|---|
 | 42 |     destructor Destroy; override;
 | 
|---|
 | 43 | 
 | 
|---|
 | 44 |     // Sets Results to 1 for occurrences of DictIndex
 | 
|---|
 | 45 |     procedure Search( DictIndex: uint16;
 | 
|---|
 | 46 |                       Results: UInt32ArrayPointer );
 | 
|---|
 | 47 | 
 | 
|---|
 | 48 |   end;
 | 
|---|
 | 49 | 
 | 
|---|
 | 50 | Implementation
 | 
|---|
 | 51 | 
 | 
|---|
 | 52 | constructor TSearchTable.Create( Data: pointer;
 | 
|---|
 | 53 |                                  RecordLengthIs16Bit: boolean;
 | 
|---|
 | 54 |                                  DictionaryCount: longint;
 | 
|---|
 | 55 |                                  TopicCount: longint );
 | 
|---|
 | 56 | begin
 | 
|---|
 | 57 |   _Data := Data;
 | 
|---|
 | 58 |   _RecordLengthIs16Bit :=
 | 
|---|
 | 59 |     RecordLengthIs16Bit;
 | 
|---|
 | 60 |   _Entries := TList.Create;
 | 
|---|
 | 61 |   _DictionaryCount := DictionaryCount;
 | 
|---|
 | 62 |   _TopicCount := TopicCount;
 | 
|---|
 | 63 |   ReadEntries;
 | 
|---|
 | 64 | end;
 | 
|---|
 | 65 | 
 | 
|---|
 | 66 | destructor TSearchTable.Destroy;
 | 
|---|
 | 67 | begin
 | 
|---|
 | 68 |   _Entries.Destroy;
 | 
|---|
 | 69 | end;
 | 
|---|
 | 70 | 
 | 
|---|
 | 71 | procedure TSearchTable.ReadEntries;
 | 
|---|
 | 72 | var
 | 
|---|
 | 73 |   pWordRecord: pointer;
 | 
|---|
 | 74 |   RecordLen: uint16;
 | 
|---|
 | 75 |   WordIndex: uint16;
 | 
|---|
 | 76 | begin
 | 
|---|
 | 77 |   pWordRecord:= _Data;
 | 
|---|
 | 78 | 
 | 
|---|
 | 79 |   for WordIndex:= 0 to _DictionaryCount - 1 do
 | 
|---|
 | 80 |   begin
 | 
|---|
 | 81 |     _Entries.Add( pWordRecord );
 | 
|---|
 | 82 | 
 | 
|---|
 | 83 |     if _RecordLengthIs16Bit then
 | 
|---|
 | 84 |       RecordLen:= pUInt16( pWordRecord )^
 | 
|---|
 | 85 |     else // 8 bit
 | 
|---|
 | 86 |       RecordLen:= pUInt8( pWordRecord )^;
 | 
|---|
 | 87 |     inc( pWordRecord, RecordLen );
 | 
|---|
 | 88 |   end;
 | 
|---|
 | 89 | end;
 | 
|---|
 | 90 | 
 | 
|---|
 | 91 | 
 | 
|---|
 | 92 | // Search table decompression
 | 
|---|
 | 93 | 
 | 
|---|
 | 94 | // Looks through a single byte of 8 flags, given by b,
 | 
|---|
 | 95 | // and updates topic entries within results for any flags
 | 
|---|
 | 96 | // that are set.
 | 
|---|
 | 97 | Procedure TSearchTable.Check1ByteOfFlags( b: byte;
 | 
|---|
 | 98 |                                           StartingIndex: longint;
 | 
|---|
 | 99 |                                           Results: UInt32ArrayPointer );
 | 
|---|
 | 100 | var
 | 
|---|
 | 101 |   TopicIndex: longint;
 | 
|---|
 | 102 | begin
 | 
|---|
 | 103 |   TopicIndex:= StartingIndex;
 | 
|---|
 | 104 |   while b > 0 do
 | 
|---|
 | 105 |   begin
 | 
|---|
 | 106 |     if b and $80 > 0 then
 | 
|---|
 | 107 |       Results[ TopicIndex ] := 1;
 | 
|---|
 | 108 |     inc( TopicIndex );
 | 
|---|
 | 109 |     b:= b shl 1;
 | 
|---|
 | 110 |   end;
 | 
|---|
 | 111 | end;
 | 
|---|
 | 112 | 
 | 
|---|
 | 113 | // Decompress RLE compressed data starting at p,
 | 
|---|
 | 114 | // running til pDataEnd. Update topic entries in Results.
 | 
|---|
 | 115 | procedure TSearchTable.DoRLESearch( p: pbyte;
 | 
|---|
 | 116 |                                     pDataEnd: pointer;
 | 
|---|
 | 117 |                                     Results: UInt32ArrayPointer );
 | 
|---|
 | 118 | var
 | 
|---|
 | 119 |   TopicIndex: integer;
 | 
|---|
 | 120 | 
 | 
|---|
 | 121 |   N: integer;
 | 
|---|
 | 122 |   thebyte: byte;
 | 
|---|
 | 123 |   byte1, byte2: byte;
 | 
|---|
 | 124 | begin
 | 
|---|
 | 125 |   assert( pbyte( p )^ = 1, 'Unexpected RLE type' );
 | 
|---|
 | 126 |   inc( p ); // skip header, always 1?
 | 
|---|
 | 127 | 
 | 
|---|
 | 128 |   TopicIndex:= 0;
 | 
|---|
 | 129 | 
 | 
|---|
 | 130 |   while p < pDataEnd do
 | 
|---|
 | 131 |   begin
 | 
|---|
 | 132 |     thebyte:= p^;
 | 
|---|
 | 133 |     inc( p );
 | 
|---|
 | 134 | 
 | 
|---|
 | 135 |     if thebyte = $80 then
 | 
|---|
 | 136 |     begin
 | 
|---|
 | 137 |       // escape
 | 
|---|
 | 138 |       thebyte := p^;
 | 
|---|
 | 139 |       inc( p );
 | 
|---|
 | 140 | 
 | 
|---|
 | 141 |       if thebyte = 0 then
 | 
|---|
 | 142 |       begin
 | 
|---|
 | 143 |         // 16 bit repeat of zeroes??
 | 
|---|
 | 144 |         N := pUInt16( p )^ + 1;
 | 
|---|
 | 145 |         inc( p, 2 );
 | 
|---|
 | 146 |         inc( TopicIndex, N );
 | 
|---|
 | 147 |       end
 | 
|---|
 | 148 |       else
 | 
|---|
 | 149 |       begin
 | 
|---|
 | 150 |         // n+1 repeats of next 2 bytes???
 | 
|---|
 | 151 |         N := thebyte + 1;
 | 
|---|
 | 152 |         byte1 := p^;
 | 
|---|
 | 153 |         inc( p );
 | 
|---|
 | 154 |         byte2 := p^;
 | 
|---|
 | 155 |         inc( p );
 | 
|---|
 | 156 |         while N > 0 do
 | 
|---|
 | 157 |         begin
 | 
|---|
 | 158 |           Check1ByteOfFlags( byte1,
 | 
|---|
 | 159 |                              TopicIndex,
 | 
|---|
 | 160 |                              Results );
 | 
|---|
 | 161 |           inc( TopicIndex, 8 );
 | 
|---|
 | 162 |           Check1ByteOfFlags( byte2,
 | 
|---|
 | 163 |                              TopicIndex,
 | 
|---|
 | 164 |                              Results );
 | 
|---|
 | 165 |           inc( TopicIndex, 8 );
 | 
|---|
 | 166 |           dec( N );
 | 
|---|
 | 167 |         end;
 | 
|---|
 | 168 |       end;
 | 
|---|
 | 169 |     end
 | 
|---|
 | 170 |     else
 | 
|---|
 | 171 |     begin
 | 
|---|
 | 172 |       N:= thebyte and $7f + 1;
 | 
|---|
 | 173 | 
 | 
|---|
 | 174 |       if thebyte and $80 > 0 then
 | 
|---|
 | 175 |       begin
 | 
|---|
 | 176 |         // literal data
 | 
|---|
 | 177 |         while N > 0 do
 | 
|---|
 | 178 |         begin
 | 
|---|
 | 179 |           Check1ByteOfFlags( p^,
 | 
|---|
 | 180 |                              TopicIndex,
 | 
|---|
 | 181 |                              Results );
 | 
|---|
 | 182 |           inc( TopicIndex, 8 );
 | 
|---|
 | 183 |           inc( p );
 | 
|---|
 | 184 |           dec( N );
 | 
|---|
 | 185 |         end;
 | 
|---|
 | 186 |       end
 | 
|---|
 | 187 |       else
 | 
|---|
 | 188 |       begin
 | 
|---|
 | 189 |         // repeat of next byte
 | 
|---|
 | 190 |         thebyte := p^;
 | 
|---|
 | 191 |         inc( p );
 | 
|---|
 | 192 |         while N > 0 do
 | 
|---|
 | 193 |         begin
 | 
|---|
 | 194 |           Check1ByteOfFlags( thebyte,
 | 
|---|
 | 195 |                              TopicIndex,
 | 
|---|
 | 196 |                              Results );
 | 
|---|
 | 197 |           inc( TopicIndex, 8 );
 | 
|---|
 | 198 |           dec( N );
 | 
|---|
 | 199 |         end;
 | 
|---|
 | 200 |       end;
 | 
|---|
 | 201 |     end;
 | 
|---|
 | 202 |   end;
 | 
|---|
 | 203 | end;
 | 
|---|
 | 204 | 
 | 
|---|
 | 205 | // This function finds uses of the given word (DictIndex)
 | 
|---|
 | 206 | // using the search table. Results[ topic ] is set to
 | 
|---|
 | 207 | // non-zero for topics which contain the word.
 | 
|---|
 | 208 | procedure TSearchTable.Search( DictIndex: uint16;
 | 
|---|
 | 209 |                                Results: UInt32ArrayPointer );
 | 
|---|
 | 210 | var
 | 
|---|
 | 211 |   TopicIndex: integer;
 | 
|---|
 | 212 |   pWordRecord: pointer;
 | 
|---|
 | 213 |   RecordLen: uint16;
 | 
|---|
 | 214 |   CompressionCode: uint8;
 | 
|---|
 | 215 |   pData: pointer;
 | 
|---|
 | 216 |   pDataEnd: pointer;
 | 
|---|
 | 217 |   Flags: uint8;
 | 
|---|
 | 218 | begin
 | 
|---|
 | 219 |   pWordRecord:= _Entries[ DictIndex ];
 | 
|---|
 | 220 | 
 | 
|---|
 | 221 |   // Check search table format
 | 
|---|
 | 222 |   if _RecordLengthIs16Bit then
 | 
|---|
 | 223 |   begin
 | 
|---|
 | 224 |     RecordLen:= pUInt16( pWordRecord )^;
 | 
|---|
 | 225 |     CompressionCode:= pUInt8( pWordRecord + 2 )^;
 | 
|---|
 | 226 |     pData:= pWordRecord + 3;
 | 
|---|
 | 227 |   end
 | 
|---|
 | 228 |   else // 8 bit
 | 
|---|
 | 229 |   begin
 | 
|---|
 | 230 |     RecordLen:= pUInt8( pWordRecord )^;
 | 
|---|
 | 231 |     CompressionCode:= pUInt8( pWordRecord + 1 )^;
 | 
|---|
 | 232 |     pData:= pWordRecord + 2;
 | 
|---|
 | 233 |   end;
 | 
|---|
 | 234 | 
 | 
|---|
 | 235 |   // Decompress the search table for this word
 | 
|---|
 | 236 |   pDataEnd:= pWordRecord + RecordLen;
 | 
|---|
 | 237 |   case CompressionCode of
 | 
|---|
 | 238 |     0: // word not used anywhere.
 | 
|---|
 | 239 |       ClearUInt32Array( Results, _TopicCount );
 | 
|---|
 | 240 | 
 | 
|---|
 | 241 |     1: // used in all panels
 | 
|---|
 | 242 |       FillUInt32Array( Results, _TopicCount, 1 );
 | 
|---|
 | 243 | 
 | 
|---|
 | 244 |     2: // RLE
 | 
|---|
 | 245 |     begin
 | 
|---|
 | 246 |       ClearUInt32Array( Results, _TopicCount );
 | 
|---|
 | 247 |       DoRLESearch( pData,
 | 
|---|
 | 248 |                    pDataEnd,
 | 
|---|
 | 249 |                    Results );
 | 
|---|
 | 250 |     end;
 | 
|---|
 | 251 | 
 | 
|---|
 | 252 |     3: // list of topics containing word
 | 
|---|
 | 253 |     begin
 | 
|---|
 | 254 |       ClearUInt32Array( Results, _TopicCount );
 | 
|---|
 | 255 |       while pData < pDataEnd do
 | 
|---|
 | 256 |       begin
 | 
|---|
 | 257 |         TopicIndex:= pUInt16( pData )^;
 | 
|---|
 | 258 |         Results^[ TopicIndex ] := 1;
 | 
|---|
 | 259 |         inc( pData, 2 );
 | 
|---|
 | 260 |       end;
 | 
|---|
 | 261 |     end;
 | 
|---|
 | 262 | 
 | 
|---|
 | 263 |     4: // list of topics NOT containing word
 | 
|---|
 | 264 |     begin
 | 
|---|
 | 265 |       FillUInt32Array( Results, _TopicCount, 1 );
 | 
|---|
 | 266 | 
 | 
|---|
 | 267 |       while pData < pDataEnd do
 | 
|---|
 | 268 |       begin
 | 
|---|
 | 269 |         TopicIndex:= pUInt16( pData )^;
 | 
|---|
 | 270 |         Results^[ TopicIndex ] := 0;
 | 
|---|
 | 271 |         inc( pData, 2 );
 | 
|---|
 | 272 |       end;
 | 
|---|
 | 273 |     end;
 | 
|---|
 | 274 | 
 | 
|---|
 | 275 |     5, // compressed by truncating bit stream at last byte containing a set bit.
 | 
|---|
 | 276 |     6: // same as above but starting at non-zero byte (first word contains start topic)
 | 
|---|
 | 277 |     begin
 | 
|---|
 | 278 |       ClearUInt32Array( Results, _TopicCount );
 | 
|---|
 | 279 |       if CompressionCode = 5 then
 | 
|---|
 | 280 |       begin
 | 
|---|
 | 281 |         TopicIndex:= 0
 | 
|---|
 | 282 |       end
 | 
|---|
 | 283 |       else
 | 
|---|
 | 284 |       begin
 | 
|---|
 | 285 |         TopicIndex:= pUInt16( pData )^ * 8;
 | 
|---|
 | 286 |         inc( pData, 2  );
 | 
|---|
 | 287 |       end;
 | 
|---|
 | 288 | 
 | 
|---|
 | 289 |       while pData < pDataEnd do
 | 
|---|
 | 290 |       begin
 | 
|---|
 | 291 |         Flags:= pUInt8( pData )^;
 | 
|---|
 | 292 |         Check1ByteOfFlags( Flags,
 | 
|---|
 | 293 |                            TopicIndex,
 | 
|---|
 | 294 |                            Results );
 | 
|---|
 | 295 |         inc( TopicIndex, 8 );
 | 
|---|
 | 296 |         inc( pData );
 | 
|---|
 | 297 |       end;
 | 
|---|
 | 298 |     end;
 | 
|---|
 | 299 | 
 | 
|---|
 | 300 |     else
 | 
|---|
 | 301 |       // unknown method
 | 
|---|
 | 302 |       ClearUInt32Array( Results, _TopicCount );
 | 
|---|
 | 303 |   end;
 | 
|---|
 | 304 | end;
 | 
|---|
 | 305 | 
 | 
|---|
 | 306 | Initialization
 | 
|---|
 | 307 | End.
 | 
|---|