| 1 | Unit SearchUnit;
|
|---|
| 2 |
|
|---|
| 3 | // NewView - a new OS/2 Help Viewer
|
|---|
| 4 | // Copyright 2003 Aaron Lawrence (aaronl at consultant dot com)
|
|---|
| 5 | // This software is released under the Gnu Public License - see readme.txt
|
|---|
| 6 |
|
|---|
| 7 | Interface
|
|---|
| 8 |
|
|---|
| 9 | // Contains code to search help files.
|
|---|
| 10 |
|
|---|
| 11 | uses
|
|---|
| 12 | Classes,
|
|---|
| 13 | HelpFile,
|
|---|
| 14 | TextSearchQuery,
|
|---|
| 15 | IPFFileFormatUnit;
|
|---|
| 16 |
|
|---|
const
  // Match weightings: multipliers applied to a word's relevance depending on
  // where in the help file the match was found.

  // Topic title matches (applied in SearchTopicTitles)
  mwOnlyTitleWord = 200;  // matched word is the only word of the title
  mwFirstTitleWord = 50;  // matched word is the first of several title words
  mwTitleWord = 20;       // matched word is any later title word

  // Index entry matches (applied in SearchIndex), same three cases
  mwOnlyIndexWord = 100;
  mwFirstIndexWord = 20;
  mwIndexWord = 10;
  // NOTE(review): mwTopicTextWord is not referenced in this unit's
  // implementation - confirm whether other units still use it.
  mwTopicTextWord = 1;

  // best case match weighting of a word: the relevance given when the
  // search word and the matched word have the same length
  // (see MatchedWordRelevance and SearchDictionaryExact)
  mwExactWord = 20;


// Note on weightings: the title/index weightings
// are multiplied by word weightings.
// Topic text matches are equal to word weighting
// times word weighting.
|
|---|
| 36 |
|
|---|
// Searches HelpFile for the terms of Query (topic text, titles and index).
// Topics with a relevance above zero that are not excluded are appended to
// Results, with their SearchRelevance set.
// WordSequences may be nil. If it is not nil, one TList per non-excluded
// term is added to it.
procedure SearchHelpFile( HelpFile: THelpFile;
                          Query: TTextSearchQuery;
                          Results: TList;
                          WordSequences: TList );
|
|---|
| 41 |
|
|---|
// Clear a list of word sequences (as produced by SearchHelpFile above).
// DictionaryCount is the size that is passed on when each relevance
// array is freed.
procedure ClearWordSequences( WordSequences: TList;
                              DictionaryCount: longint );
|
|---|
| 45 |
|
|---|
| 46 | Implementation
|
|---|
| 47 |
|
|---|
| 48 | uses
|
|---|
| 49 | SysUtils,
|
|---|
| 50 | DebugUnit,
|
|---|
| 51 | StringUtilsUnit,
|
|---|
| 52 | HelpTopic;
|
|---|
| 53 |
|
|---|
type
  // Kinds of dictionary word match.
  // NOTE(review): this type is not referenced anywhere in the visible part
  // of this unit; presumably it mirrors SearchDictionary /
  // SearchDictionaryStarts / SearchDictionaryExact / SearchDictionaryEnds -
  // confirm before relying on it.
  TSearchType = ( stGeneral, stStarts, stExactMatch, stEnds );
|
|---|
| 56 |
|
|---|
// Frees every dictionary relevance array stored in WordSequence and then
// empties the list. The list object itself is not destroyed.
// DictionaryCount is the element count handed to FreeUInt32Array.
procedure ClearWordSequence( WordSequence: TList;
                             DictionaryCount: longint );
var
  i: longint;
  pRelevances: UInt32ArrayPointer;
begin
  i := 0;
  while i < WordSequence.Count do
  begin
    // each list item is a pointer to one relevance array
    pRelevances := WordSequence[ i ];
    FreeUInt32Array( pRelevances, DictionaryCount );
    inc( i );
  end;

  WordSequence.Clear;
end;
|
|---|
| 70 |
|
|---|
// Clears a list of word sequences: every item of WordSequences is a TList
// of relevance arrays. Each such list is emptied with ClearWordSequence
// (which frees its arrays) and then destroyed; finally WordSequences
// itself is emptied. WordSequences is not destroyed.
// DictionaryCount is the size passed on when the arrays are freed.
procedure ClearWordSequences( WordSequences: TList;
                              DictionaryCount: longint );
var
  SequenceIndex: longint;
  WordSequence: TList;
begin
  // The parameter must be named WordSequences (as in the interface
  // section); naming it WordSequence clashes with the local below.
  for SequenceIndex := 0 to WordSequences.Count - 1 do
  begin
    WordSequence := WordSequences[ SequenceIndex ];
    ClearWordSequence( WordSequence,
                       DictionaryCount );
    WordSequence.Destroy;
  end;
  WordSequences.Clear;
end;
|
|---|
| 86 |
|
|---|
| 87 |
|
|---|
// Given a search word which is already known to match ReferenceWord,
// return the relevance of that match: mwExactWord scaled by the ratio of
// the two word lengths (integer arithmetic), but never less than 1.
// NOTE(review): an empty ReferenceWord would divide by zero - callers are
// assumed to pass non-empty reference words; confirm.
function MatchedWordRelevance( const SearchWord: string;
                               const ReferenceWord: string ): longint;
var
  Scaled: longint;
begin
  Scaled := ( mwExactWord * Length( SearchWord ) ) div Length( ReferenceWord );

  // a match must always score something
  if Scaled = 0 then
    Scaled := 1;

  Result := Scaled;
end;
|
|---|
| 99 |
|
|---|
// Scores SearchWord against ReferenceWord.
// If SearchWord occurs anywhere in ReferenceWord (ignoring case) the
// result is MatchedWordRelevance of the two words; otherwise it is 0,
// meaning no match at all.
function CompareWord( const SearchWord: string;
                      const ReferenceWord: string ): longint;
begin
  if CaseInsensitivePos( SearchWord, ReferenceWord ) <> 0 then
    Result := MatchedWordRelevance( SearchWord, ReferenceWord )
  else
    Result := 0; // no match
end;
|
|---|
| 118 |
|
|---|
// General dictionary search: partial matches are accepted.
// For every dictionary word of HelpFile, the relevance of SearchWord
// against that word (see CompareWord, 0 = no match) is stored in Results
// at the word's dictionary index. Every entry is overwritten, so Results
// does not need to be cleared beforehand.
procedure SearchDictionary( HelpFile: THelpFile;
                            SearchWord: string;
                            Results: UInt32ArrayPointer );
var
  WordIndex: integer;
  pWord: pstring;
begin
  WordIndex := 0;
  while WordIndex < HelpFile.DictionaryCount do
  begin
    pWord := HelpFile.DictionaryWordPtrs[ WordIndex ];
    Results[ WordIndex ] := CompareWord( SearchWord, pWord^ );
    inc( WordIndex );
  end;
end;
|
|---|
| 135 |
|
|---|
// Exact dictionary search (case is ignored).
// Results is first zeroed; then every dictionary word of HelpFile that
// equals SearchWord gets the relevance mwExactWord at its dictionary
// index.
procedure SearchDictionaryExact( HelpFile: THelpFile;
                                 SearchWord: string;
                                 Results: UInt32ArrayPointer );
var
  WordIndex: integer;
  pWord: pstring;
begin
  // start from "no match" everywhere
  FillUInt32Array( Results, HelpFile.DictionaryCount, 0 );

  WordIndex := 0;
  while WordIndex < HelpFile.DictionaryCount do
  begin
    pWord := HelpFile.DictionaryWordPtrs[ WordIndex ];
    if StrEqualIgnoringCase( SearchWord, pWord^ ) then
    begin
      Results[ WordIndex ] := mwExactWord;
    end;
    inc( WordIndex );
  end;
end;
|
|---|
| 154 |
|
|---|
// Prefix dictionary search (case is ignored).
// Results is first zeroed; then every dictionary word of HelpFile that
// starts with SearchWord gets its MatchedWordRelevance at its dictionary
// index.
procedure SearchDictionaryStarts( HelpFile: THelpFile;
                                  SearchWord: string;
                                  Results: UInt32ArrayPointer );
var
  WordIndex: integer;
  Candidate: string;
begin
  if IsLogAspectsEnabled(LogSearch) then
    LogEvent(LogSearch, ' calling SearchDictionaryStarts "' + SearchWord + '"');

  // start from "no match" everywhere
  FillUInt32Array( Results, HelpFile.DictionaryCount, 0 );

  WordIndex := 0;
  while WordIndex < HelpFile.DictionaryCount do
  begin
    Candidate := HelpFile.DictionaryWords[ WordIndex ];
    if StrStartsWithIgnoringCase( Candidate, SearchWord ) then
      Results[ WordIndex ] := MatchedWordRelevance( SearchWord, Candidate );
    inc( WordIndex );
  end;
end;
|
|---|
| 180 |
|
|---|
| 181 |
|
|---|
// Suffix dictionary search (case is ignored).
// Results is first zeroed; then every dictionary word of HelpFile that
// ends with SearchWord gets its MatchedWordRelevance at its dictionary
// index.
procedure SearchDictionaryEnds( HelpFile: THelpFile;
                                SearchWord: string;
                                Results: UInt32ArrayPointer );
var
  WordIndex: integer;
  Candidate: string;
begin
  if IsLogAspectsEnabled(LogSearch) then
    LogEvent(LogSearch, ' calling SearchDictionaryEnds for "' + SearchWord + '"');

  // start from "no match" everywhere
  FillUInt32Array( Results, HelpFile.DictionaryCount, 0 );

  WordIndex := 0;
  while WordIndex < HelpFile.DictionaryCount do
  begin
    Candidate := HelpFile.DictionaryWords[ WordIndex ];
    if StrEndsWithIgnoringCase( Candidate, SearchWord ) then
      Results[ WordIndex ] := MatchedWordRelevance( SearchWord, Candidate );
    inc( WordIndex );
  end;
end;
|
|---|
| 206 |
|
|---|
// Search titles of topics for the given search word.
//
// Each topic title is split into words (StrExtractStringsQuoted) and every
// word is scored with CompareWord. For each matching word the score is
// multiplied by a position weighting and ADDED to Results[ topic index ]:
//   mwOnlyTitleWord  - the word is the only word of the title
//   mwFirstTitleWord - the word is the first of several words
//   mwTitleWord      - any later word
// Results is only incremented, never cleared, so the caller must pass an
// array that already holds valid values for all HelpFile.TopicCount topics.
procedure SearchTopicTitles( HelpFile: THelpFile;
                             SearchWord: string;
                             Results: UInt32ArrayPointer );
var
  TopicIndex: longint;
  pTitle: pstring;
  TitleWord: string;
  Topic: TTopic;
  WordRelevance: longint;
  Weighting: longint;
  tmpTitleWords : TStringList;
  i : integer;
begin
  if IsLogAspectsEnabled(LogSearch) then
  begin
    LogEvent(LogSearch, ' calling SearchTopicTitles for "' + SearchWord + '"');
  end;

  tmpTitleWords := TStringList.Create;
  try
    // Search topic titles
    for TopicIndex := 0 to HelpFile.TopicCount - 1 do
    begin
      Topic := HelpFile.Topics[ TopicIndex ];
      pTitle := Topic.TitlePtr;

      tmpTitleWords.Clear;
      StrExtractStringsQuoted(tmpTitleWords, pTitle^);

      for i := 0 to tmpTitleWords.Count - 1 do
      begin
        TitleWord := tmpTitleWords[i];

        WordRelevance := CompareWord( SearchWord, TitleWord );
        if WordRelevance > 0 then
        begin
          if i > 0 then
            Weighting := mwTitleWord
          else if tmpTitleWords.Count = 1 then
            // matching the first word is best,
            // and in fact it's the only word
            Weighting := mwOnlyTitleWord
          else
            Weighting := mwFirstTitleWord;

          inc( Results[ TopicIndex ],
               Weighting * WordRelevance );
        end;
      end;
    end;
  finally
    // release the scratch list even if something above raised
    tmpTitleWords.Destroy;
  end;
end;
|
|---|
| 269 |
|
|---|
// Search index entries for the given search word.
//
// Each index entry is split into words (StrExtractStringsQuoted) and every
// word is scored with CompareWord. For each matching word the score is
// multiplied by a position weighting and ADDED to Results at the Index of
// the topic the entry belongs to:
//   mwOnlyIndexWord  - the word is the only word of the entry
//   mwFirstIndexWord - the word is the first of several words
//   mwIndexWord      - any later word
// Results is only incremented, never cleared, so the caller must pass an
// array that already holds valid values.
procedure SearchIndex( HelpFile: THelpFile;
                       SearchWord: string;
                       Results: UInt32ArrayPointer );
var
  IndexIndex: longint;
  pIndexEntry: pstring;
  IndexEntryWord: string;
  Topic: TTopic;
  WordRelevance: longint;
  Weighting: longint;
  tmpIndexWords : TStringList;
  i : integer;
begin
  if IsLogAspectsEnabled(LogSearch) then
  begin
    LogEvent(LogSearch, ' calling SearchIndex for "' + SearchWord + '"');
  end;

  tmpIndexWords := TStringList.Create;
  try
    for IndexIndex := 0 to HelpFile.Index.Count - 1 do
    begin
      // the "as" cast raises if the stored object is not a TTopic
      Topic := HelpFile.Index.Objects[ IndexIndex ] as TTopic;
      pIndexEntry := HelpFile.IndexEntryPtr[ IndexIndex ];

      tmpIndexWords.Clear;
      StrExtractStringsQuoted(tmpIndexWords, pIndexEntry^);

      for i := 0 to tmpIndexWords.Count - 1 do
      begin
        IndexEntryWord := tmpIndexWords[i];

        WordRelevance := CompareWord( SearchWord, IndexEntryWord );
        if WordRelevance > 0 then
        begin
          if i > 0 then
            Weighting := mwIndexWord
          else if tmpIndexWords.Count = 1 then
            // matching the first word is best,
            // and in fact it's the only word
            Weighting := mwOnlyIndexWord
          else
            Weighting := mwFirstIndexWord;

          inc( Results[ Topic.Index ],
               Weighting * WordRelevance );
        end;
      end;
    end;
  finally
    // release the scratch list even if something above raised
    tmpIndexWords.Destroy;
  end;
end;
|
|---|
| 332 |
|
|---|
| 333 | // ------------------------------------------------------
|
|---|
| 334 |
|
|---|
// Master search function. Given a search query,
// searches topic text, titles, index entries.
// Matching topics are added to Results, with their
// SearchRelevance set appropriately.
//
// HelpFile      - the help file to search. Nothing is searched if its
//                 SearchTable is nil.
// Query         - the parsed query; each of its terms is searched in turn
//                 and combined according to the term's CombineMethod.
// Results       - receives every topic that is not excluded and has a
//                 relevance above zero.
// WordSequences - may be nil. If not nil, the word sequence (a TList of
//                 dictionary relevance arrays, one per term part) of every
//                 term that is not an exclusion is added to it and is
//                 then owned by the caller. All other word sequences are
//                 freed here.
procedure SearchHelpFile( HelpFile: THelpFile;
                          Query: TTextSearchQuery;
                          Results: TList;
                          WordSequences: TList );
var
  tmpTopicCount: longint;
  tmpTopic: TTopic;
  tmpTopicIndex: longint;
  tmpTermIndex: longint;
  tmpTerm: TSearchTerm;

  DictionaryRelevances: UInt32ArrayPointer;

  TopicsMatchingDictWord: UInt32ArrayPointer; // flags
  TopicsMatchingTermPart: UInt32ArrayPointer; // flags
  TopicsMatchingTerm: UInt32ArrayPointer; // flag then relevances
  TopicRelevances: UInt32ArrayPointer;
  TopicsExcluded: UInt32ArrayPointer;

  TopicRelevanceForTerm: longint;

  WordRelevance: longint;
  DictIndex: longint;

  TermPartIndex: longint;
  TermPart: string;

  TermWordSequence: TList;
  KeepWordSequence: boolean; // true if the caller takes over TermWordSequence
begin
  LogEvent(LogSearch, 'SearchHelpFile');
  Query.Log;

  if HelpFile.SearchTable = nil then
  begin
    exit;
  end;

  // Reset flags per topic
  tmpTopicCount := HelpFile.TopicCount;

  // Get memory for topic relevance arrays

  AllocUInt32Array( TopicsMatchingDictWord, tmpTopicCount );
  AllocUInt32Array( TopicsMatchingTermPart, tmpTopicCount );
  AllocUInt32Array( TopicsMatchingTerm, tmpTopicCount );
  AllocUInt32Array( TopicRelevances, tmpTopicCount ); // functions as a flag and a cumulative relevance

  AllocUInt32Array( TopicsExcluded, tmpTopicCount ); // Exclusions are treated as boolean only


  ClearUInt32Array( TopicRelevances, tmpTopicCount );
  ClearUInt32Array( TopicsExcluded, tmpTopicCount );

  for tmpTermIndex := 0 to Query.TermCount - 1 do
  begin
    tmpTerm := Query.Term[ tmpTermIndex ];

    if IsLogAspectsEnabled(LogSearch) then
    begin
      LogEvent(LogSearch, 'Searching for term "'
                          + tmpTerm.Text
                          + '", '
                          + IntToStr( tmpTerm.Parts.Count )
                          + ' parts' );
    end;

    // look thru all parts of the term. eg. CAKE_SAUSAGE

    TermWordSequence := TList.Create;

    // Only inclusive terms are remembered for the caller. Everything else
    // (no list supplied, or an excluded term) must be freed below.
    KeepWordSequence := ( WordSequences <> nil )
                        and ( tmpTerm.CombineMethod <> cmExcluded );
    if KeepWordSequence then
    begin
      // this term is an inclusive one, so we want to remember the matches
      WordSequences.Add( TermWordSequence );
    end;

    // Defensive: if the term has no parts the loop below never fills this
    // array, so make sure it does not hold stale or uninitialised values.
    // For terms with parts the first part overwrites it anyway.
    ClearUInt32Array( TopicsMatchingTerm, tmpTopicCount );

    for TermPartIndex := 0 to tmpTerm.Parts.Count - 1 do
    begin
      TermPart := tmpTerm.Parts[ TermPartIndex ];

      if IsLogAspectsEnabled(LogSearch) then
      begin
        LogEvent(LogSearch, ' Searching for TermPart [' + TermPart + ']' );
      end;

      AllocUInt32Array( DictionaryRelevances, HelpFile.DictionaryCount );

      TermWordSequence.Add( DictionaryRelevances );

      // Search the dictionary for matches.
      // alpha numeric match

      if tmpTerm.Parts.Count = 1 then
      begin
        if IsLogAspectsEnabled(LogSearch) then
        begin
          LogEvent(LogSearch, ' Term has only one part...' );
          LogEvent(LogSearch, ' SearchDictionary [' + TermPart + ']' );
        end;

        // general match allowing all kinds of partial matches
        SearchDictionary( HelpFile, TermPart, DictionaryRelevances )
      end

      else if TermPartIndex = 0 then
      begin
        if IsLogAspectsEnabled(LogSearch) then
        begin
          LogEvent(LogSearch, ' Term has more then one part... we are at first' );
          LogEvent(LogSearch, ' SearchDictionaryEnd [' + TermPart + ']' );
        end;

        // first term part: word must match end of a topic word e.g. must end in "cake"
        SearchDictionaryEnds( HelpFile, TermPart, DictionaryRelevances )
      end

      else if TermPartIndex = tmpTerm.Parts.Count - 1 then
      begin
        if IsLogAspectsEnabled(LogSearch) then
        begin
          LogEvent(LogSearch, ' Term has more then one part... we are at last' );
          LogEvent(LogSearch, ' SearchDictionaryStarts [' + TermPart + ']' );
        end;

        // last term part: word must match start of a topic word e.g. must start with "sausage"
        SearchDictionaryStarts( HelpFile, TermPart, DictionaryRelevances )
      end

      else
      begin
        if IsLogAspectsEnabled(LogSearch) then
        begin
          LogEvent(LogSearch, ' Term has more then one part... we are inside' );
          LogEvent(LogSearch, ' SearchDictionaryExact [' + TermPart + ']' );
        end;

        // intermediate term part: word must match exactly e.g. must be "_"
        SearchDictionaryExact( HelpFile, TermPart, DictionaryRelevances )
      end;

      // For each word in the dictionary that matches
      // this search term part, search topic texts

      LogEvent(LogSearch, ' Dictionary search done' );
      ClearUInt32Array( TopicsMatchingTermPart, tmpTopicCount );

      for DictIndex := 0 to HelpFile.DictionaryCount - 1 do
      begin
        WordRelevance := DictionaryRelevances[ DictIndex ];
        if WordRelevance > 0 then
        begin
          // Search for occurrences of this word
          // within the text of topics
          HelpFile.SearchTable.Search( DictIndex,
                                       TopicsMatchingDictWord );

          // TopicsMatchingDictWord now flags the topics
          // that contain this word; merge them into the
          // matches for this term part.

          OrUInt32Array( TopicsMatchingDictWord,
                         TopicsMatchingTermPart,
                         tmpTopicCount );
        end
      end;

      LogEvent(LogSearch, ' Topic searches done' );

      if TermPartIndex = 0 then
        // first part, just copy
        CopyUInt32Array( TopicsMatchingTermPart,
                         TopicsMatchingTerm,
                         tmpTopicCount )
      else
        // and with previous term part results
        AndUInt32Array( TopicsMatchingTermPart,
                        TopicsMatchingTerm,
                        tmpTopicCount );

      // loop for next term part (IPF word)
    end;

    // Now we have searched the dictionary and worked out matching topics
    // for all parts of the term. Now combine all together

    LogEvent(LogSearch, 'Checking for sequences' );
    for tmpTopicIndex := 0 to tmpTopicCount - 1 do
    begin
      if TopicsMatchingTerm[ tmpTopicIndex ] > 0 then
      begin
        tmpTopic := HelpFile.Topics[ tmpTopicIndex ];
        // Topic text contained a match for the all the parts
        // of the term.
        // Now we need to:
        // - verify that they actually occur all in a sequence (if it's a multi-part term)
        // - count occurrences for relevance.

        TopicRelevanceForTerm := tmpTopic.SearchForWordSequences( TermWordSequence, false ); // don't stop at first match

        TopicRelevanceForTerm := TopicRelevanceForTerm div tmpTerm.Parts.Count; // divide to bring back into scale

        TopicsMatchingTerm[ tmpTopicIndex ] := TopicRelevanceForTerm;

      end;
    end;

    if not KeepWordSequence then
    begin
      // nobody keeps this sequence (no list supplied, or the term is an
      // exclusion), so release it and its relevance arrays now
      ClearWordSequence( TermWordSequence,
                         HelpFile.DictionaryCount );
      TermWordSequence.Destroy;
    end;

    // Search titles and index

    LogEvent(LogSearch, ' Searching titles' );
    SearchTopicTitles( HelpFile, tmpTerm.Text, TopicsMatchingTerm );

    LogEvent(LogSearch, ' Searching index' );
    SearchIndex( HelpFile, tmpTerm.Text, TopicsMatchingTerm );

    LogEvent(LogSearch, ' Combining' );
    case tmpTerm.CombineMethod of
      cmOptional:
      begin
        LogEvent(LogSearch, ' Combining optional');
        AddUInt32Array( TopicsMatchingTerm,
                        TopicRelevances,
                        tmpTopicCount );
      end;

      cmRequired:
      begin
        LogEvent(LogSearch, ' Combining required');
        // if zero then add to exclusions
        NotOrUInt32Array( TopicsMatchingTerm,
                          TopicsExcluded,
                          tmpTopicCount );

        AddUInt32Array( TopicsMatchingTerm,
                        TopicRelevances,
                        tmpTopicCount );
      end;

      cmExcluded:
      begin
        LogEvent(LogSearch, ' Combining excluded');
        OrUInt32Array( TopicsMatchingTerm,
                       TopicsExcluded,
                       tmpTopicCount );
      end;
    end;

    // Term.ClearMatches;

    // loop for next term...
  end;

  LogEvent(LogSearch, 'Search completed, converting to list' );

  // Now convert to list form.

  for tmpTopicIndex := 0 to tmpTopicCount - 1 do
  begin
    if TopicsExcluded[ tmpTopicIndex ] = 0 then
    begin
      tmpTopic := HelpFile.Topics[ tmpTopicIndex ];
      tmpTopic.SearchRelevance := TopicRelevances[ tmpTopicIndex ];
      if tmpTopic.SearchRelevance > 0 then
      begin
        Results.Add( tmpTopic );
      end;
    end;
  end;

  LogEvent(LogSearch, 'Freeing arrays' );
  FreeUInt32Array( TopicRelevances, tmpTopicCount );
  FreeUInt32Array( TopicsExcluded, tmpTopicCount );
  FreeUInt32Array( TopicsMatchingTerm, tmpTopicCount );
  FreeUInt32Array( TopicsMatchingTermPart, tmpTopicCount );
  FreeUInt32Array( TopicsMatchingDictWord, tmpTopicCount );

  LogEvent(LogSearch, 'Done' );
end;
|
|---|
| 628 |
|
|---|
| 629 | Initialization
|
|---|
| 630 | End.
|
|---|