- Timestamp:
- May 5, 2011, 5:36:53 AM (14 years ago)
- Location:
- trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk
- Property svn:mergeinfo changed
/branches/vendor/nokia/qt/4.7.2 (added) merged: 845 /branches/vendor/nokia/qt/current merged: 844 /branches/vendor/nokia/qt/4.6.3 removed
- Property svn:mergeinfo changed
-
trunk/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp
r769 r846 1 1 /**************************************************************************** 2 2 ** 3 ** Copyright (C) 201 0Nokia Corporation and/or its subsidiary(-ies).3 ** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies). 4 4 ** All rights reserved. 5 5 ** Contact: Nokia Corporation (qt-info@nokia.com) … … 40 40 ****************************************************************************/ 41 41 42 #include "fulltextsearch/qindexreader_p.h" 43 #include "fulltextsearch/qqueryparser_p.h" 44 #include "fulltextsearch/qsearchable_p.h" 45 #include "qclucenefieldnames_p.h" 42 46 #include "qhelpenginecore.h" 43 #include "fulltextsearch/qsearchable_p.h" 44 #include "fulltextsearch/qqueryparser_p.h" 45 #include "fulltextsearch/qindexreader_p.h" 47 46 48 #include "qhelpsearchindexreader_clucene_p.h" 47 49 … … 50 52 #include <QtCore/QString> 51 53 #include <QtCore/QFileInfo> 54 #include <QtCore/QSharedPointer> 52 55 #include <QtCore/QStringList> 53 56 #include <QtCore/QTextStream> … … 108 111 try { 109 112 #endif 110 QCLuceneBooleanQuery booleanQuery; 113 QCLuceneBooleanQuery booleanQueryTitle; 114 QCLuceneBooleanQuery booleanQueryContent; 111 115 QCLuceneStandardAnalyzer analyzer; 112 if (!buildQuery(booleanQuery, queryList, analyzer)) { 116 const QStringList& attribList = 117 engine.filterAttributes(engine.currentFilter()); 118 bool titleQueryIsValid = buildQuery(queryList, TitleTokenizedField, 119 attribList, booleanQueryTitle, analyzer); 120 bool contentQueryIsValid = buildQuery(queryList, ContentField, 121 attribList, booleanQueryContent, analyzer); 122 if (!titleQueryIsValid && !contentQueryIsValid) { 113 123 emit searchingFinished(0); 114 124 return; 115 125 } 116 126 117 const QStringList attribList = engine.filterAttributes(engine.currentFilter()); 118 if (!attribList.isEmpty()) { 119 QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+") 120 + attribList.join(QLatin1String(" +")), QLatin1String("attribute"), analyzer); 121 122 if (!query) { 127 QCLuceneIndexSearcher indexSearcher(indexPath); 128 129 // QCLuceneHits object must be allocated on the heap, because 130 // there is no default constructor. 131 QSharedPointer<QCLuceneHits> titleHits; 132 QSharedPointer<QCLuceneHits> contentHits; 133 if (titleQueryIsValid) { 134 titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( 135 indexSearcher.search(booleanQueryTitle))); 136 } 137 if (contentQueryIsValid) { 138 contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( 139 indexSearcher.search(booleanQueryContent))); 140 } 141 bool boost = true; 142 if ((titleHits.isNull() || titleHits->length() == 0) 143 && (contentHits.isNull() || contentHits->length() == 0)) { 144 booleanQueryTitle = QCLuceneBooleanQuery(); 145 booleanQueryContent = QCLuceneBooleanQuery(); 146 titleQueryIsValid = 147 buildTryHarderQuery(queryList, TitleTokenizedField, 148 attribList, booleanQueryTitle, analyzer); 149 contentQueryIsValid = 150 buildTryHarderQuery(queryList, ContentField, attribList, 151 booleanQueryContent, analyzer); 152 if (!titleQueryIsValid && !contentQueryIsValid) { 123 153 emit searchingFinished(0); 124 154 return; 125 155 } 126 booleanQuery.add(query, true, true, false); 127 } 128 129 QCLuceneIndexSearcher indexSearcher(indexPath); 130 QCLuceneHits hits = indexSearcher.search(booleanQuery); 131 132 bool boost = true; 133 QCLuceneBooleanQuery tryHarderQuery; 134 if (hits.length() == 0) { 135 if (buildTryHarderQuery(tryHarderQuery, queryList, analyzer)) { 136 if (!attribList.isEmpty()) { 137 QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+") 138 + attribList.join(QLatin1String(" +")), QLatin1String("attribute"), 139 analyzer); 140 tryHarderQuery.add(query, true, true, false); 141 } 142 hits = indexSearcher.search(tryHarderQuery); 143 boost = (hits.length() == 0); 156 if (titleQueryIsValid) { 157 titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( 158 indexSearcher.search(booleanQueryTitle))); 144 159 } 145 } 160 if (contentQueryIsValid) { 161 contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( 162 indexSearcher.search(booleanQueryContent))); 163 } 164 boost = false; 165 } 166 QList<QSharedPointer<QCLuceneHits> > cluceneHitsList; 167 if (!titleHits.isNull()) 168 cluceneHitsList.append(titleHits); 169 if (!contentHits.isNull()) 170 cluceneHitsList.append(contentHits); 146 171 147 172 QSet<QString> pathSet; … … 149 174 const QStringList namespaceList = engine.registeredDocumentations(); 150 175 151 for (qint32 i = 0; i < hits.length(); i++) { 152 document = hits.document(i); 153 const QString path = document.get(QLatin1String("path")); 154 if (!pathSet.contains(path) && namespaceList.contains( 155 document.get(QLatin1String("namespace")), Qt::CaseInsensitive)) { 156 pathSet.insert(path); 157 hitList.append(qMakePair(path, document.get(QLatin1String("title")))); 176 foreach (const QSharedPointer<QCLuceneHits> &hits, cluceneHitsList) { 177 for (qint32 i = 0; i < hits->length(); i++) { 178 document = hits->document(i); 179 const QString path = document.get(PathField); 180 if (!pathSet.contains(path) && namespaceList.contains( 181 document.get(NamespaceField), Qt::CaseInsensitive)) { 182 pathSet.insert(path); 183 hitList.append(qMakePair(path, document.get(TitleField))); 184 } 185 document.clear(); 186 187 mutex.lock(); 188 if (m_cancel) { 189 mutex.unlock(); 190 emit searchingFinished(0); 191 return; 192 } 193 mutex.unlock(); 158 194 } 159 document.clear();160 161 mutex.lock();162 if (m_cancel) {163 mutex.unlock();164 emit searchingFinished(0);165 return;166 }167 mutex.unlock();168 195 } 169 196 … … 185 212 } 186 213 187 bool QHelpSearchIndexReaderClucene::defaultQuery(const QString &term, QCLuceneBooleanQuery &booleanQuery, 188 QCLuceneStandardAnalyzer &analyzer) 189 { 190 const QLatin1String c("content"); 191 const QLatin1String t("titleTokenized"); 192 193 QCLuceneQuery *query = QCLuceneQueryParser::parse(term, c, analyzer); 194 QCLuceneQuery *query2 = QCLuceneQueryParser::parse(term, t, analyzer); 195 if (query && query2) { 196 booleanQuery.add(query, true, false, false); 197 booleanQuery.add(query2, true, false, false); 214 bool QHelpSearchIndexReaderClucene::buildQuery( 215 const QList<QHelpSearchQuery> &queries, const QString &fieldName, 216 const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery, 217 QCLuceneAnalyzer &analyzer) 218 { 219 bool queryIsValid = false; 220 foreach (const QHelpSearchQuery &query, queries) { 221 if (fieldName != ContentField && isNegativeQuery(query)) { 222 queryIsValid = false; 223 break; 224 } 225 switch (query.fieldName) { 226 case QHelpSearchQuery::FUZZY: 227 if (addFuzzyQuery(query, fieldName, booleanQuery, analyzer)) 228 queryIsValid = true; 229 break; 230 case QHelpSearchQuery::WITHOUT: 231 if (fieldName != ContentField) 232 return false; 233 if (addWithoutQuery(query, fieldName, booleanQuery)) 234 queryIsValid = true; 235 break; 236 case QHelpSearchQuery::PHRASE: 237 if (addPhraseQuery(query, fieldName, booleanQuery)) 238 queryIsValid = true; 239 break; 240 case QHelpSearchQuery::ALL: 241 if (addAllQuery(query, fieldName, booleanQuery)) 242 queryIsValid = true; 243 break; 244 case QHelpSearchQuery::DEFAULT: 245 if (addDefaultQuery(query, fieldName, true, booleanQuery, analyzer)) 246 queryIsValid = true; 247 break; 248 case QHelpSearchQuery::ATLEAST: 249 if (addAtLeastQuery(query, fieldName, booleanQuery, analyzer)) 250 queryIsValid = true; 251 break; 252 default: 253 Q_ASSERT(!"Invalid field name"); 254 } 255 } 256 257 if (queryIsValid && !filterAttributes.isEmpty()) { 258 queryIsValid = 259 addAttributesQuery(filterAttributes, booleanQuery, analyzer); 260 } 261 262 return queryIsValid; 263 } 264 265 bool QHelpSearchIndexReaderClucene::buildTryHarderQuery( 266 const QList<QHelpSearchQuery> &queries, const QString &fieldName, 267 const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery, 268 QCLuceneAnalyzer &analyzer) 269 { 270 if (queries.isEmpty()) 271 return false; 272 const QHelpSearchQuery &query = queries.front(); 273 if (query.fieldName != QHelpSearchQuery::DEFAULT) 274 return false; 275 if (isNegativeQuery(query)) 276 return false; 277 if (!addDefaultQuery(query, fieldName, false, booleanQuery, analyzer)) 278 return false; 279 if (filterAttributes.isEmpty()) 198 280 return true; 199 } 200 201 return false; 202 } 203 204 bool QHelpSearchIndexReaderClucene::buildQuery(QCLuceneBooleanQuery &booleanQuery, 205 const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer) 206 { 207 foreach (const QHelpSearchQuery query, queryList) { 208 switch (query.fieldName) { 209 case QHelpSearchQuery::FUZZY: { 210 const QLatin1String fuzzy("~"); 211 foreach (const QString &term, query.wordList) { 212 if (term.isEmpty() 213 || !defaultQuery(term.toLower() + fuzzy, booleanQuery, analyzer)) { 214 return false; 215 } 216 } 217 } break; 218 219 case QHelpSearchQuery::WITHOUT: { 220 QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); 221 foreach (const QString &term, query.wordList) { 222 if (stopWords.contains(term, Qt::CaseInsensitive)) 223 continue; 224 225 QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm( 226 QLatin1String("content"), term.toLower())); 227 QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm( 228 QLatin1String("titleTokenized"), term.toLower())); 229 230 if (query && query2) { 231 booleanQuery.add(query, true, false, true); 232 booleanQuery.add(query2, true, false, true); 233 } else { 234 return false; 235 } 236 } 237 } break; 238 239 case QHelpSearchQuery::PHRASE: { 240 const QString &term = query.wordList.at(0).toLower(); 241 if (term.contains(QLatin1Char(' '))) { 242 QStringList termList = term.split(QLatin1String(" ")); 243 QCLucenePhraseQuery *q = new QCLucenePhraseQuery(); 244 QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); 245 foreach (const QString &term, termList) { 246 if (!stopWords.contains(term, Qt::CaseInsensitive)) 247 q->addTerm(QCLuceneTerm(QLatin1String("content"), term.toLower())); 248 } 249 booleanQuery.add(q, true, true, false); 250 } else { 251 QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm( 252 QLatin1String("content"), term.toLower())); 253 QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm( 254 QLatin1String("titleTokenized"), term.toLower())); 255 256 if (query && query2) { 257 booleanQuery.add(query, true, true, false); 258 booleanQuery.add(query2, true, false, false); 259 } else { 260 return false; 261 } 262 } 263 } break; 264 265 case QHelpSearchQuery::ALL: { 266 QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); 267 foreach (const QString &term, query.wordList) { 268 if (stopWords.contains(term, Qt::CaseInsensitive)) 269 continue; 270 271 QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm( 272 QLatin1String("content"), term.toLower())); 273 274 if (query) { 275 booleanQuery.add(query, true, true, false); 276 } else { 277 return false; 278 } 279 } 280 } break; 281 282 case QHelpSearchQuery::DEFAULT: { 283 foreach (const QString &term, query.wordList) { 284 QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(), 285 QLatin1String("content"), analyzer); 286 287 if (query) 288 booleanQuery.add(query, true, true, false); 289 } 290 } break; 291 292 case QHelpSearchQuery::ATLEAST: { 293 foreach (const QString &term, query.wordList) { 294 if (term.isEmpty() || !defaultQuery(term.toLower(), booleanQuery, analyzer)) 295 return false; 296 } 297 } 298 } 299 } 300 281 return addAttributesQuery(filterAttributes, booleanQuery, analyzer); 282 } 283 284 bool QHelpSearchIndexReaderClucene::isNegativeQuery(const QHelpSearchQuery &query) const 285 { 286 const QString &search = query.wordList.join(" "); 287 return search.contains('!') || search.contains('-') 288 || search.contains(QLatin1String(" NOT ")); 289 } 290 291 bool QHelpSearchIndexReaderClucene::addFuzzyQuery(const QHelpSearchQuery &query, 292 const QString &fieldName, QCLuceneBooleanQuery &booleanQuery, 293 QCLuceneAnalyzer &analyzer) 294 { 295 bool queryIsValid = false; 296 const QLatin1String fuzzy("~"); 297 foreach (const QString &term, query.wordList) { 298 if (!term.isEmpty()) { 299 QCLuceneQuery *lQuery = 300 QCLuceneQueryParser::parse(term + fuzzy, fieldName, analyzer); 301 if (lQuery != 0) { 302 booleanQuery.add(lQuery, true, false, false); 303 queryIsValid = true; 304 } 305 } 306 } 307 return queryIsValid; 308 } 309 310 bool QHelpSearchIndexReaderClucene::addWithoutQuery(const QHelpSearchQuery &query, 311 const QString &fieldName, QCLuceneBooleanQuery &booleanQuery) 312 { 313 bool queryIsValid = false; 314 const QStringList &stopWords = QCLuceneStopAnalyzer().englishStopWords(); 315 foreach (const QString &term, query.wordList) { 316 if (stopWords.contains(term, Qt::CaseInsensitive)) 317 continue; 318 QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm( 319 fieldName, term.toLower())); 320 booleanQuery.add(lQuery, true, false, true); 321 queryIsValid = true; 322 } 323 return queryIsValid; 324 } 325 326 bool QHelpSearchIndexReaderClucene::addPhraseQuery(const QHelpSearchQuery &query, 327 const QString &fieldName, QCLuceneBooleanQuery &booleanQuery) 328 { 329 bool queryIsValid = false; 330 const QString &term = query.wordList.at(0).toLower(); 331 if (term.contains(QLatin1Char(' '))) { 332 const QStringList termList = term.split(QLatin1String(" ")); 333 QCLucenePhraseQuery *q = new QCLucenePhraseQuery(); 334 const QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); 335 foreach (const QString &term, termList) { 336 if (!stopWords.contains(term, Qt::CaseInsensitive)) 337 q->addTerm(QCLuceneTerm(fieldName, term.toLower())); 338 } 339 if (!q->getTerms().isEmpty()) { 340 booleanQuery.add(q, true, true, false); 341 queryIsValid = true; 342 } 343 } else { 344 QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm( 345 fieldName, term.toLower())); 346 booleanQuery.add(lQuery, true, true, false); 347 queryIsValid = true; 348 } 349 return queryIsValid; 350 } 351 352 bool QHelpSearchIndexReaderClucene::addAllQuery(const QHelpSearchQuery &query, 353 const QString &fieldName, QCLuceneBooleanQuery &booleanQuery) 354 { 355 bool queryIsValid = false; 356 const QStringList &stopWords = QCLuceneStopAnalyzer().englishStopWords(); 357 foreach (const QString &term, query.wordList) { 358 if (stopWords.contains(term, Qt::CaseInsensitive)) 359 continue; 360 QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm( 361 fieldName, term.toLower())); 362 booleanQuery.add(lQuery, true, true, false); 363 queryIsValid = true; 364 } 365 return queryIsValid; 366 } 367 368 bool QHelpSearchIndexReaderClucene::addDefaultQuery(const QHelpSearchQuery &query, 369 const QString &fieldName, bool allTermsRequired, 370 QCLuceneBooleanQuery &booleanQuery, 371 QCLuceneAnalyzer &analyzer) 372 { 373 bool queryIsValid = false; 374 foreach (const QString &term, query.wordList) { 375 QCLuceneQuery *lQuery = 376 QCLuceneQueryParser::parse(term.toLower(), fieldName, analyzer); 377 if (lQuery) { 378 booleanQuery.add(lQuery, true, allTermsRequired, false); 379 queryIsValid = true; 380 } 381 } 382 return queryIsValid; 383 } 384 385 bool QHelpSearchIndexReaderClucene::addAtLeastQuery( 386 const QHelpSearchQuery &query, const QString &fieldName, 387 QCLuceneBooleanQuery &booleanQuery, QCLuceneAnalyzer &analyzer) 388 { 389 bool queryIsValid = false; 390 foreach (const QString &term, query.wordList) { 391 if (!term.isEmpty()) { 392 QCLuceneQuery *lQuery = 393 QCLuceneQueryParser::parse(term, fieldName, analyzer); 394 if (lQuery) { 395 booleanQuery.add(lQuery, true, false, false); 396 queryIsValid = true; 397 } 398 } 399 } 400 return queryIsValid; 401 } 402 403 bool QHelpSearchIndexReaderClucene::addAttributesQuery( 404 const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery, 405 QCLuceneAnalyzer &analyzer) 406 { 407 QCLuceneQuery* lQuery = QCLuceneQueryParser::parse(QLatin1String("+") 408 + filterAttributes.join(QLatin1String(" +")), AttributeField, analyzer); 409 if (!lQuery) 410 return false; 411 booleanQuery.add(lQuery, true, true, false); 301 412 return true; 302 }303 304 bool QHelpSearchIndexReaderClucene::buildTryHarderQuery(QCLuceneBooleanQuery &booleanQuery,305 const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer)306 {307 bool retVal = false;308 foreach (const QHelpSearchQuery query, queryList) {309 switch (query.fieldName) {310 default: break;311 case QHelpSearchQuery::DEFAULT: {312 foreach (const QString &term, query.wordList) {313 QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(),314 QLatin1String("content"), analyzer);315 316 if (query) {317 retVal = true;318 booleanQuery.add(query, true, false, false);319 }320 }321 } break;322 }323 }324 return retVal;325 413 } 326 414 … … 328 416 QList<QHelpSearchEngine::SearchHit> &hitList, const QList<QHelpSearchQuery> &queryList) 329 417 { 330 foreach (const QHelpSearchQuery query, queryList) {418 foreach (const QHelpSearchQuery &query, queryList) { 331 419 if (query.fieldName != QHelpSearchQuery::DEFAULT) 332 420 continue; … … 336 424 QCLuceneStandardAnalyzer analyzer; 337 425 QCLuceneQuery *parsedQuery = QCLuceneQueryParser::parse( 338 joinedQuery, QLatin1String("content"), analyzer);426 joinedQuery, ContentField, analyzer); 339 427 340 428 if (parsedQuery) { … … 343 431 } 344 432 345 int length = QString(QLatin1String("content:")).length(); 346 int index = joinedQuery.indexOf(QLatin1String("content:")); 433 const QString contentString(ContentField + QLatin1String(":")); 434 int length = contentString.length(); 435 int index = joinedQuery.indexOf(contentString); 347 436 348 437 QString term; … … 350 439 QStringList searchTerms; 351 440 while (index != -1) { 352 nextIndex = joinedQuery.indexOf( QLatin1String("content:"), index + 1);441 nextIndex = joinedQuery.indexOf(contentString, index + 1); 353 442 term = joinedQuery.mid(index + length, nextIndex - (length + index)).simplified(); 354 443 if (term.startsWith(QLatin1String("\""))
Note:
See TracChangeset
for help on using the changeset viewer.