source: trunk/tools/assistant/lib/qhelpsearchindexwriter_default.cpp

Last change on this file was 846, checked in by Dmitry A. Kuminov, 14 years ago

trunk: Merged in qt 4.7.2 sources from branches/vendor/nokia/qt.

File size: 11.2 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
4** All rights reserved.
5** Contact: Nokia Corporation (qt-info@nokia.com)
6**
7** This file is part of the Qt Assistant of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial Usage
11** Licensees holding valid Qt Commercial licenses may use this file in
12** accordance with the Qt Commercial License Agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and Nokia.
15**
16** GNU Lesser General Public License Usage
17** Alternatively, this file may be used under the terms of the GNU Lesser
18** General Public License version 2.1 as published by the Free Software
19** Foundation and appearing in the file LICENSE.LGPL included in the
20** packaging of this file. Please review the following information to
21** ensure the GNU Lesser General Public License version 2.1 requirements
22** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23**
24** In addition, as a special exception, Nokia gives you certain additional
25** rights. These rights are described in the Nokia Qt LGPL Exception
26** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you have questions regarding the use of this file, please contact
37** Nokia at qt-info@nokia.com.
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "qhelpsearchindexwriter_default_p.h"
43#include "qhelp_global.h"
44#include "qhelpenginecore.h"
45
46#include <QtCore/QDir>
47#include <QtCore/QSet>
48#include <QtCore/QUrl>
49#include <QtCore/QFile>
50#include <QtCore/QRegExp>
51#include <QtCore/QVariant>
52#include <QtCore/QFileInfo>
53#include <QtCore/QTextCodec>
54#include <QtCore/QTextStream>
55
56QT_BEGIN_NAMESPACE
57
58namespace fulltextsearch {
59namespace std {
60
61Writer::Writer(const QString &path)
62 : indexPath(path)
63 , indexFile(QString())
64 , documentFile(QString())
65{
66 // nothing todo
67}
68
69Writer::~Writer()
70{
71 reset();
72}
73
74void Writer::reset()
75{
76 for(QHash<QString, Entry*>::ConstIterator it =
77 index.begin(); it != index.end(); ++it) {
78 delete it.value();
79 }
80
81 index.clear();
82 documentList.clear();
83}
84
85bool Writer::writeIndex() const
86{
87 bool status;
88 QFile idxFile(indexFile);
89 if (!(status = idxFile.open(QFile::WriteOnly)))
90 return status;
91
92 QDataStream indexStream(&idxFile);
93 for(QHash<QString, Entry*>::ConstIterator it =
94 index.begin(); it != index.end(); ++it) {
95 indexStream << it.key();
96 indexStream << it.value()->documents.count();
97 indexStream << it.value()->documents;
98 }
99 idxFile.close();
100
101 QFile docFile(documentFile);
102 if (!(status = docFile.open(QFile::WriteOnly)))
103 return status;
104
105 QDataStream docStream(&docFile);
106 foreach(const QStringList &list, documentList) {
107 docStream << list.at(0);
108 docStream << list.at(1);
109 }
110 docFile.close();
111
112 return status;
113}
114
115void Writer::removeIndex() const
116{
117 QFile idxFile(indexFile);
118 if (idxFile.exists())
119 idxFile.remove();
120
121 QFile docFile(documentFile);
122 if (docFile.exists())
123 docFile.remove();
124}
125
126void Writer::setIndexFile(const QString &namespaceName, const QString &attributes)
127{
128 QString extension = namespaceName + QLatin1String("@") + attributes;
129 indexFile = indexPath + QLatin1String("/indexdb40.") + extension;
130 documentFile = indexPath + QLatin1String("/indexdoc40.") + extension;
131}
132
133void Writer::insertInIndex(const QString &string, int docNum)
134{
135 if (string == QLatin1String("amp") || string == QLatin1String("nbsp"))
136 return;
137
138 Entry *entry = 0;
139 if (index.count())
140 entry = index[string];
141
142 if (entry) {
143 if (entry->documents.last().docNumber != docNum)
144 entry->documents.append(Document(docNum, 1));
145 else
146 entry->documents.last().frequency++;
147 } else {
148 index.insert(string, new Entry(docNum));
149 }
150}
151
152void Writer::insertInDocumentList(const QString &title, const QString &url)
153{
154 documentList.append(QStringList(title) << url);
155}
156
157
158QHelpSearchIndexWriter::QHelpSearchIndexWriter()
159 : QThread()
160 , m_cancel(false)
161{
162 // nothing todo
163}
164
165QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
166{
167 mutex.lock();
168 this->m_cancel = true;
169 waitCondition.wakeOne();
170 mutex.unlock();
171
172 wait();
173}
174
175void QHelpSearchIndexWriter::cancelIndexing()
176{
177 mutex.lock();
178 this->m_cancel = true;
179 mutex.unlock();
180}
181
182void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile,
183 const QString &indexFilesFolder,
184 bool reindex)
185{
186 wait();
187 QMutexLocker lock(&mutex);
188
189 this->m_cancel = false;
190 this->m_reindex = reindex;
191 this->m_collectionFile = collectionFile;
192 this->m_indexFilesFolder = indexFilesFolder;
193
194 start(QThread::LowestPriority);
195}
196
197void QHelpSearchIndexWriter::run()
198{
199 mutex.lock();
200
201 if (m_cancel) {
202 mutex.unlock();
203 return;
204 }
205
206 const bool reindex(this->m_reindex);
207 const QLatin1String key("DefaultSearchNamespaces");
208 const QString collectionFile(this->m_collectionFile);
209 const QString indexPath = m_indexFilesFolder;
210
211 mutex.unlock();
212
213 QHelpEngineCore engine(collectionFile, 0);
214 if (!engine.setupData())
215 return;
216
217 if (reindex)
218 engine.setCustomValue(key, QLatin1String(""));
219
220 const QStringList registeredDocs = engine.registeredDocumentations();
221 const QStringList indexedNamespaces = engine.customValue(key).toString().
222 split(QLatin1String("|"), QString::SkipEmptyParts);
223
224 emit indexingStarted();
225
226 QStringList namespaces;
227 Writer writer(indexPath);
228 foreach(const QString &namespaceName, registeredDocs) {
229 mutex.lock();
230 if (m_cancel) {
231 mutex.unlock();
232 return;
233 }
234 mutex.unlock();
235
236 // if indexed, continue
237 namespaces.append(namespaceName);
238 if (indexedNamespaces.contains(namespaceName))
239 continue;
240
241 const QList<QStringList> attributeSets =
242 engine.filterAttributeSets(namespaceName);
243
244 foreach (const QStringList &attributes, attributeSets) {
245 // cleanup maybe old or unfinished files
246 writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@")));
247 writer.removeIndex();
248
249 QSet<QString> documentsSet;
250 const QList<QUrl> docFiles = engine.files(namespaceName, attributes);
251 foreach(QUrl url, docFiles) {
252 if (m_cancel)
253 return;
254
255 // get rid of duplicated files
256 if (url.hasFragment())
257 url.setFragment(QString());
258
259 QString s = url.toString();
260 if (s.endsWith(QLatin1String(".html"))
261 || s.endsWith(QLatin1String(".htm"))
262 || s.endsWith(QLatin1String(".txt")))
263 documentsSet.insert(s);
264 }
265
266 int docNum = 0;
267 const QStringList documentsList(documentsSet.toList());
268 foreach(const QString &url, documentsList) {
269 if (m_cancel)
270 return;
271
272 QByteArray data(engine.fileData(url));
273 if (data.isEmpty())
274 continue;
275
276 QTextStream s(data);
277 QString en = QHelpGlobal::codecFromData(data);
278 s.setCodec(QTextCodec::codecForName(en.toLatin1().constData()));
279
280 QString text = s.readAll();
281 if (text.isNull())
282 continue;
283
284 QString title = QHelpGlobal::documentTitle(text);
285
286 int j = 0;
287 int i = 0;
288 bool valid = true;
289 const QChar *buf = text.unicode();
290 QChar str[64];
291 QChar c = buf[0];
292
293 while ( j < text.length() ) {
294 if (m_cancel)
295 return;
296
297 if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) {
298 valid = false;
299 if ( i > 1 )
300 writer.insertInIndex(QString(str,i), docNum);
301 i = 0;
302 c = buf[++j];
303 continue;
304 }
305 if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) {
306 valid = true;
307 c = buf[++j];
308 continue;
309 }
310 if ( !valid ) {
311 c = buf[++j];
312 continue;
313 }
314 if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) {
315 str[i] = c.toLower();
316 ++i;
317 } else {
318 if ( i > 1 )
319 writer.insertInIndex(QString(str,i), docNum);
320 i = 0;
321 }
322 c = buf[++j];
323 }
324 if ( i > 1 )
325 writer.insertInIndex(QString(str,i), docNum);
326
327 docNum++;
328 writer.insertInDocumentList(title, url);
329 }
330
331 if (writer.writeIndex()) {
332 engine.setCustomValue(key, addNamespace(
333 engine.customValue(key).toString(), namespaceName));
334 }
335
336 writer.reset();
337 }
338 }
339
340 QStringListIterator qsli(indexedNamespaces);
341 while (qsli.hasNext()) {
342 const QString namespaceName = qsli.next();
343 if (namespaces.contains(namespaceName))
344 continue;
345
346 const QList<QStringList> attributeSets =
347 engine.filterAttributeSets(namespaceName);
348
349 foreach (const QStringList &attributes, attributeSets) {
350 writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@")));
351 writer.removeIndex();
352 }
353
354 engine.setCustomValue(key, removeNamespace(
355 engine.customValue(key).toString(), namespaceName));
356 }
357
358 emit indexingFinished();
359}
360
361QString QHelpSearchIndexWriter::addNamespace(const QString namespaces,
362 const QString &namespaceName)
363{
364 QString value = namespaces;
365 if (!value.contains(namespaceName))
366 value.append(namespaceName).append(QLatin1String("|"));
367
368 return value;
369}
370
371QString QHelpSearchIndexWriter::removeNamespace(const QString namespaces,
372 const QString &namespaceName)
373{
374 QString value = namespaces;
375 if (value.contains(namespaceName))
376 value.remove(namespaceName + QLatin1String("|"));
377
378 return value;
379}
380
381} // namespace std
382} // namespace fulltextsearch
383
384QT_END_NAMESPACE
Note: See TracBrowser for help on using the repository browser.