9#include "basicindexingjob.h"
12#include "transaction.h"
13#include "baloodebug.h"
16#include <QCoreApplication>
21#include <KFileMetaData/Extractor>
22#include <KFileMetaData/MimeUtils>
35 , m_workerPipe(&m_input, &m_output)
41 static int s_idleTimeout = 1000 * 60 * 1;
43 m_idleTime->addIdleTimeout(s_idleTimeout);
45 qCInfo(BALOO) <<
"Busy, paced indexing";
49 qCInfo(BALOO) <<
"Not busy, fast indexing";
53 using WorkerPipe = Baloo::Private::WorkerPipe;
55 connect(&m_workerPipe, &WorkerPipe::newDocumentIds,
this, &App::slotNewBatch);
// Handles a new batch of document ids; connected to WorkerPipe::newDocumentIds.
//
// NOTE(review): this listing is a lossy extract. The number fused to the start
// of each statement is the original file's line number, and the lines between
// them (braces, returns, further statements) are not visible here. The comments
// below describe only the statements that are visible.
//
// ids: the document ids delivered with the batch.
68void App::slotNewBatch(
const QVector<quint64>& ids)
// Open the shared database instance, passing the CreateDatabase mode.
72 Database *db = globalDatabaseInstance();
73 if (db->open(Database::CreateDatabase) != Database::OpenResult::Success) {
// Any result other than OpenResult::Success is logged as critical. What
// happens after the log (original lines 75-76) is not visible - TODO confirm.
74 qCCritical(BALOO) <<
"Failed to open the database";
// A batch must not start while a previous transaction is still held in m_tr.
78 Q_ASSERT(m_tr ==
nullptr);
// NOTE(review): m_idleTime is presumably the KIdleTime instance; this asks it
// to report the next resume-from-idle event - confirm against the declaration.
81 m_idleTime->catchNextResumeEvent();
// Create the read-write transaction for this batch; m_tr owns it.
87 m_tr = std::make_unique<Transaction>(db, Transaction::ReadWrite);
// Disable m_notifyNewData. Presumably this stops further input notifications
// while the current batch is being processed - TODO confirm.
95 m_notifyNewData.setEnabled(
false);
// Processes one pending id from m_ids, or finishes the batch.
//
// NOTE(review): this listing is a lossy extract. The number fused to the start
// of each statement is the original file's line number; braces, the definition
// of `url`, and the code that uses `delay` and `ok` are not visible here.
98void App::processNextFile()
100 if (!m_ids.isEmpty()) {
// Take the next id off the front of the pending list.
101 quint64
id = m_ids.takeFirst();
// Drop the document from the database. The condition guarding this call
// (original lines 102-104) is not visible; presumably it is taken when the
// file for this id can no longer be found - TODO confirm.
105 m_tr->removeDocument(
id);
// Run the extraction for this file inside the current transaction.
// `url` is defined on a line that is not visible in this listing.
110 bool indexed = index(m_tr.get(), url,
id);
// 10 when the app is marked busy and the file was actually indexed, else 0.
// NOTE(review): presumably a pacing delay before the next file; the unit and
// the consumer of `delay` are not visible - confirm.
112 int delay = (m_isBusy && indexed) ? 10 : 0;
// Commit the transaction. Handling of a failed commit (`ok == false`) is not
// visible here.
116 bool ok = m_tr->commit();
// Tell the worker pipe that the batch is complete.
124 m_workerPipe.batchFinished();
// Runs content extraction for a single file and stores the result through `tr`.
//
// NOTE(review): this listing is a lossy extract. The number fused to the start
// of each statement is the original file's line number; closing braces, `else`
// lines, `return` statements and the definition of `mimetype` are not visible.
// Presumably each failure path below returns false and the final path returns
// true (processNextFile stores the result as `indexed`) - TODO confirm.
//
// tr:  transaction that receives all database changes.
// url: path of the file; used for config checks, QFileInfo and the indexers.
// id:  document id the caller expects this file to have.
128bool App::index(Transaction* tr,
const QString& url, quint64
id)
// Failure path 1: the configuration says this path should not be indexed.
// The document is removed entirely and the URL is reported as failed.
130 if (!m_config.shouldBeIndexed(url)) {
133 qCDebug(BALOO) <<
"Found" << url <<
"in the ContentIndexingDB, although it should be skipped";
134 tr->removeDocument(
id);
135 m_workerPipe.urlFailed(url);
// Failure path 2: the mimetype is excluded by the configuration.
// `mimetype` is computed on a line that is not visible in this listing.
142 if (!m_config.shouldMimeTypeBeIndexed(mimetype)) {
143 qCDebug(BALOO) <<
"Skipping" << url <<
"- mimetype:" <<
mimetype;
// Only removePhaseOne is called here (not removeDocument), unlike path 1.
146 tr->removePhaseOne(
id);
147 m_workerPipe.urlFailed(url);
// Failure path 3: "text/*" files whose size is at least 10 * 1024 * 1024
// (10 MiB, as reported by QFileInfo::size) are skipped.
154 if (
mimetype.startsWith(QLatin1String(
"text/"))) {
155 QFileInfo fileInfo(url);
156 if (fileInfo.size() >= 10 * 1024 * 1024) {
157 qCDebug(BALOO) <<
"Skipping large " << url <<
"- mimetype:" <<
mimetype;
158 tr->removePhaseOne(
id);
159 m_workerPipe.urlFailed(url);
// All skip checks passed: log and report the start of extraction.
163 qCDebug(BALOO) <<
"Indexing" <<
id << url <<
mimetype;
164 m_workerPipe.urlStarted(url);
// Run the basic indexing job with the mimetype used above, at NoLevel.
169 BasicIndexingJob basicIndexer(url, mimetype, BasicIndexingJob::NoLevel);
// Failure path 4: the basic indexer failed; the log message treats this as
// the file not existing.
170 if (!basicIndexer.index()) {
171 qCDebug(BALOO) <<
"Skipping non-existing file " << url <<
"- mimetype:" <<
mimetype;
172 tr->removePhaseOne(
id);
173 m_workerPipe.urlFailed(url);
// Start from the document produced by the basic indexer.
177 Baloo::Document doc = basicIndexer.document();
// Extraction result requesting both metadata and plain text, seeded with the
// basic document.
179 Result result(url, mimetype, KFileMetaData::ExtractionResult::ExtractMetaData | KFileMetaData::ExtractionResult::ExtractPlainText);
180 result.setDocument(doc);
// Run every extractor the collection returns for this mimetype on `result`.
182 const QList<KFileMetaData::Extractor*> exList = m_extractorCollection.fetchExtractors(mimetype);
184 for (KFileMetaData::Extractor* ex : exList) {
185 ex->extract(&result);
// The freshly computed document id differs from the id the caller passed in.
189 if (doc.id() !=
id) {
190 qCWarning(BALOO) << url <<
"id seems to have changed. Perhaps baloo was not running, and this file was deleted + re-created";
// Remove the entry stored under the old id, then add the document under its
// new id if the database does not already have it.
191 tr->removeDocument(
id);
192 if (!
tr->hasDocument(doc.id())) {
193 tr->addDocument(result.document());
// NOTE(review): the two replaceDocument calls below presumably sit in the
// `else` branches of the two `if`s above (the `else` lines are not visible):
// the first when the new id already exists, the second when the id is unchanged.
195 tr->replaceDocument(result.document(), DocumentTerms | DocumentData);
198 tr->replaceDocument(result.document(), DocumentTerms | DocumentData);
// Call removePhaseOne for the document's current id and report success.
200 tr->removePhaseOne(doc.id());
201 m_workerPipe.urlFinished(url);
205#include "moc_app.cpp"
void setEnabled(bool enable)
void timeoutReached(int identifier, int msec)
static KIdleTime * instance()
Implements storage for docIds without any associated data Instantiated for:
KIOCORE_EXPORT MimetypeJob * mimetype(const QUrl &url, JobFlags flags=DefaultFlags)
QString decodeName(const QByteArray &localFileName)
bool exists() const const
QString tr(const char *sourceText, const char *disambiguation, int n)
void activated(QSocketDescriptor socket, QSocketNotifier::Type type)
bool isEmpty() const const
QFuture< ArgsType< Signal > > connect(Sender *sender, Signal signal)