Baloo

searchstore.cpp
1/*
2 This file is part of the KDE Baloo Project
3 SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <vhanda@kde.org>
4
5 SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
6*/
7
8#include "baloodebug.h"
9#include "searchstore.h"
10#include "global.h"
11
12#include "database.h"
13#include "term.h"
14#include "transaction.h"
15#include "enginequery.h"
16#include "termgenerator.h"
17#include "andpostingiterator.h"
18#include "orpostingiterator.h"
19
20#include <QDateTime>
21
22#include <KFileMetaData/PropertyInfo>
23#include <KFileMetaData/TypeInfo>
24#include <KFileMetaData/Types>
25
26#include <algorithm>
27#include <array>
28#include <tuple>
29
30namespace Baloo {
31
32namespace {
33QPair<quint32, quint32> calculateTimeRange(const QDateTime& dt, Term::Comparator com)
34{
35 Q_ASSERT(dt.isValid());
36
37 if (com == Term::Equal) {
38 // Timestamps in DB are quint32 relative to Epoch (1970...2106)
39 auto start = static_cast<quint32>(dt.date().startOfDay().toSecsSinceEpoch());
40 auto end = static_cast<quint32>(dt.date().endOfDay().toSecsSinceEpoch());
41 return {start, end};
42 }
43
44 quint32 timet = dt.toSecsSinceEpoch();
45 if (com == Term::LessEqual) {
46 return {0, timet};
47 }
48 if (com == Term::Less) {
49 return {0, timet - 1};
50 }
51 if (com == Term::GreaterEqual) {
52 return {timet, std::numeric_limits<quint32>::max()};
53 }
54 if (com == Term::Greater) {
55 return {timet + 1, std::numeric_limits<quint32>::max()};
56 }
57
58 Q_ASSERT_X(0, __func__, "mtime query must contain a valid comparator");
59 return {0, 0};
60}
61
62struct InternalProperty {
63 const char* propertyName;
64 const char* prefix;
65 QMetaType::Type valueType;
66};
67constexpr std::array<InternalProperty, 7> internalProperties{{{"content", "", QMetaType::QString},
68 {"filename", "F", QMetaType::QString},
69 {"mimetype", "M", QMetaType::QString},
70 {"rating", "R", QMetaType::Int},
71 {"tag", "TAG-", QMetaType::QString},
72 {"tags", "TA", QMetaType::QString},
73 {"usercomment", "C", QMetaType::QString}}};
74
75std::pair<QByteArray, QMetaType::Type> propertyInfo(const QByteArray &property)
76{
77 auto it = std::find_if(std::begin(internalProperties), std::end(internalProperties),
78 [&property] (const InternalProperty& entry) { return property == entry.propertyName; });
79 if (it != std::end(internalProperties)) {
80 return { (*it).prefix, (*it).valueType };
81 } else {
83 if (pi.property() == KFileMetaData::Property::Empty) {
85 }
86 int propPrefix = static_cast<int>(pi.property());
87 return {QByteArray('X' + QByteArray::number(propPrefix) + '-'), pi.valueType()};
88 }
89}
90
91EngineQuery constructEqualsQuery(const QByteArray& prefix, const QString& value)
92{
93 // We use the TermGenerator to normalize the words in the value and to
94 // split it into other words. If we split the words, we then add them as a
95 // phrase query.
96 const QByteArrayList terms = TermGenerator::termList(value);
97
99 queries.reserve(terms.size());
100 for (const QByteArray& term : terms) {
101 QByteArray arr = prefix + term;
102 // FIXME - compatibility hack, to find truncated terms with old
103 // DBs, remove on next DB bump
104 if (arr.size() > 25) {
105 queries << EngineQuery(arr.left(25), EngineQuery::StartsWith);
106 } else {
107 queries << EngineQuery(arr);
108 }
109 }
110
111 if (queries.isEmpty()) {
112 return EngineQuery();
113 } else if (queries.size() == 1) {
114 return queries.first();
115 } else {
116 return EngineQuery(queries);
117 }
118}
119
120EngineQuery constructContainsQuery(const QByteArray& prefix, const QString& value)
121{
122 auto query = constructEqualsQuery(prefix, value);
123 if (query.op() == EngineQuery::Equal) {
124 if (query.term().size() >= 3) {
125 query.setOp(EngineQuery::StartsWith);
126 }
127 }
128 return query;
129}
130
131EngineQuery constructTypeQuery(const QString& value)
132{
133 Q_ASSERT(!value.isEmpty());
134
136 if (ti == KFileMetaData::Type::Empty) {
137 qCDebug(BALOO) << "Type" << value << "does not exist";
138 return EngineQuery();
139 }
140 int num = static_cast<int>(ti.type());
141
142 return EngineQuery('T' + QByteArray::number(num));
143}
144} // namespace
145
146SearchStore::SearchStore()
147 : m_db(nullptr)
148{
149 m_db = globalDatabaseInstance();
150 if (!m_db->open(Database::ReadOnlyDatabase)) {
151 m_db = nullptr;
152 }
153}
154
155SearchStore::~SearchStore()
156{
157}
158
159// Return the result with-in [offset, offset + limit)
160ResultList SearchStore::exec(const Term& term, uint offset, int limit, bool sortResults)
161{
162 if (!m_db || !m_db->isOpen()) {
163 return ResultList();
164 }
165
166 Transaction tr(m_db, Transaction::ReadOnly);
167 std::unique_ptr<PostingIterator> it(constructQuery(&tr, term));
168 if (!it) {
169 return ResultList();
170 }
171
172 if (sortResults) {
174 while (it->next()) {
175 quint64 id = it->docId();
176 quint32 mtime = tr.documentTimeInfo(id).mTime;
177 resultIds << std::pair<quint64, quint32>{id, mtime};
178
179 Q_ASSERT(id > 0);
180 }
181
182 // Not enough results within range, no need to sort.
183 if (offset >= static_cast<uint>(resultIds.size())) {
184 return ResultList();
185 }
186
187 auto compFunc = [](const std::pair<quint64, quint32>& lhs,
188 const std::pair<quint64, quint32>& rhs) {
189 return lhs.second > rhs.second;
190 };
191
192 std::sort(resultIds.begin(), resultIds.end(), compFunc);
193 if (limit < 0) {
194 limit = resultIds.size();
195 }
196
197 ResultList results;
198 const uint end = qMin(static_cast<uint>(resultIds.size()), offset + static_cast<uint>(limit));
199 results.reserve(end - offset);
200 for (uint i = offset; i < end; i++) {
201 const quint64 id = resultIds[i].first;
202 Result res{tr.documentUrl(id), id};
203
204 results.emplace_back(res);
205 }
206
207 return results;
208 }
209 else {
210 ResultList results;
211 uint ulimit = limit < 0 ? UINT_MAX : limit;
212
213 while (offset && it->next()) {
214 offset--;
215 }
216
217 while (ulimit && it->next()) {
218 const quint64 id = it->docId();
219 Q_ASSERT(id > 0);
220 Result res{tr.documentUrl(id), id};
221 Q_ASSERT(!res.filePath.isEmpty());
222
223 results.emplace_back(res);
224
225 ulimit--;
226 }
227
228 return results;
229 }
230}
231
232PostingIterator* SearchStore::constructQuery(Transaction* tr, const Term& term)
233{
234 Q_ASSERT(tr);
235
236 if (term.operation() == Term::And || term.operation() == Term::Or) {
237 const QList<Term> subTerms = term.subTerms();
239 vec.reserve(subTerms.size());
240
241 for (const Term& t : subTerms) {
242 auto iterator = constructQuery(tr, t);
243 // constructQuery returns a nullptr to signal an empty list
244 if (iterator) {
245 vec << iterator;
246 } else if (term.operation() == Term::And) {
247 return nullptr;
248 }
249 }
250
251 if (vec.isEmpty()) {
252 return nullptr;
253 } else if (vec.size() == 1) {
254 return vec.takeFirst();
255 }
256
257 if (term.operation() == Term::And) {
258 return new AndPostingIterator(vec);
259 } else {
260 return new OrPostingIterator(vec);
261 }
262 }
263
264 if (term.value().isNull()) {
265 return nullptr;
266 }
267 Q_ASSERT(term.value().isValid());
268 Q_ASSERT(term.comparator() != Term::Auto);
269 Q_ASSERT(term.comparator() == Term::Contains ? term.value().typeId() == QMetaType::QString : true);
270
271 const QVariant value = term.value();
272 const QByteArray property = term.property().toLower().toUtf8();
273
274 if (property == "type" || property == "kind") {
275 EngineQuery q = constructTypeQuery(value.toString());
276 return tr->postingIterator(q);
277 }
278 else if (property == "includefolder") {
279 const QByteArray folder = value.toString().toUtf8();
280
281 if (folder.isEmpty()) {
282 return nullptr;
283 }
284 if (!folder.startsWith('/')) {
285 return nullptr;
286 }
287
288 quint64 id = tr->documentId(folder);
289 if (!id) {
290 qCDebug(BALOO) << "Folder" << value.toString() << "not indexed";
291 return nullptr;
292 }
293
294 return tr->docUrlIter(id);
295 }
296 else if (property == "modified" || property == "mtime") {
297 if (value.typeId() == QMetaType::QByteArray) {
298 // Used by Baloo::Query
299 QByteArray ba = value.toByteArray();
300 Q_ASSERT(ba.size() >= 4);
301
302 int year = ba.mid(0, 4).toInt();
303 int month = ba.mid(4, 2).toInt();
304 int day = ba.mid(6, 2).toInt();
305
306 Q_ASSERT(year);
307
308 // uses 0 to represent whole month or whole year
309 month = month >= 0 && month <= 12 ? month : 0;
310 day = day >= 0 && day <= 31 ? day : 0;
311
312 QDate startDate(year, month ? month : 1, day ? day : 1);
313 QDate endDate(startDate);
314
315 if (month == 0) {
316 endDate.setDate(endDate.year(), 12, 31);
317 } else if (day == 0) {
318 endDate.setDate(endDate.year(), endDate.month(), endDate.daysInMonth());
319 }
320
321 return tr->mTimeRangeIter(startDate.startOfDay().toSecsSinceEpoch(), endDate.endOfDay().toSecsSinceEpoch());
322 } else if (value.typeId() == QMetaType::QString) {
323 const QDateTime dt = value.toDateTime();
324 QPair<quint32, quint32> timerange = calculateTimeRange(dt, term.comparator());
325 if ((timerange.first == 0) && (timerange.second == 0)) {
326 return nullptr;
327 }
328 return tr->mTimeRangeIter(timerange.first, timerange.second);
329 } else {
330 Q_ASSERT_X(0, "SearchStore::constructQuery", "modified property must contain date/datetime values");
331 return nullptr;
332 }
333 } else if (property == "tag") {
334 if (term.comparator() == Term::Equal) {
335 const QByteArray prefix = "TAG-";
336 EngineQuery q = EngineQuery(prefix + value.toByteArray());
337 return tr->postingIterator(q);
338 } else if (term.comparator() == Term::Contains) {
339 const QByteArray prefix = "TA";
340 EngineQuery q = constructEqualsQuery(prefix, value.toString());
341 return tr->postingIterator(q);
342 } else {
343 Q_ASSERT(0);
344 return nullptr;
345 }
346 } else if (property == "") {
347 Term cterm(QStringLiteral("content"), term.value(), term.comparator());
348 Term fterm(QStringLiteral("filename"), term.value(), term.comparator());
349 return constructQuery(tr, Term{cterm, Term::Operation::Or, fterm});
350 }
351
352 QByteArray prefix;
354 if (!property.isEmpty()) {
355 std::tie(prefix, valueType) = propertyInfo(property);
356 if (valueType == QMetaType::UnknownType) {
357 return nullptr;
358 }
359 }
360
361 auto com = term.comparator();
362 if (com == Term::Contains && valueType == QMetaType::Int) {
363 com = Term::Equal;
364 }
365 if (com == Term::Contains) {
366 EngineQuery q = constructContainsQuery(prefix, value.toString());
367 return tr->postingIterator(q);
368 }
369
370 if (com == Term::Equal) {
371 EngineQuery q = constructEqualsQuery(prefix, value.toString());
372 return tr->postingIterator(q);
373 }
374
375 PostingDB::Comparator pcom;
376 if (com == Term::Greater || com == Term::GreaterEqual) {
377 pcom = PostingDB::GreaterEqual;
378 } else if (com == Term::Less || com == Term::LessEqual) {
379 pcom = PostingDB::LessEqual;
380 }
381
382 // FIXME -- has to be kept in sync with the code from
383 // Baloo::Result::add
384 if (valueType == QMetaType::Int) {
385 qlonglong intVal = value.toLongLong();
386
387 if (term.comparator() == Term::Greater) {
388 intVal++;
389 } else if (term.comparator() == Term::Less) {
390 intVal--;
391 }
392
393 return tr->postingCompIterator(prefix, intVal, pcom);
394
395 } else if (valueType == QMetaType::Double) {
396 double dVal = value.toDouble();
397 return tr->postingCompIterator(prefix, dVal, pcom);
398
399 } else if (valueType == QMetaType::QDateTime) {
400 QDateTime dt = value.toDateTime();
401 const QByteArray ba = dt.toString(Qt::ISODate).toUtf8();
402 return tr->postingCompIterator(prefix, ba, pcom);
403
404 } else {
405 qCDebug(BALOO) << "Comparison must be with an integer";
406 }
407
408 return nullptr;
409}
410
411} // namespace Baloo
Property::Property property() const
static PropertyInfo fromName(const QString &name)
QMetaType::Type valueType() const
Type::Type type() const
static TypeInfo fromName(const QString &name)
The result class is where all the data extracted by the KFileMetaData extractors is saved to.
Definition result.h:27
Q_SCRIPTABLE Q_NOREPLY void start()
std::optional< QSqlQuery > query(const QString &queryStatement)
Implements storage for docIds without any associated data Instantiated for:
Definition coding.cpp:11
const QList< QKeySequence > & end()
bool isEmpty() const const
QByteArray left(qsizetype len) const const
QByteArray mid(qsizetype pos, qsizetype len) const const
QByteArray number(double n, char format, int precision)
qsizetype size() const const
bool startsWith(QByteArrayView bv) const const
int toInt(bool *ok, int base) const const
QByteArray toLower() const const
QDate date() const const
bool isValid() const const
qint64 toSecsSinceEpoch() const const
QString toString(QStringView format, QCalendar cal) const const
iterator begin()
iterator end()
T & first()
bool isEmpty() const const
void reserve(qsizetype size)
qsizetype size() const const
value_type takeFirst()
QString fromUtf8(QByteArrayView str)
bool isEmpty() const const
QByteArray toUtf8() const const
QByteArray toByteArray() const const
QDateTime toDateTime() const const
double toDouble(bool *ok) const const
qlonglong toLongLong(bool *ok) const const
QString toString() const const
int typeId() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:51:40 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.