Baloo

searchstore.cpp
1 /*
2  This file is part of the KDE Baloo Project
3  SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <[email protected]>
4 
5  SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
6 */
7 
8 #include "baloodebug.h"
9 #include "searchstore.h"
10 #include "global.h"
11 
12 #include "database.h"
13 #include "term.h"
14 #include "transaction.h"
15 #include "enginequery.h"
16 #include "termgenerator.h"
17 #include "andpostingiterator.h"
18 #include "orpostingiterator.h"
19 
20 #include <QDateTime>
21 
22 #include <KFileMetaData/PropertyInfo>
23 #include <KFileMetaData/TypeInfo>
24 #include <KFileMetaData/Types>
25 
26 #include <algorithm>
27 #include <array>
28 #include <tuple>
29 
30 namespace Baloo {
31 
32 namespace {
33 QPair<quint32, quint32> calculateTimeRange(const QDateTime& dt, Term::Comparator com)
34 {
35  Q_ASSERT(dt.isValid());
36 
37  if (com == Term::Equal) {
38  // Timestamps in DB are quint32 relative to Epoch (1970...2106)
39  auto start = static_cast<quint32>(dt.date().startOfDay().toSecsSinceEpoch());
40  auto end = static_cast<quint32>(dt.date().endOfDay().toSecsSinceEpoch());
41  return {start, end};
42  }
43 
44  quint32 timet = dt.toSecsSinceEpoch();
45  if (com == Term::LessEqual) {
46  return {0, timet};
47  }
48  if (com == Term::Less) {
49  return {0, timet - 1};
50  }
51  if (com == Term::GreaterEqual) {
52  return {timet, std::numeric_limits<quint32>::max()};
53  }
54  if (com == Term::Greater) {
55  return {timet + 1, std::numeric_limits<quint32>::max()};
56  }
57 
58  Q_ASSERT_X(0, __func__, "mtime query must contain a valid comparator");
59  return {0, 0};
60 }
61 
62 struct InternalProperty {
63  const char* propertyName;
64  const char* prefix;
65  QVariant::Type valueType;
66 };
67 constexpr std::array<InternalProperty, 7> internalProperties {{
68  { "content", "", QVariant::String },
69  { "filename", "F", QVariant::String },
70  { "mimetype", "M", QVariant::String },
71  { "rating", "R", QVariant::Int },
72  { "tag", "TAG-", QVariant::String },
73  { "tags", "TA", QVariant::String },
74  { "usercomment", "C", QVariant::String }
75 }};
76 
77 std::pair<QByteArray, QVariant::Type> propertyInfo(const QByteArray& property)
78 {
79  auto it = std::find_if(std::begin(internalProperties), std::end(internalProperties),
80  [&property] (const InternalProperty& entry) { return property == entry.propertyName; });
81  if (it != std::end(internalProperties)) {
82  return { (*it).prefix, (*it).valueType };
83  } else {
85  if (pi.property() == KFileMetaData::Property::Empty) {
86  return { QByteArray(), QVariant::Invalid };
87  }
88  int propPrefix = static_cast<int>(pi.property());
89  return { 'X' + QByteArray::number(propPrefix) + '-', pi.valueType() };
90  }
91 }
92 
93 EngineQuery constructEqualsQuery(const QByteArray& prefix, const QString& value)
94 {
95  // We use the TermGenerator to normalize the words in the value and to
96  // split it into other words. If we split the words, we then add them as a
97  // phrase query.
98  const QByteArrayList terms = TermGenerator::termList(value);
99 
100  QVector<EngineQuery> queries;
101  queries.reserve(terms.size());
102  for (const QByteArray& term : terms) {
103  QByteArray arr = prefix + term;
104  // FIXME - compatibility hack, to find truncated terms with old
105  // DBs, remove on next DB bump
106  if (arr.size() > 25) {
107  queries << EngineQuery(arr.left(25), EngineQuery::StartsWith);
108  } else {
109  queries << EngineQuery(arr);
110  }
111  }
112 
113  if (queries.isEmpty()) {
114  return EngineQuery();
115  } else if (queries.size() == 1) {
116  return queries.first();
117  } else {
118  return EngineQuery(queries);
119  }
120 }
121 
122 EngineQuery constructContainsQuery(const QByteArray& prefix, const QString& value)
123 {
124  auto query = constructEqualsQuery(prefix, value);
125  if (query.op() == EngineQuery::Equal) {
126  if (query.term().size() >= 3) {
127  query.setOp(EngineQuery::StartsWith);
128  }
129  }
130  return query;
131 }
132 
133 EngineQuery constructTypeQuery(const QString& value)
134 {
135  Q_ASSERT(!value.isEmpty());
136 
137  KFileMetaData::TypeInfo ti = KFileMetaData::TypeInfo::fromName(value);
138  if (ti == KFileMetaData::Type::Empty) {
139  qCDebug(BALOO) << "Type" << value << "does not exist";
140  return EngineQuery();
141  }
142  int num = static_cast<int>(ti.type());
143 
144  return EngineQuery('T' + QByteArray::number(num));
145 }
146 } // namespace
147 
148 SearchStore::SearchStore()
149  : m_db(nullptr)
150 {
151  m_db = globalDatabaseInstance();
152  if (!m_db->open(Database::ReadOnlyDatabase)) {
153  m_db = nullptr;
154  }
155 }
156 
157 SearchStore::~SearchStore()
158 {
159 }
160 
161 // Return the result with-in [offset, offset + limit)
162 ResultList SearchStore::exec(const Term& term, uint offset, int limit, bool sortResults)
163 {
164  if (!m_db || !m_db->isOpen()) {
165  return ResultList();
166  }
167 
168  Transaction tr(m_db, Transaction::ReadOnly);
169  std::unique_ptr<PostingIterator> it(constructQuery(&tr, term));
170  if (!it) {
171  return ResultList();
172  }
173 
174  if (sortResults) {
176  while (it->next()) {
177  quint64 id = it->docId();
178  quint32 mtime = tr.documentTimeInfo(id).mTime;
179  resultIds << std::pair<quint64, quint32>{id, mtime};
180 
181  Q_ASSERT(id > 0);
182  }
183 
184  // Not enough results within range, no need to sort.
185  if (offset >= static_cast<uint>(resultIds.size())) {
186  return ResultList();
187  }
188 
189  auto compFunc = [](const std::pair<quint64, quint32>& lhs,
190  const std::pair<quint64, quint32>& rhs) {
191  return lhs.second > rhs.second;
192  };
193 
194  std::sort(resultIds.begin(), resultIds.end(), compFunc);
195  if (limit < 0) {
196  limit = resultIds.size();
197  }
198 
199  ResultList results;
200  const uint end = qMin(static_cast<uint>(resultIds.size()), offset + static_cast<uint>(limit));
201  results.reserve(end - offset);
202  for (uint i = offset; i < end; i++) {
203  const quint64 id = resultIds[i].first;
204  Result res{tr.documentUrl(id), id};
205 
206  results.emplace_back(res);
207  }
208 
209  return results;
210  }
211  else {
212  ResultList results;
213  uint ulimit = limit < 0 ? UINT_MAX : limit;
214 
215  while (offset && it->next()) {
216  offset--;
217  }
218 
219  while (ulimit && it->next()) {
220  const quint64 id = it->docId();
221  Q_ASSERT(id > 0);
222  Result res{tr.documentUrl(id), id};
223  Q_ASSERT(!res.filePath.isEmpty());
224 
225  results.emplace_back(res);
226 
227  ulimit--;
228  }
229 
230  return results;
231  }
232 }
233 
234 PostingIterator* SearchStore::constructQuery(Transaction* tr, const Term& term)
235 {
236  Q_ASSERT(tr);
237 
238  if (term.operation() == Term::And || term.operation() == Term::Or) {
239  const QList<Term> subTerms = term.subTerms();
241  vec.reserve(subTerms.size());
242 
243  for (const Term& t : subTerms) {
244  auto iterator = constructQuery(tr, t);
245  // constructQuery returns a nullptr to signal an empty list
246  if (iterator) {
247  vec << iterator;
248  } else if (term.operation() == Term::And) {
249  return nullptr;
250  }
251  }
252 
253  if (vec.isEmpty()) {
254  return nullptr;
255  } else if (vec.size() == 1) {
256  return vec.takeFirst();
257  }
258 
259  if (term.operation() == Term::And) {
260  return new AndPostingIterator(vec);
261  } else {
262  return new OrPostingIterator(vec);
263  }
264  }
265 
266  if (term.value().isNull()) {
267  return nullptr;
268  }
269  Q_ASSERT(term.value().isValid());
270  Q_ASSERT(term.comparator() != Term::Auto);
271  Q_ASSERT(term.comparator() == Term::Contains ? term.value().type() == QVariant::String : true);
272 
273  const QVariant value = term.value();
274  const QByteArray property = term.property().toLower().toUtf8();
275 
276  if (property == "type" || property == "kind") {
277  EngineQuery q = constructTypeQuery(value.toString());
278  return tr->postingIterator(q);
279  }
280  else if (property == "includefolder") {
281  const QByteArray folder = value.toString().toUtf8();
282 
283  if (folder.isEmpty()) {
284  return nullptr;
285  }
286  if (!folder.startsWith('/')) {
287  return nullptr;
288  }
289 
290  quint64 id = tr->documentId(folder);
291  if (!id) {
292  qCDebug(BALOO) << "Folder" << value.toString() << "not indexed";
293  return nullptr;
294  }
295 
296  return tr->docUrlIter(id);
297  }
298  else if (property == "modified" || property == "mtime") {
299  if (value.type() == QVariant::ByteArray) {
300  // Used by Baloo::Query
301  QByteArray ba = value.toByteArray();
302  Q_ASSERT(ba.size() >= 4);
303 
304  int year = ba.mid(0, 4).toInt();
305  int month = ba.mid(4, 2).toInt();
306  int day = ba.mid(6, 2).toInt();
307 
308  Q_ASSERT(year);
309 
310  // uses 0 to represent whole month or whole year
311  month = month >= 0 && month <= 12 ? month : 0;
312  day = day >= 0 && day <= 31 ? day : 0;
313 
314  QDate startDate(year, month ? month : 1, day ? day : 1);
315  QDate endDate(startDate);
316 
317  if (month == 0) {
318  endDate.setDate(endDate.year(), 12, 31);
319  } else if (day == 0) {
320  endDate.setDate(endDate.year(), endDate.month(), endDate.daysInMonth());
321  }
322 
323  return tr->mTimeRangeIter(startDate.startOfDay().toSecsSinceEpoch(), endDate.endOfDay().toSecsSinceEpoch());
324  }
325  else if (value.type() == QVariant::String) {
326  const QDateTime dt = value.toDateTime();
327  QPair<quint32, quint32> timerange = calculateTimeRange(dt, term.comparator());
328  if ((timerange.first == 0) && (timerange.second == 0)) {
329  return nullptr;
330  }
331  return tr->mTimeRangeIter(timerange.first, timerange.second);
332  }
333  else {
334  Q_ASSERT_X(0, "SearchStore::constructQuery", "modified property must contain date/datetime values");
335  return nullptr;
336  }
337  } else if (property == "tag") {
338  if (term.comparator() == Term::Equal) {
339  const QByteArray prefix = "TAG-";
340  EngineQuery q = EngineQuery(prefix + value.toByteArray());
341  return tr->postingIterator(q);
342  } else if (term.comparator() == Term::Contains) {
343  const QByteArray prefix = "TA";
344  EngineQuery q = constructEqualsQuery(prefix, value.toString());
345  return tr->postingIterator(q);
346  } else {
347  Q_ASSERT(0);
348  return nullptr;
349  }
350  } else if (property == "") {
351  Term cterm(QStringLiteral("content"), term.value(), term.comparator());
352  Term fterm(QStringLiteral("filename"), term.value(), term.comparator());
353  return constructQuery(tr, Term{cterm, Term::Operation::Or, fterm});
354  }
355 
356  QByteArray prefix;
357  QVariant::Type valueType = QVariant::String;
358  if (!property.isEmpty()) {
359  std::tie(prefix, valueType) = propertyInfo(property);
360  if (valueType == QVariant::Invalid) {
361  return nullptr;
362  }
363  }
364 
365  auto com = term.comparator();
366  if (com == Term::Contains && valueType == QVariant::Int) {
367  com = Term::Equal;
368  }
369  if (com == Term::Contains) {
370  EngineQuery q = constructContainsQuery(prefix, value.toString());
371  return tr->postingIterator(q);
372  }
373 
374  if (com == Term::Equal) {
375  EngineQuery q = constructEqualsQuery(prefix, value.toString());
376  return tr->postingIterator(q);
377  }
378 
379  PostingDB::Comparator pcom;
380  if (com == Term::Greater || com == Term::GreaterEqual) {
381  pcom = PostingDB::GreaterEqual;
382  } else if (com == Term::Less || com == Term::LessEqual) {
383  pcom = PostingDB::LessEqual;
384  }
385 
386  // FIXME -- has to be kept in sync with the code from
387  // Baloo::Result::add
388  if (valueType == QVariant::Int) {
389  qlonglong intVal = value.toLongLong();
390 
391  if (term.comparator() == Term::Greater) {
392  intVal++;
393  } else if (term.comparator() == Term::Less) {
394  intVal--;
395  }
396 
397  return tr->postingCompIterator(prefix, intVal, pcom);
398 
399  } else if (valueType == QVariant::Double) {
400  double dVal = value.toDouble();
401  return tr->postingCompIterator(prefix, dVal, pcom);
402 
403  } else if (valueType == QVariant::DateTime) {
404  QDateTime dt = value.toDateTime();
405  const QByteArray ba = dt.toString(Qt::ISODate).toUtf8();
406  return tr->postingCompIterator(prefix, ba, pcom);
407 
408  } else {
409  qCDebug(BALOO) << "Comparison must be with an integer";
410  }
411 
412  return nullptr;
413 }
414 
415 } // namespace Baloo
std::optional< QSqlQuery > query(const QString &queryStatement)
bool isEmpty() const const
Property::Property property() const
QByteArray toLower() const const
QString fromUtf8(const char *str, int size)
QVector::iterator begin()
QVariant::Type valueType() const
Q_SCRIPTABLE Q_NOREPLY void start()
QByteArray number(int n, int base)
QByteArray toByteArray() const const
qlonglong toLongLong(bool *ok) const const
T & first()
int size() const const
double toDouble(bool *ok) const const
QVariant::Type type() const const
The result class is where all the data extracted by the KFileMetaData extractors is saved to....
Definition: result.h:26
Implements storage for docIds without any associated data Instantiated for:
Definition: coding.cpp:11
QByteArray mid(int pos, int len) const const
bool isEmpty() const const
QByteArray toUtf8() const const
bool startsWith(const QByteArray &ba) const const
static PropertyInfo fromName(const QString &name)
void reserve(int size)
T takeFirst()
QVector::iterator end()
int toInt(bool *ok, int base) const const
qint64 toSecsSinceEpoch() const const
QDateTime toDateTime() const const
QByteArray left(int len) const const
bool isEmpty() const const
QDate date() const const
bool isValid() const const
int size() const const
int size() const const
QDateTime startOfDay(Qt::TimeSpec spec, int offsetSeconds) const const
QString toString(Qt::DateFormat format) const const
QDateTime endOfDay(Qt::TimeSpec spec, int offsetSeconds) const const
const QList< QKeySequence > & end()
QString toString() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Wed Nov 29 2023 03:56:26 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.