Akonadi Search

contactcompleter.cpp
1 /*
2  * This file is part of the KDE Akonadi Search Project
3  * SPDX-FileCopyrightText: 2013 Vishesh Handa <[email protected]>
4  *
5  * SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
6  *
7  */
8 
9 #include <xapian.h>
10 
11 #include "akonadi_search_pim_debug.h"
12 #include "contactcompleter.h"
13 #include "query.h"
14 
15 #include <QElapsedTimer>
16 #include <QFile>
17 #include <QStandardPaths>
18 
19 using namespace Akonadi::Search::PIM;
20 
21 ContactCompleter::ContactCompleter(const QString &prefix, int limit)
22  : m_prefix(prefix.toLower())
23  , m_limit(limit)
24 {
25 }
26 
27 static QStringList processEnquire(Xapian::Enquire &enq, int limit)
28 {
29  QElapsedTimer timer;
30  timer.start();
31 
32  // Retrieves no results but provides statistics - it's very quick
33  auto statsmset = enq.get_mset(0, 0);
34  qCDebug(AKONADI_SEARCH_PIM_LOG) << "Query:" << QString::fromStdString(enq.get_query().get_description());
35  qCDebug(AKONADI_SEARCH_PIM_LOG) << "Estimated matches:" << statsmset.get_matches_estimated();
36  const int matchEstimate = statsmset.get_matches_estimated();
37 
39  list.reserve(std::min(limit, matchEstimate));
40  int duplicates = 0;
41  int firstItem = 0;
42  // We run the query multiple times, since we may discard some results as duplicates.
43  while (list.size() < limit) {
44  // Always query the "limit"-count of results:
45  // * if estimate is less than limit, we make sure we don't miss results any due to wrong estimate
46  // * if estimate is more than limit, we don't want to query more documents than needed
47  Xapian::MSet mset = enq.get_mset(firstItem, limit);
48  if (mset.empty()) { // there are no more non-duplicate results
49  break;
50  }
51 
52  for (auto it = mset.begin(), end = mset.end(); it != end && list.size() < limit; ++it) {
53  const auto entry = QString::fromStdString(it.get_document().get_data());
54  // TODO: Be smarter about the deduplication by fixing the indexing code:
55  // If we store mailbox name and address as separate named terms then we could deduplicate
56  // purely based on the email address.
57  if (!list.contains(entry, Qt::CaseInsensitive)) {
58  qCDebug(AKONADI_SEARCH_PIM_LOG, "Match: \"%s\" (%d%%), docid %u", qUtf8Printable(entry), it.get_percent(), *it);
59  list.push_back(entry);
60  } else {
61  ++duplicates;
62  qCDebug(AKONADI_SEARCH_PIM_LOG, "Skipped duplicate match \"%s\" (%d%%) docid %u", qUtf8Printable(entry), it.get_percent(), *it);
63  }
64  ++firstItem;
65  }
66  }
67 
68  qCDebug(AKONADI_SEARCH_PIM_LOG) << "Collected" << list.size() << "results in" << timer.elapsed() << "ms, skipped" << duplicates << "duplicates.";
69  return list;
70 }
71 
72 QStringList ContactCompleter::complete()
73 {
74  const QString dir = Query::defaultLocation(QStringLiteral("emailContacts"));
75  Xapian::Database db;
76  try {
77  db = Xapian::Database(QFile::encodeName(dir).toStdString());
78  } catch (const Xapian::DatabaseOpeningError &) {
79  qCWarning(AKONADI_SEARCH_PIM_LOG) << "Xapian Database does not exist at " << dir;
80  return {};
81  } catch (const Xapian::DatabaseCorruptError &) {
82  qCWarning(AKONADI_SEARCH_PIM_LOG) << "Xapian Database corrupted";
83  return {};
84  } catch (const Xapian::DatabaseError &e) {
85  qCWarning(AKONADI_SEARCH_PIM_LOG) << QString::fromStdString(e.get_type()) << QString::fromStdString(e.get_description());
86  return {};
87  } catch (...) {
88  qCWarning(AKONADI_SEARCH_PIM_LOG) << "Random exception, but we do not want to crash";
89  return {};
90  }
91 
92  Xapian::QueryParser parser;
93  parser.set_database(db);
94 
95  const int flags = Xapian::QueryParser::FLAG_DEFAULT | Xapian::QueryParser::FLAG_PARTIAL;
96  const Xapian::Query q = parser.parse_query(m_prefix.toStdString(), flags);
97 
98  Xapian::Enquire enq(db);
99  enq.set_query(q);
100  enq.set_sort_by_relevance();
101  // TODO: extend the indexer to use value slots for the normalized email address so that
102  // duplicates can be collapsed by Xapian::Enquire::set_collapse_key()
103 
104  int retryCount = 0;
105  for (;;) {
106  try {
107  return processEnquire(enq, m_limit);
108  } catch (const Xapian::DatabaseCorruptError &e) {
109  qCWarning(AKONADI_SEARCH_PIM_LOG) << "The emailContacts Xapian database is corrupted:" << QString::fromStdString(e.get_description());
110  return {};
111  } catch (const Xapian::DatabaseModifiedError &e) {
112  db.reopen();
113  retryCount++;
114  if (retryCount > 3) {
115  qCWarning(AKONADI_SEARCH_PIM_LOG) << "The emailContacts Xapian database seems broken:" << QString::fromStdString(e.get_description());
116  return {};
117  }
118  continue; // try again
119  }
120  }
121 }
CaseInsensitive
QByteArray encodeName(const QString &fileName)
bool contains(const QString &str, Qt::CaseSensitivity cs) const
void push_back(const T &value)
KIOFILEWIDGETS_EXPORT QStringList list(const QString &fileClass)
void reserve(int alloc)
int size() const
QString fromStdString(const std::string &str)
std::string toStdString() const
qint64 elapsed() const
PIM specific search API.
KIOFILEWIDGETS_EXPORT QString dir(const QString &fileClass)
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Fri Dec 1 2023 04:09:05 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.