Akonadi Search

xapianqueryparser.cpp
1 /*
2  * SPDX-FileCopyrightText: 2014 Vishesh Handa <[email protected]>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  *
6  */
7 
8 #include "xapianqueryparser.h"
9 
10 #include "akonadi_search_xapian_debug.h"
11 #include <QStringList>
12 #include <QTextBoundaryFinder>
13 
14 using namespace Akonadi::Search;
15 
// Defaulted constructor: no work is done here beyond whatever member
// initialization the class definition itself specifies.
16 XapianQueryParser::XapianQueryParser() = default;
17 
18 void XapianQueryParser::setDatabase(Xapian::Database *db)
19 {
20  m_db = db;
21 }
22 
23 namespace
24 {
/**
 * A database term together with its frequency count.
 *
 * The comparison is deliberately inverted: the standard heap algorithms keep
 * the "largest" element at the front, and with this ordering the front is the
 * term with the smallest count, so it is the one removed by pop_heap.
 */
struct Term {
    std::string t;
    // Zero-initialized so a default-constructed Term never carries an
    // indeterminate count into a comparison.
    unsigned int count = 0;

    // pop_heap pops the largest element, we want the smallest to be popped
    bool operator<(const Term &rhs) const
    {
        return count > rhs.count;
    }
};
35 
36 Xapian::Query makeQuery(const QString &string, int position, Xapian::Database *db)
37 {
38  if (!db) {
39  const QByteArray arr = string.toUtf8();
40  const std::string stdString(arr.constData(), arr.size());
41  return Xapian::Query(stdString, 1, position);
42  }
43 
44  // Lets just keep the top x (+1 for push_heap)
45  static const int MaxTerms = 100;
46  QList<Term> topTerms;
47  topTerms.reserve(MaxTerms + 1);
48 
49  const std::string stdString(string.toStdString());
50  Xapian::TermIterator it = db->allterms_begin(stdString);
51  Xapian::TermIterator end = db->allterms_end(stdString);
52  for (; it != end; ++it) {
53  Term term;
54  term.t = *it;
55  term.count = db->get_collection_freq(term.t);
56 
57  if (topTerms.size() < MaxTerms) {
58  topTerms.push_back(term);
59  std::push_heap(topTerms.begin(), topTerms.end());
60  } else {
61  // Remove the term with the min count
62  topTerms.push_back(term);
63  std::push_heap(topTerms.begin(), topTerms.end());
64 
65  std::pop_heap(topTerms.begin(), topTerms.end());
66  topTerms.pop_back();
67  }
68  }
69 
70  QList<Xapian::Query> queries;
71  queries.reserve(topTerms.size());
72 
73  for (const Term &term : std::as_const(topTerms)) {
74  queries << Xapian::Query(term.t, 1, position);
75  }
76 
77  if (queries.isEmpty()) {
78  return Xapian::Query(string.toStdString(), 1, position);
79  }
80  Xapian::Query finalQ(Xapian::Query::OP_SYNONYM, queries.begin(), queries.end());
81  return finalQ;
82 }
83 
84 bool containsSpace(const QString &string)
85 {
86  for (const QChar &ch : string) {
87  if (ch.isSpace()) {
88  return true;
89  }
90  }
91 
92  return false;
93 }
94 }
95 
96 Xapian::Query XapianQueryParser::parseQuery(const QString &text, const QString &prefix)
97 {
98  /*
99  Xapian::QueryParser parser;
100  parser.set_default_op(Xapian::Query::OP_AND);
101 
102  if (m_db)
103  parser.set_database(*m_db);
104 
105  int flags = Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_PARTIAL;
106 
107  std::string stdString(text.toStdString());
108  return parser.parse_query(stdString, flags);
109  */
110 
111  if (text.isEmpty()) {
112  return {};
113  }
114 
115  QList<Xapian::Query> queries;
116  QList<Xapian::Query> phraseQueries;
117 
118  int start = 0;
119  int end = 0;
120  int position = 0;
121 
122  bool inDoubleQuotes = false;
123  bool inSingleQuotes = false;
124  bool inPhrase = false;
125 
127  for (; bf.position() != -1; bf.toNextBoundary()) {
128  if (bf.boundaryReasons() & QTextBoundaryFinder::StartOfItem) {
129  //
130  // Check the previous delimiter
131  int pos = bf.position();
132  if (pos != end) {
133  QString delim = text.mid(end, pos - end);
134  if (delim.contains(QLatin1Char('"'))) {
135  if (inDoubleQuotes) {
136  queries << Xapian::Query(Xapian::Query::OP_PHRASE, phraseQueries.begin(), phraseQueries.end());
137  phraseQueries.clear();
138  inDoubleQuotes = false;
139  } else {
140  inDoubleQuotes = true;
141  }
142  } else if (delim.contains(QLatin1Char('\''))) {
143  if (inSingleQuotes) {
144  queries << Xapian::Query(Xapian::Query::OP_PHRASE, phraseQueries.begin(), phraseQueries.end());
145  phraseQueries.clear();
146  inSingleQuotes = false;
147  } else {
148  inSingleQuotes = true;
149  }
150  } else if (!containsSpace(delim)) {
151  if (!inPhrase && !queries.isEmpty()) {
152  phraseQueries << queries.takeLast();
153  }
154  inPhrase = true;
155  } else if (inPhrase && !phraseQueries.isEmpty()) {
156  queries << Xapian::Query(Xapian::Query::OP_PHRASE, phraseQueries.begin(), phraseQueries.end());
157  phraseQueries.clear();
158  inPhrase = false;
159  }
160  }
161 
162  start = bf.position();
163  continue;
164  } else if (bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) {
165  end = bf.position();
166 
167  QString str = text.mid(start, end - start);
168 
169  // Get the string ready for saving
170  str = str.toLower();
171 
172  // Remove all accents
173  const QString denormalized = str.normalized(QString::NormalizationForm_KD);
174  QString cleanString;
175  for (const QChar &ch : denormalized) {
176  auto cat = ch.category();
178  cleanString.append(ch);
179  }
180  }
181 
182  str = cleanString.normalized(QString::NormalizationForm_KC);
183  const QStringList lst = str.split(QLatin1Char('_'), Qt::SkipEmptyParts);
184  for (const QString &t : lst) {
185  const QString term = prefix + t;
186 
187  position++;
188  if (inDoubleQuotes || inSingleQuotes || inPhrase) {
189  const QByteArray arr = term.toUtf8();
190  const std::string str(arr.constData(), arr.length());
191  phraseQueries << Xapian::Query(str, 1, position);
192  } else {
193  if (m_autoExpand) {
194  queries << makeQuery(term, position, m_db);
195  } else {
196  queries << Xapian::Query(term.toStdString(), 1, position);
197  }
198  }
199  }
200  }
201  }
202 
203  if (inPhrase) {
204  queries << Xapian::Query(Xapian::Query::OP_PHRASE, phraseQueries.begin(), phraseQueries.end());
205  phraseQueries.clear();
206  }
207 
208  if (!phraseQueries.isEmpty()) {
209  queries << phraseQueries;
210  phraseQueries.clear();
211  }
212 
213  if (queries.size() == 1) {
214  return queries.first();
215  }
216  return {Xapian::Query::OP_AND, queries.begin(), queries.end()};
217 }
218 
220 {
221  m_autoExpand = autoexpand;
222 }
223 
224 Xapian::Query XapianQueryParser::expandWord(const QString &word, const QString &prefix)
225 {
226  const std::string stdString((prefix + word).toUtf8().constData());
227  Xapian::TermIterator it = m_db->allterms_begin(stdString);
228  Xapian::TermIterator end = m_db->allterms_end(stdString);
229 
230  QList<Xapian::Query> queries;
231  for (; it != end; ++it) {
232  queries << Xapian::Query(*it);
233  }
234 
235  if (queries.isEmpty()) {
236  return Xapian::Query(stdString);
237  }
238  Xapian::Query finalQ(Xapian::Query::OP_SYNONYM, queries.begin(), queries.end());
239  return finalQ;
240 }
T & first()
void setAutoExapand(bool autoexpand)
Set if each word in the string should be treated as a partial word and should be expanded to every possible option.
NormalizationForm_KD
QStringList split(const QString &sep, QString::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
Mark_NonSpacing
Q_SCRIPTABLE Q_NOREPLY void start()
void push_back(const T &value)
QString normalized(QString::NormalizationForm mode, QChar::UnicodeVersion version) const const
void reserve(int alloc)
int size() const const
Akonadi search infrastructure.
Definition: core/query.h:20
SkipEmptyParts
bool isEmpty() const const
std::string toStdString() const const
QByteArray toUtf8() const const
bool isEmpty() const const
QString toLower() const const
Search term.
Definition: term.h:26
const char * constData() const const
void clear()
QList::iterator begin()
int size() const const
int length() const const
bool contains(QChar ch, Qt::CaseSensitivity cs) const const
Xapian::Query expandWord(const QString &word, const QString &prefix=QString())
Expands word to every possible option which it can be expanded to.
QList::iterator end()
QString mid(int position, int n) const const
void pop_back()
T takeLast()
const QList< QKeySequence > & end()
QString & append(QChar ch)
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Wed Nov 29 2023 04:08:49 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.