KIO

kuriikwsfiltereng.cpp
/*
    This file is part of the KDE project
    SPDX-FileCopyrightText: 2002, 2003 Dawit Alemayehu <adawit@kde.org>
    SPDX-FileCopyrightText: 2000 Yves Arrouye <yves@realnames.com>
    SPDX-FileCopyrightText: 1999 Simon Hausmann <hausmann@kde.org>

    Advanced web shortcuts:
    SPDX-FileCopyrightText: 2001 Andreas Hochsteger <e9625392@student.tuwien.ac.at>

    SPDX-License-Identifier: GPL-2.0-or-later
*/
12
13#include "kuriikwsfiltereng_p.h"
14#include "searchprovider.h"
15
16#include <KConfig>
17#include <KConfigGroup>
18#include <kprotocolinfo.h>
19
20#include <QDBusConnection>
21#include <QLoggingCategory>
22#include <QRegularExpression>
23#include <QStringEncoder>
24
25Q_LOGGING_CATEGORY(category, "kf.kio.urifilters.ikws", QtWarningMsg)
26using namespace KIO;
27
/**
 * IMPORTANT: If you change anything here, make sure the kiowidgets-kurifiltertest-{colon,space}-separator
 * unit tests still pass (they're usually run as part of "make test").
 */
32
33KURISearchFilterEngine::KURISearchFilterEngine()
34{
35 configure();
36 // Only after initial load, we would want to reparse the files on config changes.
37 // When the registry is constructed, it automatically loads the searchproviders
38 m_reloadRegistry = true;
40 .connect(QString(), QStringLiteral("/"), QStringLiteral("org.kde.KUriFilterPlugin"), QStringLiteral("configure"), this, SLOT(configure()));
41}
42
43KURISearchFilterEngine::~KURISearchFilterEngine() = default;
44
45// static
46QStringList KURISearchFilterEngine::defaultSearchProviders()
47{
48 static const QStringList defaultProviders{QStringLiteral("google"),
49 QStringLiteral("youtube"),
50 QStringLiteral("yahoo"),
51 QStringLiteral("wikipedia"),
52 QStringLiteral("wikit")};
53 return defaultProviders;
54}
55
56SearchProvider *KURISearchFilterEngine::webShortcutQuery(const QString &typedString, QString &searchTerm) const
57{
58 const auto getProviderForKey = [this, &searchTerm](const QString &key) {
59 SearchProvider *provider = nullptr;
60 // If the key contains a : an assertion in the isKnownProtocol method would fail. This can be
61 // the case if the delimiter is switched to space, see kiowidgets_space_separator_test
62 if (!key.isEmpty() && (key.contains(QLatin1Char(':')) || !KProtocolInfo::isKnownProtocol(key, false))) {
63 provider = m_registry.findByKey(key);
64 if (provider) {
65 if (!m_bUseOnlyPreferredWebShortcuts || m_preferredWebShortcuts.contains(provider->desktopEntryName())) {
66 qCDebug(category) << "found provider" << provider->desktopEntryName() << "searchTerm=" << searchTerm;
67 } else {
68 provider = nullptr;
69 }
70 }
71 }
72 return provider;
73 };
74
75 SearchProvider *provider = nullptr;
76 if (m_bWebShortcutsEnabled) {
77 QString key;
78 if (typedString.contains(QLatin1Char('!'))) {
79 const static QRegularExpression bangRegex(QStringLiteral("!([^ ]+)"));
80 const auto match = bangRegex.match(typedString);
81 if (match.hasMatch() && match.lastCapturedIndex() == 1) {
82 key = match.captured(1);
83 searchTerm = QString(typedString).remove(bangRegex);
84 }
85 }
86
87 // If we have found a bang-match it might be unintentionally triggered, because the ! character is contained
88 // in the query. To avoid not returning any results we check if we can find a provider for the key, if not
89 // we clear it and try the traditional query syntax, see https://bugs.kde.org/show_bug.cgi?id=437660
90 if (!key.isEmpty()) {
91 provider = getProviderForKey(key);
92 if (!provider) {
93 key.clear();
94 }
95 }
96 if (key.isEmpty()) {
97 const int pos = typedString.indexOf(QLatin1Char(m_cKeywordDelimiter));
98 if (pos > -1) {
99 key = typedString.left(pos).toLower(); // #169801
100 searchTerm = typedString.mid(pos + 1);
101 } else if (!typedString.isEmpty() && m_cKeywordDelimiter == ' ') {
102 key = typedString;
103 searchTerm = typedString.mid(pos + 1);
104 }
105 provider = getProviderForKey(key);
106 }
107
108 qCDebug(category) << "m_cKeywordDelimiter=" << QLatin1Char(m_cKeywordDelimiter) << "key=" << key << "typedString=" << typedString;
109 }
110
111 return provider;
112}
113
114SearchProvider *KURISearchFilterEngine::autoWebSearchQuery(const QString &typedString, const QString &defaultShortcut) const
115{
116 SearchProvider *provider = nullptr;
117 const QString defaultSearchProvider = (m_defaultWebShortcut.isEmpty() ? defaultShortcut : m_defaultWebShortcut);
118
119 if (m_bWebShortcutsEnabled && !defaultSearchProvider.isEmpty()) {
120 // Make sure we ignore supported protocols, e.g. "smb:", "http:"
121 const int pos = typedString.indexOf(QLatin1Char(':'));
122
123 if (pos == -1 || !KProtocolInfo::isKnownProtocol(typedString.left(pos), false)) {
124 provider = m_registry.findByDesktopName(defaultSearchProvider);
125 }
126 }
127
128 return provider;
129}
130
131QByteArray KURISearchFilterEngine::name() const
132{
133 return "kuriikwsfilter";
134}
135
136char KURISearchFilterEngine::keywordDelimiter() const
137{
138 return m_cKeywordDelimiter;
139}
140
141QString KURISearchFilterEngine::defaultSearchEngine() const
142{
143 return m_defaultWebShortcut;
144}
145
146QStringList KURISearchFilterEngine::favoriteEngineList() const
147{
148 return m_preferredWebShortcuts;
149}
150
151KURISearchFilterEngine *KURISearchFilterEngine::self()
152{
153 static KURISearchFilterEngine self;
154 return &self;
155}
156
157QStringList KURISearchFilterEngine::modifySubstitutionMap(SubstMap &map, const QString &query) const
158{
159 // Returns the number of query words
160 QString userquery = query;
161
162 // Do some pre-encoding, before we can start the work:
163 {
164 const static QRegularExpression qsexpr(QStringLiteral("\\\"[^\\\"]*\\\""));
165 // Temporarily substitute spaces in quoted strings (" " -> "%20")
166 // Needed to split user query into StringList correctly.
167 int start = 0;
169 while ((match = qsexpr.match(userquery, start)).hasMatch()) {
170 QString str = match.captured(0);
171 str.replace(QLatin1Char(' '), QLatin1String("%20"));
172 userquery.replace(match.capturedStart(0), match.capturedLength(0), str);
173 start = match.capturedStart(0) + str.size(); // Move after last quote
174 }
175 }
176
177 // Split user query between spaces:
179
180 // Back-substitute quoted strings (%20 -> " "):
181 userquery.replace(QLatin1String("%20"), QLatin1String(" "));
182 l.replaceInStrings(QStringLiteral("%20"), QStringLiteral(" "));
183
184 qCDebug(category) << "Generating substitution map:\n";
185 // Generate substitution map from user query:
186 for (int i = 0; i <= l.count(); i++) {
187 int pos = 0;
188 QString v;
189
190 // Add whole user query (\{0}) to substitution map:
191 if (i == 0) {
192 v = userquery;
193 }
194 // Add partial user query items to substitution map:
195 else {
196 v = l[i - 1];
197 }
198
199 // Insert partial queries (referenced by \1 ... \n) to map:
200 map.insert(QString::number(i), v);
201
202 // Insert named references (referenced by \name) to map:
203 if ((i > 0) && (pos = v.indexOf(QLatin1Char('='))) > 0) {
204 QString s = v.mid(pos + 1);
205 QString k = v.left(pos);
206
207 // Back-substitute references contained in references (e.g. '\refname' substitutes to 'thisquery=\0')
208 s.replace(QLatin1String("%5C"), QLatin1String("\\"));
209 map.insert(k, s);
210 }
211 }
212
213 return l;
214}
215
216static QString encodeString(const QString &s, QStringEncoder &codec)
217{
218 // we encode all characters, including the space character BUG: 304276
219 QByteArray encoded = QByteArray(codec.encode(s)).toPercentEncoding();
220 return QString::fromUtf8(encoded);
221}
222
223QString KURISearchFilterEngine::substituteQuery(const QString &url, SubstMap &map, const QString &userquery, QStringEncoder &codec) const
224{
225 QString newurl = url;
226 QStringList ql = modifySubstitutionMap(map, userquery);
227 const int count = ql.count();
228
229 // Substitute references (\{ref1,ref2,...}) with values from user query:
230 {
231 const static QRegularExpression reflistRe(QStringLiteral("\\\\\\{([^\\}]+)\\}"));
232 // Substitute reflists (\{ref1,ref2,...}):
233 int start = 0;
235 while ((match = reflistRe.match(newurl, start)).hasMatch()) {
236 bool found = false;
237
238 // bool rest = false;
239 QString v;
240 const QString rlstring = match.captured(1);
241
242 // \{@} gets a special treatment later
243 if (rlstring == QLatin1String("@")) {
244 v = QStringLiteral("\\@");
245 found = true;
246 }
247
248 // TODO: strip whitespaces around commas
249 const QStringList refList = rlstring.split(QLatin1Char(','), Qt::SkipEmptyParts);
250
251 for (const QString &rlitem : refList) {
252 if (found) {
253 break;
254 }
255
256 const static QRegularExpression rangeRe(QStringLiteral("([0-9]*)\\-([0-9]*)"));
257 const QRegularExpressionMatch rangeMatch = rangeRe.match(rlitem);
258 // Substitute a range of keywords
259 if (rangeMatch.hasMatch()) {
260 int first = rangeMatch.captured(1).toInt();
261 int last = rangeMatch.captured(2).toInt();
262
263 if (first == 0) {
264 first = 1;
265 }
266
267 if (last == 0) {
268 last = count;
269 }
270
271 for (int i = first; i <= last; i++) {
272 v += map[QString::number(i)] + QLatin1Char(' ');
273 // Remove used value from ql (needed for \{@}):
274 ql[i - 1].clear();
275 }
276
277 v = v.trimmed();
278 if (!v.isEmpty()) {
279 found = true;
280 }
281
282 v = encodeString(v, codec);
283 } else if (rlitem.startsWith(QLatin1Char('\"')) && rlitem.endsWith(QLatin1Char('\"'))) {
284 // Use default string from query definition:
285 found = true;
286 QString s = rlitem.mid(1, rlitem.length() - 2);
287 v = encodeString(s, codec);
288 } else if (map.contains(rlitem)) {
289 // Use value from substitution map:
290 found = true;
291 v = encodeString(map[rlitem], codec);
292
293 // Remove used value from ql (needed for \{@}):
294 const QChar c = rlitem.at(0); // rlitem can't be empty at this point
295 if (c == QLatin1Char('0')) {
296 // It's a numeric reference to '0'
297 for (QStringList::Iterator it = ql.begin(); it != ql.end(); ++it) {
298 (*it).clear();
299 }
300 } else if ((c >= QLatin1String("0")) && (c <= QLatin1String("9"))) { // krazy:excludeall=doublequote_chars
301 // It's a numeric reference > '0'
302 int n = rlitem.toInt();
303 ql[n - 1].clear();
304 } else {
305 // It's a alphanumeric reference
307 while ((it != ql.end()) && !it->startsWith(rlitem + QLatin1Char('='))) {
308 ++it;
309 }
310 if (it != ql.end()) {
311 it->clear();
312 }
313 }
314
315 // Encode '+', otherwise it would be interpreted as space in the resulting url:
316 v.replace(QLatin1Char('+'), QLatin1String("%2B"));
317 } else if (rlitem == QLatin1String("@")) {
318 v = QStringLiteral("\\@");
319 }
320 }
321
322 newurl.replace(match.capturedStart(0), match.capturedLength(0), v);
323 start = match.capturedStart(0) + v.size();
324 }
325
326 // Special handling for \{@};
327 {
328 // Generate list of unmatched strings:
329 QString v = ql.join(QLatin1Char(' ')).simplified();
330 v = encodeString(v, codec);
331
332 // Substitute \{@} with list of unmatched query strings
333 newurl.replace(QLatin1String("\\@"), v);
334 }
335 }
336
337 return newurl;
338}
339
340QUrl KURISearchFilterEngine::formatResult(const QString &url, const QString &cset1, const QString &cset2, const QString &query, bool isMalformed) const
341{
342 SubstMap map;
343 return formatResult(url, cset1, cset2, query, isMalformed, map);
344}
345
346QUrl KURISearchFilterEngine::formatResult(const QString &url,
347 const QString &cset1,
348 const QString &cset2,
349 const QString &userquery,
350 bool /* isMalformed */,
351 SubstMap &map) const
352{
353 // Return nothing if userquery is empty and it contains
354 // substitution strings...
355 if (userquery.isEmpty() && url.indexOf(QLatin1String("\\{")) > 0) {
356 return QUrl();
357 }
358
359 // Create a codec for the desired encoding so that we can transcode the user's "url".
360 QString cseta = cset1;
361 if (cseta.isEmpty()) {
362 cseta = QStringLiteral("UTF-8");
363 }
364
365 QStringEncoder csetacodec(cseta.toLatin1().constData());
366 if (!csetacodec.isValid()) {
367 cseta = QStringLiteral("UTF-8");
369 }
370
371 // Add charset indicator for the query to substitution map:
372 map.insert(QStringLiteral("ikw_charset"), cseta);
373
374 // Add charset indicator for the fallback query to substitution map:
375 QString csetb = cset2;
376 if (csetb.isEmpty()) {
377 csetb = QStringLiteral("UTF-8");
378 }
379 map.insert(QStringLiteral("wsc_charset"), csetb);
380
381 QString newurl = substituteQuery(url, map, userquery, csetacodec);
382
383 return QUrl(newurl, QUrl::StrictMode);
384}
385
386void KURISearchFilterEngine::configure()
387{
388 qCDebug(category) << "Keywords Engine: Loading config...";
389
390 // Load the config.
392 KConfigGroup group = config.group(QStringLiteral("General"));
393
394 m_cKeywordDelimiter = group.readEntry("KeywordDelimiter", ":").at(0).toLatin1();
395 m_bWebShortcutsEnabled = group.readEntry("EnableWebShortcuts", true);
396 m_defaultWebShortcut = group.readEntry("DefaultWebShortcut", "duckduckgo");
397 m_bUseOnlyPreferredWebShortcuts = group.readEntry("UsePreferredWebShortcutsOnly", false);
398
399 QStringList defaultPreferredShortcuts;
400 if (!group.hasKey("PreferredWebShortcuts")) {
401 defaultPreferredShortcuts = KURISearchFilterEngine::defaultSearchProviders();
402 }
403 m_preferredWebShortcuts = group.readEntry("PreferredWebShortcuts", defaultPreferredShortcuts);
404
405 // Use either a white space or a : as the keyword delimiter...
406 if (strchr(" :", m_cKeywordDelimiter) == nullptr) {
407 m_cKeywordDelimiter = ':';
408 }
409
410 qCDebug(category) << "Web Shortcuts Enabled: " << m_bWebShortcutsEnabled;
411 qCDebug(category) << "Default Shortcut: " << m_defaultWebShortcut;
412 qCDebug(category) << "Keyword Delimiter: " << m_cKeywordDelimiter;
413 if (m_reloadRegistry) {
414 m_registry.reload();
415 }
416}
417
418SearchProviderRegistry *KURISearchFilterEngine::registry()
419{
420 return &m_registry;
421}
422
423#include "moc_kuriikwsfiltereng_p.cpp"
KConfigGroup group(const QString &group)
bool hasKey(const char *key) const
QString readEntry(const char *key, const char *aDefault=nullptr) const
static bool isKnownProtocol(const QUrl &url)
Returns whether a protocol is installed that is able to handle url.
QString desktopEntryName() const
Returns the desktop filename of the search provider without any extension.
Q_SCRIPTABLE Q_NOREPLY void start()
KSERVICE_EXPORT KService::List query(FilterFunc filterFunc)
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
A namespace for KIO globals.
KGuiItem configure()
QString name(StandardShortcut id)
const char * constData() const const
QByteArray toPercentEncoding(const QByteArray &exclude, const QByteArray &include, char percent) const const
char toLatin1() const const
bool connect(const QString &service, const QString &path, const QString &interface, const QString &name, QObject *receiver, const char *slot)
QDBusConnection sessionBus()
typedef Iterator
iterator begin()
void clear()
qsizetype count() const const
iterator end()
QString captured(QStringView name) const const
bool hasMatch() const const
const QChar at(qsizetype position) const const
void clear()
bool contains(QChar ch, Qt::CaseSensitivity cs) const const
QString fromUtf8(QByteArrayView str)
qsizetype indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const const
bool isEmpty() const const
QString left(qsizetype n) const const
QString mid(qsizetype position, qsizetype n) const const
QString number(double n, char format, int precision)
QString & remove(QChar ch, Qt::CaseSensitivity cs)
QString & replace(QChar before, QChar after, Qt::CaseSensitivity cs)
QString simplified() const const
qsizetype size() const const
QStringList split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
int toInt(bool *ok, int base) const const
QByteArray toLatin1() const const
QString toLower() const const
QString trimmed() const const
DecodedData< QStringView > encode(QStringView in)
QString join(QChar separator) const const
QStringList & replaceInStrings(QStringView before, QStringView after, Qt::CaseSensitivity cs)
SkipEmptyParts
QFuture< void > map(Iterator begin, Iterator end, MapFunctor &&function)
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:18:52 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.