KCoreAddons

kfuzzymatcher.cpp
1/*
2 This file is part of the KDE libraries
3
4 SPDX-FileCopyrightText: 2017 Forrest Smith <forrestthewoods@gmail.com>
5 SPDX-FileCopyrightText: 2021 Waqar Ahmed <waqar.17a@gmail.com>
6
7 SPDX-License-Identifier: LGPL-2.0-or-later
8*/
9#include "kfuzzymatcher.h"
10
11#include <QList>
12#include <QString>
13#include <QStringView>
14
15/**
16 * Custom toLower function which is much faster than
17 * c.toLower() directly
18 */
19static inline constexpr QChar toLower(QChar c)
20{
21 return c.isLower() ? c : c.toLower();
22}
23
24// internal
25// clang-format off
26static bool match_recursive(QStringView::const_iterator pattern,
28 int &outScore,
29 const QStringView::const_iterator strBegin,
30 const QStringView::const_iterator strEnd,
31 const QStringView::const_iterator patternEnd,
32 const uint8_t *srcMatches,
33 uint8_t *matches,
34 int nextMatch,
35 int &totalMatches,
36 int &recursionCount)
37{
38 static constexpr int recursionLimit = 10;
39 // max number of matches allowed, this should be enough
40 static constexpr int maxMatches = 256;
41
42 // Count recursions
43 ++recursionCount;
44 if (recursionCount >= recursionLimit) {
45 return false;
46 }
47
48 // Detect end of strings
49 if (pattern == patternEnd || str == strEnd) {
50 return false;
51 }
52
53 // Recursion params
54 bool recursiveMatch = false;
55 uint8_t bestRecursiveMatches[maxMatches];
56 int bestRecursiveScore = 0;
57
58 // Loop through pattern and str looking for a match
59 bool firstMatch = true;
60 QChar currentPatternChar = toLower(*pattern);
61 // Are we matching in sequence start from start?
62 bool matchingInSequence = true;
63 while (pattern != patternEnd && str != strEnd) {
64 // Found match
65 if (currentPatternChar == toLower(*str)) {
66 // Supplied matches buffer was too short
67 if (nextMatch >= maxMatches) {
68 return false;
69 }
70
71 // "Copy-on-Write" srcMatches into matches
72 if (firstMatch && srcMatches) {
73 memcpy(matches, srcMatches, nextMatch);
74 firstMatch = false;
75 }
76
77 // Recursive call that "skips" this match
78 uint8_t recursiveMatches[maxMatches];
79 int recursiveScore = 0;
80 const auto strNextChar = std::next(str);
81 if (!matchingInSequence && match_recursive(pattern, strNextChar, recursiveScore, strBegin,
82 strEnd, patternEnd, matches, recursiveMatches,
83 nextMatch, totalMatches, recursionCount)) {
84 // Pick best recursive score
85 if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
86 memcpy(bestRecursiveMatches, recursiveMatches, maxMatches);
87 bestRecursiveScore = recursiveScore;
88 }
89 recursiveMatch = true;
90 }
91
92 // Advance
93 matches[nextMatch++] = (uint8_t)(std::distance(strBegin, str));
94 ++pattern;
95 currentPatternChar = toLower(*pattern);
96 } else {
97 matchingInSequence = false;
98 }
99 ++str;
100 }
101
102 // Determine if full pattern was matched
103 const bool matched = pattern == patternEnd;
104
105 // Calculate score
106 if (matched) {
107 static constexpr int sequentialBonus = 25;
108 static constexpr int separatorBonus = 25; // bonus if match occurs after a separator
109 static constexpr int camelBonus = 25; // bonus if match is uppercase and prev is lower
110 static constexpr int firstLetterBonus = 15; // bonus if the first letter is matched
111
112 static constexpr int leadingLetterPenalty = -5; // penalty applied for every letter in str before the first match
113 static constexpr int maxLeadingLetterPenalty = -15; // maximum penalty for leading letters
114 static constexpr int unmatchedLetterPenalty = -1; // penalty for every letter that doesn't matter
115
116 static constexpr int nonBeginSequenceBonus = 10;
117
118 // Initialize score
119 outScore = 100;
120
121 // Apply leading letter penalty
122 const int penalty = std::max(leadingLetterPenalty * matches[0], maxLeadingLetterPenalty);
123
124 outScore += penalty;
125
126 // Apply unmatched penalty
127 const int unmatched = (int)(std::distance(strBegin, strEnd)) - nextMatch;
128 outScore += unmatchedLetterPenalty * unmatched;
129
130 uint8_t seqs[maxMatches] = {0};
131
132 // Apply ordering bonuses
133 int j = 0;
134 for (int i = 0; i < nextMatch; ++i) {
135 const uint8_t currIdx = matches[i];
136
137 if (i > 0) {
138 const uint8_t prevIdx = matches[i - 1];
139
140 // Sequential
141 if (currIdx == (prevIdx + 1)) {
142 if (j > 0 && seqs[j - 1] == i - 1){
143 outScore += sequentialBonus;
144 seqs[j++] = i;
145 } else {
146 // In sequence, but from first char
147 outScore += nonBeginSequenceBonus;
148 }
149 }
150 }
151
152 // Check for bonuses based on neighbor character value
153 if (currIdx > 0) {
154 // Camel case
155 const QChar neighbor = *(strBegin + currIdx - 1);
156 const QChar curr = *(strBegin + currIdx);
157 if (neighbor.isLower() && curr.isUpper()) {
158 outScore += camelBonus;
159 }
160
161 // Separator
162 const bool neighborSeparator = neighbor == QLatin1Char('_') || neighbor == QLatin1Char(' ');
163 if (neighborSeparator) {
164 outScore += separatorBonus;
165 }
166 } else {
167 // First letter
168 outScore += firstLetterBonus;
169 seqs[j++] = i;
170 }
171 }
172 }
173
174 totalMatches = nextMatch;
175
176 // Return best result
177 if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
178 // Recursive score is better than "this"
179 memcpy(matches, bestRecursiveMatches, maxMatches);
180 outScore = bestRecursiveScore;
181 return true;
182 } else if (matched) {
183 // "this" score is better than recursive
184 return true;
185 } else {
186 // no match
187 return false;
188 }
189}
190// clang-format on
191
192static bool match_internal(QStringView pattern, QStringView str, int &outScore, unsigned char *matches)
193{
194 if (pattern.isEmpty()) {
195 return true;
196 }
197
198 int recursionCount = 0;
199
200 auto strIt = str.cbegin();
201 auto patternIt = pattern.cbegin();
202 const auto patternEnd = pattern.cend();
203 const auto strEnd = str.cend();
204
205 int total = 0;
206 return match_recursive(patternIt, strIt, outScore, strIt, strEnd, patternEnd, nullptr, matches, 0, total, recursionCount);
207}
208
209/**************************************************************/
210
212{
213 auto patternIt = pattern.cbegin();
214 /**
215 * Instead of doing
216 *
217 * strIt.toLower() == patternIt.toLower()
218 *
219 * we convert patternIt to Upper / Lower as needed and compare with strIt. This
220 * saves us from calling toLower() on both strings, making things a little bit faster
221 */
222 bool lower = patternIt->isLower();
223 QChar cUp = lower ? patternIt->toUpper() : *patternIt;
224 QChar cLow = lower ? *patternIt : patternIt->toLower();
225 for (auto strIt = str.cbegin(); strIt != str.cend() && patternIt != pattern.cend(); ++strIt) {
226 if (*strIt == cLow || *strIt == cUp) {
227 ++patternIt;
228 lower = patternIt->isLower();
229 cUp = lower ? patternIt->toUpper() : *patternIt;
230 cLow = lower ? *patternIt : patternIt->toLower();
231 }
232 }
233
234 return patternIt == pattern.cend();
235}
236
238{
239 /**
240 * Simple substring matching to flush out non-matching strings
241 */
242 const bool simpleMatch = matchSimple(pattern, str);
243
245 result.matched = false;
246 result.score = 0;
247
248 if (!simpleMatch) {
249 return result;
250 }
251
252 // actual algorithm
253 int score = 0;
254 uint8_t matches[256];
255 const bool matched = match_internal(pattern, str, score, matches);
256 result.matched = matched;
257 result.score = score;
258 return result;
259}
260
262{
264 if (pattern.isEmpty()) {
265 return ranges;
266 }
267
268 int totalMatches = 0;
269 int score = 0;
270 int recursionCount = 0;
271
272 auto strIt = str.cbegin();
273 auto patternIt = pattern.cbegin();
274 const auto patternEnd = pattern.cend();
275 const auto strEnd = str.cend();
276
277 uint8_t matches[256];
278 auto res = match_recursive(patternIt, strIt, score, strIt, strEnd, patternEnd, nullptr, matches, 0, totalMatches, recursionCount);
279 // didn't match? => We don't care about results
280 if (!res && type == RangeType::FullyMatched) {
281 return {};
282 }
283
284 int previousMatch = 0;
285 for (int i = 0; i < totalMatches; ++i) {
286 auto matchPos = matches[i];
287 /**
288 * Check if this match is part of the previous
289 * match. If it is, we increase the length of
290 * the last range.
291 */
292 if (!ranges.isEmpty() && matchPos == previousMatch + 1) {
293 ranges.last().length++;
294 } else {
295 /**
296 * This is a new match inside the string
297 */
298 ranges.push_back({/* start: */ matchPos, /* length: */ 1});
299 }
300 previousMatch = matchPos;
301 }
302
303 return ranges;
304}
KCOREADDONS_EXPORT bool matchSimple(QStringView pattern, QStringView str)
Simple fuzzy matching of chars in pattern with chars in str sequentially.
RangeType
The type of matches to consider when requesting for ranges.
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
This is the main function which does scored fuzzy matching.
KCOREADDONS_EXPORT QList< KFuzzyMatcher::Range > matchedRanges(QStringView pattern, QStringView str, RangeType type=RangeType::FullyMatched)
A function which returns the positions + lengths where the pattern matched inside the str.
bool isLower(char32_t ucs4)
bool isUpper(char32_t ucs4)
char32_t toLower(char32_t ucs4)
bool isEmpty() const const
T & last()
void push_back(parameter_type value)
const_iterator cbegin() const const
const_iterator cend() const const
typedef const_iterator
bool isEmpty() const const
The result of a fuzzy match.
int score
Score of this match.
bool matched
true if match was successful
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Fri Jun 14 2024 11:58:27 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.