KCoreAddons

kfuzzymatcher.cpp
1 /*
2  This file is part of the KDE libraries
3 
4  SPDX-FileCopyrightText: 2017 Forrest Smith <[email protected]>
5  SPDX-FileCopyrightText: 2021 Waqar Ahmed <[email protected]>
6 
7  SPDX-License-Identifier: LGPL-2.0-or-later
8 */
9 #include "kfuzzymatcher.h"
10 
11 #include <QString>
12 #include <QStringView>
13 #include <QVector>
14 
15 /**
16  * Custom toLower function which is much faster than
17  * c.toLower() directly
18  */
19 static inline constexpr QChar toLower(QChar c)
20 {
21  return c.isLower() ? c : c.toLower();
22 }
23 
24 // internal
25 // clang-format off
26 static bool match_recursive(QStringView::const_iterator pattern,
28  int &outScore,
29  const QStringView::const_iterator strBegin,
30  const QStringView::const_iterator strEnd,
31  const QStringView::const_iterator patternEnd,
32  const uint8_t *srcMatches,
33  uint8_t *matches,
34  int nextMatch,
35  int &totalMatches,
36  int &recursionCount)
37 {
38  static constexpr int recursionLimit = 10;
39  // max number of matches allowed, this should be enough
40  static constexpr int maxMatches = 256;
41 
42  // Count recursions
43  ++recursionCount;
44  if (recursionCount >= recursionLimit) {
45  return false;
46  }
47 
48  // Detect end of strings
49  if (pattern == patternEnd || str == strEnd) {
50  return false;
51  }
52 
53  // Recursion params
54  bool recursiveMatch = false;
55  uint8_t bestRecursiveMatches[maxMatches];
56  int bestRecursiveScore = 0;
57 
58  // Loop through pattern and str looking for a match
59  bool firstMatch = true;
60  QChar currentPatternChar = toLower(*pattern);
61  // Are we matching in sequence start from start?
62  bool matchingInSequence = true;
63  while (pattern != patternEnd && str != strEnd) {
64  // Found match
65  if (currentPatternChar == toLower(*str)) {
66  // Supplied matches buffer was too short
67  if (nextMatch >= maxMatches) {
68  return false;
69  }
70 
71  // "Copy-on-Write" srcMatches into matches
72  if (firstMatch && srcMatches) {
73  memcpy(matches, srcMatches, nextMatch);
74  firstMatch = false;
75  }
76 
77  // Recursive call that "skips" this match
78  uint8_t recursiveMatches[maxMatches];
79  int recursiveScore = 0;
80  const auto strNextChar = std::next(str);
81  if (!matchingInSequence && match_recursive(pattern, strNextChar, recursiveScore, strBegin,
82  strEnd, patternEnd, matches, recursiveMatches,
83  nextMatch, totalMatches, recursionCount)) {
84  // Pick best recursive score
85  if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
86  memcpy(bestRecursiveMatches, recursiveMatches, maxMatches);
87  bestRecursiveScore = recursiveScore;
88  }
89  recursiveMatch = true;
90  }
91 
92  // Advance
93  matches[nextMatch++] = (uint8_t)(std::distance(strBegin, str));
94  ++pattern;
95  currentPatternChar = toLower(*pattern);
96  } else {
97  matchingInSequence = false;
98  }
99  ++str;
100  }
101 
102  // Determine if full pattern was matched
103  const bool matched = pattern == patternEnd;
104 
105  // Calculate score
106  if (matched) {
107  static constexpr int sequentialBonus = 25;
108  static constexpr int separatorBonus = 25; // bonus if match occurs after a separator
109  static constexpr int camelBonus = 25; // bonus if match is uppercase and prev is lower
110  static constexpr int firstLetterBonus = 15; // bonus if the first letter is matched
111 
112  static constexpr int leadingLetterPenalty = -5; // penalty applied for every letter in str before the first match
113  static constexpr int maxLeadingLetterPenalty = -15; // maximum penalty for leading letters
114  static constexpr int unmatchedLetterPenalty = -1; // penalty for every letter that doesn't matter
115 
116  static constexpr int nonBeginSequenceBonus = 10;
117 
118  // Initialize score
119  outScore = 100;
120 
121  // Apply leading letter penalty
122  const int penalty = std::max(leadingLetterPenalty * matches[0], maxLeadingLetterPenalty);
123 
124  outScore += penalty;
125 
126  // Apply unmatched penalty
127  const int unmatched = (int)(std::distance(strBegin, strEnd)) - nextMatch;
128  outScore += unmatchedLetterPenalty * unmatched;
129 
130  uint8_t seqs[maxMatches] = {0};
131 
132  // Apply ordering bonuses
133  int j = 0;
134  for (int i = 0; i < nextMatch; ++i) {
135  const uint8_t currIdx = matches[i];
136 
137  if (i > 0) {
138  const uint8_t prevIdx = matches[i - 1];
139 
140  // Sequential
141  if (currIdx == (prevIdx + 1)) {
142  if (j > 0 && seqs[j - 1] == i - 1){
143  outScore += sequentialBonus;
144  seqs[j++] = i;
145  } else {
146  // In sequence, but from first char
147  outScore += nonBeginSequenceBonus;
148  }
149  }
150  }
151 
152  // Check for bonuses based on neighbor character value
153  if (currIdx > 0) {
154  // Camel case
155  const QChar neighbor = *(strBegin + currIdx - 1);
156  const QChar curr = *(strBegin + currIdx);
157  if (neighbor.isLower() && curr.isUpper()) {
158  outScore += camelBonus;
159  }
160 
161  // Separator
162  const bool neighborSeparator = neighbor == QLatin1Char('_') || neighbor == QLatin1Char(' ');
163  if (neighborSeparator) {
164  outScore += separatorBonus;
165  }
166  } else {
167  // First letter
168  outScore += firstLetterBonus;
169  seqs[j++] = i;
170  }
171  }
172  }
173 
174  totalMatches = nextMatch;
175 
176  // Return best result
177  if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
178  // Recursive score is better than "this"
179  memcpy(matches, bestRecursiveMatches, maxMatches);
180  outScore = bestRecursiveScore;
181  return true;
182  } else if (matched) {
183  // "this" score is better than recursive
184  return true;
185  } else {
186  // no match
187  return false;
188  }
189 }
190 // clang-format on
191 
192 static bool match_internal(QStringView pattern, QStringView str, int &outScore, unsigned char *matches)
193 {
194  if (pattern.isEmpty()) {
195  return true;
196  }
197 
198  int recursionCount = 0;
199 
200  auto strIt = str.cbegin();
201  auto patternIt = pattern.cbegin();
202  const auto patternEnd = pattern.cend();
203  const auto strEnd = str.cend();
204 
205  int total = 0;
206  return match_recursive(patternIt, strIt, outScore, strIt, strEnd, patternEnd, nullptr, matches, 0, total, recursionCount);
207 }
208 
209 /**************************************************************/
210 
212 {
213  auto patternIt = pattern.cbegin();
214  /**
215  * Instead of doing
216  *
217  * strIt.toLower() == patternIt.toLower()
218  *
219  * we convert patternIt to Upper / Lower as needed and compare with strIt. This
220  * saves us from calling toLower() on both strings, making things a little bit faster
221  */
222  bool lower = patternIt->isLower();
223  QChar cUp = lower ? patternIt->toUpper() : *patternIt;
224  QChar cLow = lower ? *patternIt : patternIt->toLower();
225  for (auto strIt = str.cbegin(); strIt != str.cend() && patternIt != pattern.cend(); ++strIt) {
226  if (*strIt == cLow || *strIt == cUp) {
227  ++patternIt;
228  lower = patternIt->isLower();
229  cUp = lower ? patternIt->toUpper() : *patternIt;
230  cLow = lower ? *patternIt : patternIt->toLower();
231  }
232  }
233 
234  return patternIt == pattern.cend();
235 }
236 
238 {
239  /**
240  * Simple substring matching to flush out non-matching strings
241  */
242  const bool simpleMatch = matchSimple(pattern, str);
243 
244  KFuzzyMatcher::Result result;
245  result.matched = false;
246  result.score = 0;
247 
248  if (!simpleMatch) {
249  return result;
250  }
251 
252  // actual algorithm
253  int score = 0;
254  uint8_t matches[256];
255  const bool matched = match_internal(pattern, str, score, matches);
256  result.matched = matched;
257  result.score = score;
258  return result;
259 }
260 
262 {
264  if (pattern.isEmpty()) {
265  return ranges;
266  }
267 
268  int totalMatches = 0;
269  int score = 0;
270  int recursionCount = 0;
271 
272  auto strIt = str.cbegin();
273  auto patternIt = pattern.cbegin();
274  const auto patternEnd = pattern.cend();
275  const auto strEnd = str.cend();
276 
277  uint8_t matches[256];
278  auto res = match_recursive(patternIt, strIt, score, strIt, strEnd, patternEnd, nullptr, matches, 0, totalMatches, recursionCount);
279  // didn't match? => We don't care about results
280  if (!res && type == RangeType::FullyMatched) {
281  return {};
282  }
283 
284  int previousMatch = 0;
285  for (int i = 0; i < totalMatches; ++i) {
286  auto matchPos = matches[i];
287  /**
288  * Check if this match is part of the previous
289  * match. If it is, we increase the length of
290  * the last range.
291  */
292  if (!ranges.isEmpty() && matchPos == previousMatch + 1) {
293  ranges.last().length++;
294  } else {
295  /**
296  * This is a new match inside the string
297  */
298  ranges.push_back({/* start: */ matchPos, /* length: */ 1});
299  }
300  previousMatch = matchPos;
301  }
302 
303  return ranges;
304 }
bool isEmpty() const const
The result of a fuzzy match.
Definition: kfuzzymatcher.h:83
KCOREADDONS_EXPORT bool matchSimple(QStringView pattern, QStringView str)
Simple fuzzy matching of chars in pattern with chars in str sequentially.
int score
Score of this match.
Definition: kfuzzymatcher.h:88
T & last()
bool matched
true if match was successful
Definition: kfuzzymatcher.h:90
KCOREADDONS_EXPORT QVector< KFuzzyMatcher::Range > matchedRanges(QStringView pattern, QStringView str, RangeType type=RangeType::FullyMatched)
A function which returns the positions + lengths where the pattern matched inside the str...
QStringView::const_iterator cbegin() const const
typedef const_iterator
bool isUpper() const const
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
This is the main function which does scored fuzzy matching.
QChar toLower() const const
RangeType
The type of matches to consider when requesting for ranges.
QChar toUpper() const const
bool isEmpty() const const
void push_back(const T &value)
QStringView::const_iterator cend() const const
bool isLower() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2022 The KDE developers.
Generated on Mon Jan 24 2022 22:54:37 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.