KCoreAddons

kfuzzymatcher.cpp
1 /*
2  This file is part of the KDE libraries
3 
4  SPDX-FileCopyrightText: 2017 Forrest Smith <[email protected]>
5  SPDX-FileCopyrightText: 2021 Waqar Ahmed <[email protected]>
6 
7  SPDX-License-Identifier: LGPL-2.0-or-later
8 */
9 #include "kfuzzymatcher.h"
10 
11 #include <QString>
12 #include <QStringView>
13 #include <QVector>
14 
15 /**
16  * Custom toLower function which is much faster than
17  * c.toLower() directly
18  */
19 static inline constexpr QChar toLower(QChar c)
20 {
21  return c.isLower() ? c : c.toLower();
22 }
23 
24 // internal
25 // clang-format off
26 static bool match_recursive(QStringView::const_iterator pattern,
28  int &outScore,
29  const QStringView::const_iterator strBegin,
30  const QStringView::const_iterator strEnd,
31  const QStringView::const_iterator patternEnd,
32  const uint8_t *srcMatches,
33  uint8_t *matches,
34  int nextMatch,
35  int &totalMatches,
36  int &recursionCount)
37 {
38  static constexpr int recursionLimit = 10;
39  // max number of matches allowed, this should be enough
40  static constexpr int maxMatches = 256;
41 
42  // Count recursions
43  ++recursionCount;
44  if (recursionCount >= recursionLimit) {
45  return false;
46  }
47 
48  // Detect end of strings
49  if (pattern == patternEnd || str == strEnd) {
50  return false;
51  }
52 
53  // Recursion params
54  bool recursiveMatch = false;
55  uint8_t bestRecursiveMatches[maxMatches];
56  int bestRecursiveScore = 0;
57 
58  // Loop through pattern and str looking for a match
59  bool firstMatch = true;
60  QChar currentPatternChar = toLower(*pattern);
61  // Are we matching in sequence start from start?
62  bool matchingInSequence = true;
63  while (pattern != patternEnd && str != strEnd) {
64  // Found match
65  if (currentPatternChar == toLower(*str)) {
66  // Supplied matches buffer was too short
67  if (nextMatch >= maxMatches) {
68  return false;
69  }
70 
71  // "Copy-on-Write" srcMatches into matches
72  if (firstMatch && srcMatches) {
73  memcpy(matches, srcMatches, nextMatch);
74  firstMatch = false;
75  }
76 
77  // Recursive call that "skips" this match
78  uint8_t recursiveMatches[maxMatches];
79  int recursiveScore = 0;
80  const auto strNextChar = std::next(str);
81  if (!matchingInSequence && match_recursive(pattern, strNextChar, recursiveScore, strBegin,
82  strEnd, patternEnd, matches, recursiveMatches,
83  nextMatch, totalMatches, recursionCount)) {
84  // Pick best recursive score
85  if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
86  memcpy(bestRecursiveMatches, recursiveMatches, maxMatches);
87  bestRecursiveScore = recursiveScore;
88  }
89  recursiveMatch = true;
90  }
91 
92  // Advance
93  matches[nextMatch++] = (uint8_t)(std::distance(strBegin, str));
94  ++pattern;
95  currentPatternChar = toLower(*pattern);
96  } else {
97  matchingInSequence = false;
98  }
99  ++str;
100  }
101 
102  // Determine if full pattern was matched
103  const bool matched = pattern == patternEnd;
104 
105  // Calculate score
106  if (matched) {
107  static constexpr int sequentialBonus = 25;
108  static constexpr int separatorBonus = 25; // bonus if match occurs after a separator
109  static constexpr int camelBonus = 25; // bonus if match is uppercase and prev is lower
110  static constexpr int firstLetterBonus = 15; // bonus if the first letter is matched
111 
112  static constexpr int leadingLetterPenalty = -5; // penalty applied for every letter in str before the first match
113  static constexpr int maxLeadingLetterPenalty = -15; // maximum penalty for leading letters
114  static constexpr int unmatchedLetterPenalty = -1; // penalty for every letter that doesn't matter
115 
116  static constexpr int nonBeginSequenceBonus = 10;
117 
118  // Initialize score
119  outScore = 100;
120 
121  // Apply leading letter penalty
122  const int penalty = std::max(leadingLetterPenalty * matches[0], maxLeadingLetterPenalty);
123 
124  outScore += penalty;
125 
126  // Apply unmatched penalty
127  const int unmatched = (int)(std::distance(strBegin, strEnd)) - nextMatch;
128  outScore += unmatchedLetterPenalty * unmatched;
129 
130  uint8_t seqs[maxMatches] = {0};
131 
132  // Apply ordering bonuses
133  int j = 0;
134  for (int i = 0; i < nextMatch; ++i) {
135  const uint8_t currIdx = matches[i];
136 
137  if (i > 0) {
138  const uint8_t prevIdx = matches[i - 1];
139 
140  // Sequential
141  if (currIdx == (prevIdx + 1)) {
142  if (j > 0 && seqs[j - 1] == i - 1){
143  outScore += sequentialBonus;
144  seqs[j++] = i;
145  } else {
146  // In sequence, but from first char
147  outScore += nonBeginSequenceBonus;
148  }
149  }
150  }
151 
152  // Check for bonuses based on neighbor character value
153  if (currIdx > 0) {
154  // Camel case
155  const QChar neighbor = *(strBegin + currIdx - 1);
156  const QChar curr = *(strBegin + currIdx);
157  if (neighbor.isLower() && curr.isUpper()) {
158  outScore += camelBonus;
159  }
160 
161  // Separator
162  const bool neighborSeparator = neighbor == QLatin1Char('_') || neighbor == QLatin1Char(' ');
163  if (neighborSeparator) {
164  outScore += separatorBonus;
165  }
166  } else {
167  // First letter
168  outScore += firstLetterBonus;
169  seqs[j++] = i;
170  }
171  }
172  }
173 
174  totalMatches = nextMatch;
175 
176  // Return best result
177  if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
178  // Recursive score is better than "this"
179  memcpy(matches, bestRecursiveMatches, maxMatches);
180  outScore = bestRecursiveScore;
181  return true;
182  } else if (matched) {
183  // "this" score is better than recursive
184  return true;
185  } else {
186  // no match
187  return false;
188  }
189 }
190 // clang-format on
191 
192 static bool match_internal(QStringView pattern, QStringView str, int &outScore, unsigned char *matches)
193 {
194  if (pattern.isEmpty()) {
195  return true;
196  }
197 
198  int recursionCount = 0;
199 
200  auto strIt = str.cbegin();
201  auto patternIt = pattern.cbegin();
202  const auto patternEnd = pattern.cend();
203  const auto strEnd = str.cend();
204 
205  int total = 0;
206  return match_recursive(patternIt, strIt, outScore, strIt, strEnd, patternEnd, nullptr, matches, 0, total, recursionCount);
207 }
208 
209 /**************************************************************/
210 
212 {
213  auto patternIt = pattern.cbegin();
214  /**
215  * Instead of doing
216  *
217  * strIt.toLower() == patternIt.toLower()
218  *
219  * we convert patternIt to Upper / Lower as needed and compare with strIt. This
220  * saves us from calling toLower() on both strings, making things a little bit faster
221  */
222  bool lower = patternIt->isLower();
223  QChar cUp = lower ? patternIt->toUpper() : *patternIt;
224  QChar cLow = lower ? *patternIt : patternIt->toLower();
225  for (auto strIt = str.cbegin(); strIt != str.cend() && patternIt != pattern.cend(); ++strIt) {
226  if (*strIt == cLow || *strIt == cUp) {
227  ++patternIt;
228  lower = patternIt->isLower();
229  cUp = lower ? patternIt->toUpper() : *patternIt;
230  cLow = lower ? *patternIt : patternIt->toLower();
231  }
232  }
233 
234  return patternIt == pattern.cend();
235 }
236 
238 {
239  /**
240  * Simple substring matching to flush out non-matching strings
241  */
242  const bool simpleMatch = matchSimple(pattern, str);
243 
244  KFuzzyMatcher::Result result;
245  result.matched = false;
246  result.score = 0;
247 
248  if (!simpleMatch) {
249  return result;
250  }
251 
252  // actual algorithm
253  int score = 0;
254  uint8_t matches[256];
255  const bool matched = match_internal(pattern, str, score, matches);
256  result.matched = matched;
257  result.score = score;
258  return result;
259 }
260 
262 {
264  if (pattern.isEmpty()) {
265  return ranges;
266  }
267 
268  int totalMatches = 0;
269  int score = 0;
270  int recursionCount = 0;
271 
272  auto strIt = str.cbegin();
273  auto patternIt = pattern.cbegin();
274  const auto patternEnd = pattern.cend();
275  const auto strEnd = str.cend();
276 
277  uint8_t matches[256];
278  auto res = match_recursive(patternIt, strIt, score, strIt, strEnd, patternEnd, nullptr, matches, 0, totalMatches, recursionCount);
279  // didn't match? => We don't care about results
280  if (!res && type == RangeType::FullyMatched) {
281  return {};
282  }
283 
284  int previousMatch = 0;
285  for (int i = 0; i < totalMatches; ++i) {
286  auto matchPos = matches[i];
287  /**
288  * Check if this match is part of the previous
289  * match. If it is, we increase the length of
290  * the last range.
291  */
292  if (!ranges.isEmpty() && matchPos == previousMatch + 1) {
293  ranges.last().length++;
294  } else {
295  /**
296  * This is a new match inside the string
297  */
298  ranges.push_back({/* start: */ matchPos, /* length: */ 1});
299  }
300  previousMatch = matchPos;
301  }
302 
303  return ranges;
304 }
bool isEmpty() const const
QChar toLower() const const
T & last()
QString pattern(Mode mode=Reading)
KCOREADDONS_EXPORT QVector< KFuzzyMatcher::Range > matchedRanges(QStringView pattern, QStringView str, RangeType type=RangeType::FullyMatched)
A function which returns the positions + lengths where the pattern matched inside the str.
void push_back(const T &value)
QChar toUpper() const const
QStringView::const_iterator cbegin() const const
QStringView::const_iterator cend() const const
KCOREADDONS_EXPORT bool matchSimple(QStringView pattern, QStringView str)
Simple fuzzy matching of chars in pattern with chars in str sequentially.
QString::const_iterator cend() const const
bool isEmpty() const const
bool isUpper() const const
RangeType
The type of matches to consider when requesting for ranges.
int score
Score of this match.
Definition: kfuzzymatcher.h:88
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
This is the main function which does scored fuzzy matching.
QString::const_iterator cbegin() const const
The result of a fuzzy match.
Definition: kfuzzymatcher.h:83
typedef const_iterator
bool isLower() const const
bool matched
true if match was successful
Definition: kfuzzymatcher.h:90
This file is part of the KDE documentation.
Documentation copyright © 1996-2022 The KDE developers.
Generated on Mon Jul 4 2022 04:07:21 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.