KCoreAddons

kfuzzymatcher.cpp
1 /*
2  This file is part of the KDE libraries
3 
4  SPDX-FileCopyrightText: 2017 Forrest Smith <[email protected]>
5  SPDX-FileCopyrightText: 2021 Waqar Ahmed <[email protected]>
6 
7  SPDX-License-Identifier: LGPL-2.0-or-later
8 */
9 #include "kfuzzymatcher.h"
10 
11 #include <QString>
12 #include <QStringView>
13 
14 /**
15  * Custom toLower function which is much faster than
16  * c.toLower() directly
17  */
18 static inline constexpr QChar toLower(QChar c)
19 {
20  return c.isLower() ? c : c.toLower();
21 }
22 
23 // internal
24 // clang-format off
25 static bool match_recursive(QStringView::const_iterator pattern,
27  int &outScore,
28  const QStringView::const_iterator strBegin,
29  const QStringView::const_iterator strEnd,
30  const QStringView::const_iterator patternEnd,
31  const uint8_t *srcMatches,
32  uint8_t *matches,
33  int nextMatch,
34  int &recursionCount)
35 {
36  static constexpr int recursionLimit = 10;
37  // max number of matches allowed, this should be enough
38  static constexpr int maxMatches = 256;
39 
40  // Count recursions
41  ++recursionCount;
42  if (recursionCount >= recursionLimit) {
43  return false;
44  }
45 
46  // Detect end of strings
47  if (pattern == patternEnd || str == strEnd) {
48  return false;
49  }
50 
51  // Recursion params
52  bool recursiveMatch = false;
53  uint8_t bestRecursiveMatches[maxMatches];
54  int bestRecursiveScore = 0;
55 
56  // Loop through pattern and str looking for a match
57  bool firstMatch = true;
58  QChar currentPatternChar = toLower(*pattern);
59  // Are we matching in sequence start from start?
60  bool matchingInSequence = true;
61  while (pattern != patternEnd && str != strEnd) {
62  // Found match
63  if (currentPatternChar == toLower(*str)) {
64  // Supplied matches buffer was too short
65  if (nextMatch >= maxMatches) {
66  return false;
67  }
68 
69  // "Copy-on-Write" srcMatches into matches
70  if (firstMatch && srcMatches) {
71  memcpy(matches, srcMatches, nextMatch);
72  firstMatch = false;
73  }
74 
75  // Recursive call that "skips" this match
76  uint8_t recursiveMatches[maxMatches];
77  int recursiveScore = 0;
78  const auto strNextChar = std::next(str);
79  if (!matchingInSequence && match_recursive(pattern, strNextChar, recursiveScore, strBegin,
80  strEnd, patternEnd, matches, recursiveMatches,
81  nextMatch, recursionCount)) {
82  // Pick best recursive score
83  if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
84  memcpy(bestRecursiveMatches, recursiveMatches, maxMatches);
85  bestRecursiveScore = recursiveScore;
86  }
87  recursiveMatch = true;
88  }
89 
90  // Advance
91  matches[nextMatch++] = (uint8_t)(std::distance(strBegin, str));
92  ++pattern;
93  currentPatternChar = toLower(*pattern);
94  } else {
95  matchingInSequence = false;
96  }
97  ++str;
98  }
99 
100  // Determine if full pattern was matched
101  const bool matched = pattern == patternEnd;
102 
103  // Calculate score
104  if (matched) {
105  static constexpr int sequentialBonus = 25;
106  static constexpr int separatorBonus = 25; // bonus if match occurs after a separator
107  static constexpr int camelBonus = 25; // bonus if match is uppercase and prev is lower
108  static constexpr int firstLetterBonus = 15; // bonus if the first letter is matched
109 
110  static constexpr int leadingLetterPenalty = -5; // penalty applied for every letter in str before the first match
111  static constexpr int maxLeadingLetterPenalty = -15; // maximum penalty for leading letters
112  static constexpr int unmatchedLetterPenalty = -1; // penalty for every letter that doesn't matter
113 
114  static constexpr int nonBeginSequenceBonus = 10;
115 
116  // Initialize score
117  outScore = 100;
118 
119  // Apply leading letter penalty
120  const int penalty = std::max(leadingLetterPenalty * matches[0], maxLeadingLetterPenalty);
121 
122  outScore += penalty;
123 
124  // Apply unmatched penalty
125  const int unmatched = (int)(std::distance(strBegin, strEnd)) - nextMatch;
126  outScore += unmatchedLetterPenalty * unmatched;
127 
128  uint8_t seqs[maxMatches] = {0};
129 
130  // Apply ordering bonuses
131  int j = 0;
132  for (int i = 0; i < nextMatch; ++i) {
133  const uint8_t currIdx = matches[i];
134 
135  if (i > 0) {
136  const uint8_t prevIdx = matches[i - 1];
137 
138  // Sequential
139  if (currIdx == (prevIdx + 1)) {
140  if (j > 0 && seqs[j - 1] == i - 1){
141  outScore += sequentialBonus;
142  seqs[j++] = i;
143  } else {
144  // In sequence, but from first char
145  outScore += nonBeginSequenceBonus;
146  }
147  }
148  }
149 
150  // Check for bonuses based on neighbor character value
151  if (currIdx > 0) {
152  // Camel case
153  const QChar neighbor = *(strBegin + currIdx - 1);
154  const QChar curr = *(strBegin + currIdx);
155  if (neighbor.isLower() && curr.isUpper()) {
156  outScore += camelBonus;
157  }
158 
159  // Separator
160  const bool neighborSeparator = neighbor == QLatin1Char('_') || neighbor == QLatin1Char(' ');
161  if (neighborSeparator) {
162  outScore += separatorBonus;
163  }
164  } else {
165  // First letter
166  outScore += firstLetterBonus;
167  seqs[j++] = i;
168  }
169  }
170  }
171 
172  // Return best result
173  if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
174  // Recursive score is better than "this"
175  memcpy(matches, bestRecursiveMatches, maxMatches);
176  outScore = bestRecursiveScore;
177  return true;
178  } else if (matched) {
179  // "this" score is better than recursive
180  return true;
181  } else {
182  // no match
183  return false;
184  }
185 }
186 // clang-format on
187 
188 static bool match_internal(QStringView pattern, QStringView str, int &outScore, unsigned char *matches)
189 {
190  if (pattern.isEmpty()) {
191  return true;
192  }
193 
194  int recursionCount = 0;
195 
196  auto strIt = str.cbegin();
197  auto patternIt = pattern.cbegin();
198  const auto patternEnd = pattern.cend();
199  const auto strEnd = str.cend();
200 
201  return match_recursive(patternIt, strIt, outScore, strIt, strEnd, patternEnd, nullptr, matches, 0, recursionCount);
202 }
203 
204 /**************************************************************/
205 
207 {
208  auto patternIt = pattern.cbegin();
209  /**
210  * Instead of doing
211  *
212  * strIt.toLower() == patternIt.toLower()
213  *
214  * we convert patternIt to Upper / Lower as needed and compare with strIt. This
215  * saves us from calling toLower() on both strings, making things a little bit faster
216  */
217  bool lower = patternIt->isLower();
218  QChar cUp = lower ? patternIt->toUpper() : *patternIt;
219  QChar cLow = lower ? *patternIt : patternIt->toLower();
220  for (auto strIt = str.cbegin(); strIt != str.cend() && patternIt != pattern.cend(); ++strIt) {
221  if (*strIt == cLow || *strIt == cUp) {
222  ++patternIt;
223  lower = patternIt->isLower();
224  cUp = lower ? patternIt->toUpper() : *patternIt;
225  cLow = lower ? *patternIt : patternIt->toLower();
226  }
227  }
228 
229  return patternIt == pattern.cend();
230 }
231 
233 {
234  /**
235  * Simple substring matching to flush out non-matching strings
236  */
237  const bool simpleMatch = matchSimple(pattern, str);
238 
239  KFuzzyMatcher::Result result;
240  result.matched = false;
241  result.score = 0;
242 
243  if (!simpleMatch) {
244  return result;
245  }
246 
247  // actual algorithm
248  int score = 0;
249  uint8_t matches[256];
250  const bool matched = match_internal(pattern, str, score, matches);
251  result.matched = matched;
252  result.score = score;
253  return result;
254 }
bool isEmpty() const const
The result of a fuzzy match.
Definition: kfuzzymatcher.h:81
KCOREADDONS_EXPORT bool matchSimple(QStringView pattern, QStringView str)
Simple fuzzy matching of chars in pattern with chars in str sequentially.
int score
Score of this match.
Definition: kfuzzymatcher.h:86
bool matched
true if match was successful
Definition: kfuzzymatcher.h:88
QStringView::const_iterator cbegin() const const
typedef const_iterator
bool isUpper() const const
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
This is the main function which does scored fuzzy matching.
QChar toLower() const const
QChar toUpper() const const
QStringView::const_iterator cend() const const
bool isLower() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2021 The KDE developers.
Generated on Sun Apr 18 2021 23:02:02 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.