Baloo

termgenerator.cpp
1/*
2 This file is part of the KDE Baloo project.
3 SPDX-FileCopyrightText: 2014-2015 Vishesh Handa <vhanda@kde.org>
4
5 SPDX-License-Identifier: LGPL-2.1-or-later
6*/
7
8#include "termgenerator.h"
9
10#include <QTextBoundaryFinder>
11
12using namespace Baloo;
13
14namespace {
15
17{
18 // Remove all accents. It is important to call toLower after normalization,
19 // since some exotic unicode symbols can remain uppercase
20 const QString denormalized = str.normalized(QString::NormalizationForm_KD).toLower();
21
24 for (const auto& c : denormalized) {
25 if (!c.isMark()) {
27 }
28 }
29
31}
32
33void appendTerm(QByteArrayList &list, const QString &term)
34{
35 if (!term.isEmpty()) {
36 // Truncate the string to avoid arbitrarily long terms
37 list << QStringView(term).left(TermGenerator::maxTermSize).toUtf8();
38 }
39}
40
41}
42
43TermGenerator::TermGenerator(Document& doc)
44 : m_doc(doc)
45 , m_position(1)
46{
47}
48
49void TermGenerator::indexText(const QString& text)
50{
51 indexText(text, QByteArray());
52}
53
54QByteArrayList TermGenerator::termList(const QString& text_)
55{
56 QString text(text_);
57 text.replace(QLatin1Char('_'), QLatin1Char(' '));
58
59 int start = 0;
60
61 auto isSkipChar = [] (const QChar& c) {
62 return c.isPunct() || c.isMark() || c.isSpace() || (!c.isPrint() && !c.isSurrogate());
63 };
64
67 for (; bf.position() != -1; bf.toNextBoundary()) {
68 int end = bf.position();
69 while (start < end && isSkipChar(text[start])) {
70 start++;
71 }
72 if (end == start) {
73 continue;
74 }
75
76 // Typically we commit a term when we have an EndOfItem, starting
77 // from the last StartOfItem, everything between last EndOfItem and
78 // StartOfItem is just whitespace and punctuation. Unfortunately,
79 // most CJK characters do not trigger a StartOfItem and thus no
80 // EndOfItem, so everything in front of a StartOfItem has to be
81 // committed as well
82 bool commit = bf.boundaryReasons() & (QTextBoundaryFinder::EndOfItem | QTextBoundaryFinder::StartOfItem);
83
84 // Also commit term if end-of-text is reached or when we find
85 // any punctuation
86 if (!commit & (end == text.size() || isSkipChar(text[end]))) {
87 commit = true;
88 }
89
90 if (commit) {
91 const QString term = normalizeTerm(text.mid(start, end - start));
92 appendTerm(list, term);
93 start = end;
94 }
95 }
96 return list;
97}
98
99void TermGenerator::indexText(const QString& text, const QByteArray& prefix)
100{
101 const QByteArrayList terms = termList(text);
102 if (terms.size() == 1) {
103 QByteArray finalArr = prefix + terms[0];
104 m_doc.addTerm(finalArr);
105 return;
106 }
107 for (const QByteArray& term : terms) {
108 QByteArray finalArr = prefix + term;
109
110 m_doc.addPositionTerm(finalArr, m_position);
111 m_position++;
112 }
113 m_position++;
114}
115
116void TermGenerator::indexFileNameText(const QString& text)
117{
118 const QByteArray prefix = QByteArrayLiteral("F");
119 const QByteArrayList terms = termList(text);
120 if (terms.size() == 1) {
121 QByteArray finalArr = prefix + terms[0];
122 m_doc.addFileNameTerm(finalArr);
123 return;
124 }
125 for (const QByteArray& term : terms) {
126 QByteArray finalArr = prefix + term;
127
128 m_doc.addFileNamePositionTerm(finalArr, m_position);
129 m_position++;
130 }
131 m_position++;
132}
133
134void TermGenerator::indexXattrText(const QString& text, const QByteArray& prefix)
135{
136 const QByteArrayList terms = termList(text);
137 if (terms.size() == 1) {
138 QByteArray finalArr = prefix + terms[0];
139 m_doc.addXattrTerm(finalArr);
140 return;
141 }
142 for (const QByteArray& term : terms) {
143 QByteArray finalArr = prefix + term;
144
145 m_doc.addXattrPositionTerm(finalArr, m_position);
146 m_position++;
147 }
148 m_position++;
149}
150
151int TermGenerator::position() const
152{
153 return m_position;
154}
155
156void TermGenerator::setPosition(int position)
157{
158 m_position = position;
159}
Q_SCRIPTABLE Q_NOREPLY void start()
Implements storage for docIds without any associated data Instantiated for:
Definition coding.cpp:11
KIOCORE_EXPORT QStringList list(const QString &fileClass)
const QList< QKeySequence > & end()
void append(QList< T > &&value)
void reserve(qsizetype size)
qsizetype size() const const
NormalizationForm_KD
bool isEmpty() const const
QString mid(qsizetype position, qsizetype n) const const
QString & replace(QChar before, QChar after, Qt::CaseSensitivity cs)
qsizetype size() const const
QStringView left(qsizetype length) const const
QByteArray toUtf8() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:20:16 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.