13#include "textbreaks_p.h"
14#include "tokenizer_p.h"
18class BreakTokenizerPrivate
26 BreakTokenizerPrivate(Type s)
27 : breakFinder(new TextBreaks)
34 ~BreakTokenizerPrivate()
39 TextBreaks::Positions breaks()
const;
41 void shiftBreaks(
int from,
int offset);
42 void replace(
int pos,
int len,
const QString &newWord);
44 TextBreaks *
const breakFinder;
47 int itemPosition = -1;
48 mutable bool cacheValid;
51 bool inAddress =
false;
52 bool ignoreUppercase =
false;
56 void setBuffer(
const QString &b)
63 void regenerateCache()
const;
64 mutable TextBreaks::Positions cachedBreaks;
67void BreakTokenizerPrivate::invalidate()
73bool BreakTokenizerPrivate::hasNext()
const
75 if (itemPosition >= (breaks().size() - 1)) {
82TextBreaks::Positions BreakTokenizerPrivate::breaks()
const
91void BreakTokenizerPrivate::shiftBreaks(
int from,
int offset)
93 for (
int i = 0; i < cachedBreaks.size(); i++) {
94 if (cachedBreaks[i].
start > from) {
95 cachedBreaks[i].start = cachedBreaks[i].start - offset;
100void BreakTokenizerPrivate::regenerateCache()
const
102 if (!breakFinder || buffer.
isEmpty()) {
103 cachedBreaks = TextBreaks::Positions();
107 breakFinder->setText(buffer);
109 if (type == Sentences) {
110 cachedBreaks = breakFinder->sentenceBreaks();
111 }
else if (type == Words) {
112 cachedBreaks = breakFinder->wordBreaks();
119Token BreakTokenizerPrivate::next()
130 const TextBreaks::Positions breaks = this->breaks();
131 const TextBreaks::Position &textBreak = breaks.at(itemPosition);
133 last = {token, textBreak.start};
137void BreakTokenizerPrivate::replace(
int pos,
int len,
const QString &newWord)
139 buffer.
replace(pos, len, newWord);
140 int offset = len - newWord.
length();
142 shiftBreaks(pos, offset);
148WordTokenizer::WordTokenizer(
const QString &buffer)
149 : d(new BreakTokenizerPrivate(BreakTokenizerPrivate::Words))
154WordTokenizer::~WordTokenizer() =
default;
156bool WordTokenizer::hasNext()
const
161void WordTokenizer::setBuffer(
const QString &buffer)
163 d->setBuffer(buffer);
166Token WordTokenizer::next()
171 if (d->inAddress && n.position() > 0 && d->buffer[n.position() - 1].isSpace()) {
172 d->inAddress =
false;
176 if (!d->inAddress || hasNext()) {
177 const int pos = n.position() + n.length();
181 if ((pos + 2 < d->buffer.length()) && d->buffer[pos] ==
QLatin1Char(
':') && d->buffer[pos + 1] ==
QLatin1Char(
'/')
189QString WordTokenizer::buffer()
const
194bool WordTokenizer::isUppercase(
QStringView word)
const
196 for (
int i = 0; i < word.
length(); ++i) {
204void WordTokenizer::setIgnoreUppercase(
bool val)
206 d->ignoreUppercase = val;
209void WordTokenizer::replace(
int pos,
int len,
const QString &newWord)
211 d->replace(pos, len, newWord);
214bool WordTokenizer::isSpellcheckable()
const
216 if (d->last.isNull() || d->last.isEmpty()) {
219 if (!d->last.at(0).isLetter()) {
225 if (d->ignoreUppercase && isUppercase(d->last.token)) {
233SentenceTokenizer::SentenceTokenizer(
const QString &buffer)
234 : d(new BreakTokenizerPrivate(BreakTokenizerPrivate::Sentences))
239SentenceTokenizer::~SentenceTokenizer() =
default;
241bool SentenceTokenizer::hasNext()
const
246void SentenceTokenizer::setBuffer(
const QString &buffer)
248 d->setBuffer(buffer);
251Token SentenceTokenizer::next()
256QString SentenceTokenizer::buffer()
const
261void SentenceTokenizer::replace(
int pos,
int len,
const QString &newWord)
Q_SCRIPTABLE Q_NOREPLY void start()
bool isLetter(char32_t ucs4)
bool isUpper(char32_t ucs4)
bool isEmpty() const const
qsizetype length() const const
QString & replace(QChar before, QChar after, Qt::CaseSensitivity cs)
QStringView mid(qsizetype start, qsizetype length) const const
QChar at(qsizetype n) const const
qsizetype length() const const