9#include "kateregexpsearch.h"
11#include <ktexteditor/document.h>
17#ifdef FAST_DEBUG_ENABLE
18#define FAST_DEBUG(x) qCDebug(LOG_KTE) << x
23class KateRegExpSearch::ReplacementStream
27 counter(
int value,
int minWidth)
55 ReplacementStream(
const QStringList &capturedTexts);
62 ReplacementStream &operator<<(
const QString &);
63 ReplacementStream &operator<<(
const counter &);
64 ReplacementStream &operator<<(
const cap &);
65 ReplacementStream &operator<<(CaseConversion);
69 CaseConversion m_caseConversion;
// Constructor (initializer list only is visible here): keeps the list of
// captured texts and initialises the case conversion mode to keepCase.
73KateRegExpSearch::ReplacementStream::ReplacementStream(
const QStringList &capturedTexts)
74 : m_capturedTexts(capturedTexts)
75 , m_caseConversion(keepCase)
// Streams a string into the replacement. The visible lines dispatch on the
// current m_caseConversion mode, and two of the branches set the mode back to
// keepCase afterwards.
// NOTE(review): the case labels are not visible in this excerpt; presumably
// the resetting branches are the one-shot conversions — confirm.
79KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(
const QString &str)
81 switch (m_caseConversion) {
91 m_caseConversion = keepCase;
104 m_caseConversion = keepCase;
// Streams a counter into the replacement: c.value is formatted as a base-10
// number with a field width of c.minWidth, using '0' as the fill character,
// and appended to m_str.
118KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(
const counter &c)
121 m_str.append(QStringLiteral(
"%1").arg(c.value, c.minWidth, 10,
QLatin1Char(
'0')));
// Streams the text of capture group cap.n into the replacement.
// The index is range-checked against m_capturedTexts first. The text is then
// sent through operator<< on *this rather than appended directly.
126KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(
const cap &cap)
128 if (0 <= cap.n && cap.n < m_capturedTexts.size()) {
129 (*this) << m_capturedTexts[cap.n];
// Streams a case conversion marker: only records the requested mode in
// m_caseConversion; no text is appended by the visible line.
138KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(CaseConversion caseConversion)
140 m_caseConversion = caseConversion;
150 : m_document(document)
155struct TwoViewCursor {
198 const QString repairedPattern = repairPattern(pattern, stillMultiLine);
208 if (stillMultiLine) {
217 if (!repairedRegex.
isValid()) {
221 const int rangeStartLine = inputRange.
start().
line();
222 const int rangeStartCol = inputRange.
start().
column();
224 const int rangeEndLine = inputRange.
end().
line();
225 const int rangeEndCol = inputRange.
end().
column();
227 if (stillMultiLine) {
228 const int rangeLineCount = rangeEndLine - rangeStartLine + 1;
229 FAST_DEBUG(
"regular expression search (lines " << rangeStartLine <<
".." << rangeEndLine <<
")");
231 const int docLineCount = m_document->
lines();
233 if (rangeStartLine >= docLineCount) {
// Running sum of the per-line contributions added at the end of the loop below.
238 int maxMatchOffset = 0;
// Visit every line of the input range; i is relative to rangeStartLine.
242 for (
int i = 0; i < rangeLineCount; ++i) {
243 const int docLineIndex = rangeStartLine + i;
// Guard against line indices that lie outside the document.
244 if (docLineIndex < 0 || docLineCount <= docLineIndex) {
// Remember the length of this line and append its text to the joined buffer.
248 const QString textLine = m_document->
line(docLineIndex);
249 lineLens[i] = textLine.
length();
250 wholeRange.
append(textLine);
// Applies to every line except the last one of the range (the body of this
// branch is not visible in this excerpt).
257 if (i != (rangeLineCount - 1)) {
// NOTE(review): i is range-relative (0 .. rangeLineCount - 1) while rangeEndLine
// is an absolute document line, so `i == rangeEndLine` selects rangeEndCol for
// the last line only when rangeStartLine == 0. `docLineIndex == rangeEndLine`
// looks like the intended comparison — confirm.
262 maxMatchOffset += (i == rangeEndLine) ? rangeEndCol : lineLens.
at(i) + 1;
264 FAST_DEBUG(
" line" << i <<
"has length" << lineLens.
at(i));
267 FAST_DEBUG(
"Max. match offset" << maxMatchOffset);
277 match.swap(curMatch);
284 curMatch = iter.
next();
287 match.swap(curMatch);
294 FAST_DEBUG(
"not found");
303 for (
int c = 0; c <= numCaptures; ++c) {
304 const int openIndex = match.capturedStart(c);
305 IndexPair &pair = indexPairs[c];
306 if (openIndex == -1) {
309 pair.closeIndex = -1;
310 FAST_DEBUG(
"capture []");
312 const int closeIndex = match.capturedEnd(c);
313 pair.openIndex = openIndex;
314 pair.closeIndex = closeIndex;
315 FAST_DEBUG(
"capture [" << pair.openIndex <<
".." << pair.closeIndex <<
"]");
318 if (!indicesToCursors.
contains(openIndex)) {
319 TwoViewCursor *twoViewCursor =
new TwoViewCursor;
320 twoViewCursor->index = openIndex;
321 indicesToCursors.
insert(openIndex, twoViewCursor);
322 FAST_DEBUG(
" capture group start index added: " << openIndex);
324 if (!indicesToCursors.
contains(closeIndex)) {
325 TwoViewCursor *twoViewCursor =
new TwoViewCursor;
326 twoViewCursor->index = closeIndex;
327 indicesToCursors.
insert(closeIndex, twoViewCursor);
328 FAST_DEBUG(
" capture group end index added: " << closeIndex);
338 for (TwoViewCursor *twoViewCursor : std::as_const(indicesToCursors)) {
340 const int index = twoViewCursor->index;
341 FAST_DEBUG(
"resolving position" << index);
343 while (curRelIndex <= index) {
344 FAST_DEBUG(
"walk pos (" << curRelLine <<
"," << curRelCol <<
") = " << curRelIndex <<
"relative, steps more to go" << index - curRelIndex);
346 const int curRelLineLen = lineLens.
at(curRelLine);
347 const int curLineRemainder = curRelLineLen - curRelCol;
348 const int lineFeedIndex = curRelIndex + curLineRemainder;
349 if (index <= lineFeedIndex) {
350 if (index == lineFeedIndex) {
352 FAST_DEBUG(
" on line feed");
353 const int absLine = curRelLine + rangeStartLine;
354 twoViewCursor->line = absLine;
355 twoViewCursor->col = curRelLineLen;
358 const int advance = (index - curRelIndex) + 1;
361 curRelIndex += advance;
364 FAST_DEBUG(
" before line feed");
365 const int diff = (index - curRelIndex);
366 const int absLine = curRelLine + rangeStartLine;
367 const int absCol = curRelCol + diff;
368 twoViewCursor->line = absLine;
369 twoViewCursor->col = absCol;
372 const int advance = diff + 1;
373 curRelCol += advance;
374 curRelIndex += advance;
376 FAST_DEBUG(
"position(" << twoViewCursor->line <<
"," << twoViewCursor->col <<
")");
380 FAST_DEBUG(
" not on this line");
383 const int advance = curLineRemainder + 1;
384 curRelIndex += advance;
391 for (
int y = 0; y <= numCaptures; y++) {
392 IndexPair &pair = indexPairs[y];
393 if (!(pair.openIndex == -1 || pair.closeIndex == -1)) {
394 const TwoViewCursor *
const openCursors = indicesToCursors.
value(pair.openIndex);
395 const TwoViewCursor *
const closeCursors = indicesToCursors.
value(pair.closeIndex);
396 const int startLine = openCursors->line;
397 const int startCol = openCursors->col;
398 const int endLine = closeCursors->line;
399 const int endCol = closeCursors->col;
400 FAST_DEBUG(
"range " << y <<
": (" << startLine <<
", " << startCol <<
")..(" << endLine <<
", " << endCol <<
")");
406 qDeleteAll(indicesToCursors);
// Column bounds of the input range for the single-line search branch.
411 const int rangeStartCol = inputRange.
start().
column();
// NOTE(review): column() returns int, and the multi-line branch stores the
// same value as int; here it is converted to uint, so a negative column would
// become a very large value — confirm this is intended.
412 const uint rangeEndCol = inputRange.
end().
column();
414 const int rangeStartLine = inputRange.
start().
line();
415 const int rangeEndLine = inputRange.
end().
line();
417 const int forInit = backwards ? rangeEndLine : rangeStartLine;
419 const int forInc = backwards ? -1 : +1;
421 FAST_DEBUG(
"single line " << (backwards ? rangeEndLine : rangeStartLine) <<
".." << (backwards ? rangeStartLine : rangeEndLine));
423 for (
int j = forInit; (rangeStartLine <= j) && (j <= rangeEndLine); j += forInc) {
424 if (j < 0 || m_document->lines() <= j) {
425 FAST_DEBUG(
"searchText | line " << j <<
": no");
431 const int offset = (j == rangeStartLine) ? rangeStartCol : 0;
432 const int endLineMaxOffset = (j == rangeEndLine) ? rangeEndCol : textLine.
length();
443 match.swap(curMatch);
448 match = repairedRegex.
match(textLine, offset);
449 if (match.hasMatch() && match.capturedEnd() <= endLineMaxOffset) {
455 FAST_DEBUG(
"line " << j <<
": yes");
// Debug trace of the whole-match range (index 0) on line j.
// NOTE(review): `match.capturedStart` below lacks the call parentheses that
// `match.capturedEnd()` has. Streaming a member function name is ill-formed,
// so this only goes unnoticed while FAST_DEBUG discards its argument — confirm
// and add `()`.
462 FAST_DEBUG(
"result range " << 0 <<
": (" << j <<
", " << match.capturedStart <<
")..(" << j <<
", " << match.capturedEnd() <<
")");
464 for (
int y = 1; y <= numCaptures; ++y) {
465 const int openIndex = match.capturedStart(y);
467 if (openIndex == -1) {
470 FAST_DEBUG(
"capture []");
472 const int closeIndex = match.capturedEnd(y);
474 FAST_DEBUG(
"result range " << y <<
": (" << j <<
", " << openIndex <<
")..(" << j <<
", " << closeIndex <<
")");
481 FAST_DEBUG(
"searchText | line " << j <<
": no");
501 const int inputLen = text.
length();
505 ReplacementStream out(capturedTexts);
507 while (input < inputLen) {
508 switch (text[input].unicode()) {
515 if (input + 1 >= inputLen) {
522 switch (text[input + 1].unicode()) {
// Fewer than three characters follow text[input + 1]: no room for a
// three-digit code, so capture group 0 is emitted instead.
524 if (input + 4 >= inputLen) {
525 out << ReplacementStream::cap(0);
528 bool stripAndSkip =
false;
// The three characters at input + 2 .. input + 4 must be octal digits; the
// first is limited to '0'..'3', which bounds the decoded value by 3*64 + 7*8 + 7 = 255.
529 const ushort text_2 = text[input + 2].unicode();
530 if ((text_2 >= L
'0') && (text_2 <= L
'3')) {
531 const ushort text_3 = text[input + 3].unicode();
532 if ((text_3 >= L
'0') && (text_3 <= L
'7')) {
533 const ushort text_4 = text[input + 4].unicode();
534 if ((text_4 >= L
'0') && (text_4 <= L
'7')) {
// Convert each digit character to its numeric value:
// 7 - ('7' - c) is the same as c - '0'.
536 for (
int i = 0; i < 3; i++) {
537 digits[i] = 7 - (L
'7' - text[input + 2 + i].unicode());
// Combine the three base-8 digits into the character code.
539 const int ch = 64 * digits[0] + 8 * digits[1] + digits[2];
553 out << ReplacementStream::cap(0);
569 out << ReplacementStream::cap(9 - (L
'9' - text[input + 1].unicode()));
578 while ((input + captureSize) < inputLen) {
579 const ushort nextDigit = text[input + captureSize].unicode();
580 if ((nextDigit >= L
'0') && (nextDigit <= L
'9')) {
585 if (nextDigit == L
'}') {
591 out << ReplacementStream::cap(
capture);
592 input += captureSize;
601 if (!replacementGoodies) {
603 out << text[input + 1];
606 switch (text[input + 1].unicode()) {
608 out << ReplacementStream::lowerCase;
612 out << ReplacementStream::lowerCaseFirst;
616 out << ReplacementStream::upperCase;
620 out << ReplacementStream::upperCaseFirst;
625 out << ReplacementStream::keepCase;
632 if (!replacementGoodies) {
634 out << text[input + 1];
// Scan the run of '#' characters that starts at input + 1, stopping at the end
// of the text. NOTE(review): the loop body is not visible here; presumably it
// increments minWidth once per '#' — confirm.
641 while ((input + minWidth + 1 < inputLen) && (text[input + minWidth + 1].unicode() == L
'#')) {
// Emit the replacement counter with the collected width, then skip the
// character at input plus minWidth further characters.
644 out << ReplacementStream::counter(replacementCounter, minWidth);
645 input += 1 + minWidth;
680 if (input + 5 >= inputLen) {
682 out << text[input + 1];
685 bool stripAndSkip =
false;
686 const ushort text_2 = text[input + 2].unicode();
687 if (((text_2 >= L
'0') && (text_2 <= L
'9')) || ((text_2 >= L
'a') && (text_2 <= L
'f')) || ((text_2 >= L
'A') && (text_2 <= L
'F'))) {
688 const ushort text_3 = text[input + 3].unicode();
689 if (((text_3 >= L
'0') && (text_3 <= L
'9')) || ((text_3 >= L
'a') && (text_3 <= L
'f')) || ((text_3 >= L
'A') && (text_3 <= L
'F'))) {
690 const ushort text_4 = text[input + 4].unicode();
691 if (((text_4 >= L
'0') && (text_4 <= L
'9')) || ((text_4 >= L
'a') && (text_4 <= L
'f')) || ((text_4 >= L
'A') && (text_4 <= L
'F'))) {
692 const ushort text_5 = text[input + 5].unicode();
693 if (((text_5 >= L
'0') && (text_5 <= L
'9')) || ((text_5 >= L
'a') && (text_5 <= L
'f'))
694 || ((text_5 >= L
'A') && (text_5 <= L
'F'))) {
// Convert the four characters at input + 2 .. input + 5 to their hex digit values.
696 for (
int i = 0; i < 4; i++) {
697 const ushort cur = text[input + 2 + i].unicode();
// '0'..'9': 9 - ('9' - cur) is the same as cur - '0'.
698 if ((cur >= L
'0') && (cur <= L
'9')) {
699 digits[i] = 9 - (L
'9' - cur);
700 }
// 'a'..'f': 15 - ('f' - cur) is the same as 10 + (cur - 'a').
else if ((cur >= L
'a') && (cur <= L
'f')) {
701 digits[i] = 15 - (L
'f' - cur);
// Remaining case uses the upper-case offset: 15 - ('F' - cur) is 10 + (cur - 'A').
703 digits[i] = 15 - (L
'F' - cur);
// Combine the four base-16 digits into one 16-bit character code.
707 const int ch = 4096 * digits[0] + 256 * digits[1] + 16 * digits[2] + digits[3];
723 out << text[input + 1];
731 out << text[input + 1];
745QString KateRegExpSearch::repairPattern(
const QString &pattern,
bool &stillMultiLine)
754 const int inputLen = pattern.
length();
759 output.
reserve(2 * inputLen + 1);
762 bool insideClass =
false;
764 stillMultiLine =
false;
// Scan the pattern one position at a time, dispatching on the current
// character and, in the branch shown, on the character after it.
766 while (input < inputLen) {
769 switch (pattern[input].unicode()) {
// NOTE(review): pattern[input + 1] is read without a visible check that
// input + 1 < inputLen; if the current character is the last one of the
// pattern this indexes one past the end — confirm this case is handled.
771 switch (pattern[input + 1].unicode()) {
773 if (input + 5 < inputLen) {
775 output.
append(patternView.mid(input, 6));
779 output.
append(patternView.mid(input, 2));
782 stillMultiLine =
true;
786 if (input + 4 < inputLen) {
788 output.
append(patternView.mid(input, 5));
792 output.
append(patternView.mid(input, 2));
795 stillMultiLine =
true;
805 stillMultiLine =
true;
811 output.
append(patternView.mid(input, 2));
819 output.
append(pattern[input]);
825 output.
append(pattern[input]);
// Second dispatch with the same shape: current character first, then the
// character that follows it.
829 switch (pattern[input].unicode()) {
// NOTE(review): no visible check that input + 1 < inputLen before this read —
// confirm a pattern ending on the current character cannot reach this line.
831 switch (pattern[input + 1].unicode()) {
833 if (input + 5 < inputLen) {
835 output.
append(patternView.mid(input, 6));
839 output.
append(patternView.mid(input, 2));
842 stillMultiLine =
true;
846 if (input + 4 < inputLen) {
848 output.
append(patternView.mid(input, 5));
852 output.
append(patternView.mid(input, 2));
855 stillMultiLine =
true;
865 stillMultiLine =
true;
870 output.
append(patternView.mid(input, 2));
878 output.
append(pattern[input]);
884 output.
append(pattern[input]);
893#ifdef FAST_DEBUG_ENABLE
894#undef FAST_DEBUG_ENABLE
constexpr int column() const noexcept
Retrieve the column on which this cursor is situated.
constexpr int line() const noexcept
Retrieve the line on which this cursor is situated.
A KParts derived class representing a text document.
virtual QString line(int line) const =0
Get a single text line.
virtual int lines() const =0
Get the count of lines of the document.
An object representing a section of text, from one Cursor to another.
constexpr Cursor end() const noexcept
Get the end position of this range.
constexpr Cursor start() const noexcept
Get the start position of this range.
constexpr bool isEmpty() const noexcept
Returns true if this range contains no characters, i.e. its start and end positions are the same.
static constexpr Range invalid() noexcept
Returns an invalid range.
constexpr bool isValid() const noexcept
Validity check.
QList< KTextEditor::Range > search(const QString &pattern, KTextEditor::Range inputRange, bool backwards=false, QRegularExpression::PatternOptions options=QRegularExpression::NoPatternOption)
Search for the regular expression pattern inside the range inputRange.
static QString escapePlaintext(const QString &text)
Returns a modified version of text where escape sequences are resolved.
static QString buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter)
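Returns a modified version of text where escape sequences, references to captured texts, case conversion markers and counter sequences are resolved.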
Q_SCRIPTABLE Q_NOREPLY void capture(double settleTime=0.0)
char32_t toLower(char32_t ucs4)
char32_t toUpper(char32_t ucs4)
const_reference at(qsizetype i) const const
bool contains(const Key &key) const const
iterator insert(const Key &key, const T &value)
T value(const Key &key, const T &defaultValue) const const
QRegularExpressionMatchIterator globalMatch(QStringView subjectView, qsizetype offset, MatchType matchType, MatchOptions matchOptions) const const
QRegularExpressionMatch match(QStringView subjectView, qsizetype offset, MatchType matchType, MatchOptions matchOptions) const const
int captureCount() const const
bool isValid() const const
QString pattern() const const
PatternOptions patternOptions() const const
void setPattern(const QString &pattern)
void setPatternOptions(PatternOptions options)
qsizetype capturedEnd(QStringView name) const const
bool hasNext() const const
QRegularExpressionMatch next()
QString & append(QChar ch)
const QChar at(qsizetype position) const const
bool isEmpty() const const
qsizetype length() const const
QString number(double n, char format, int precision)
void reserve(qsizetype size)
QString toLower() const const
QString toUpper() const const