KSyntaxHighlighting

abstracthighlighter.cpp
1/*
2 SPDX-FileCopyrightText: 2016 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#include "abstracthighlighter.h"
8#include "abstracthighlighter_p.h"
9#include "context_p.h"
10#include "definition_p.h"
11#include "foldingregion.h"
12#include "format.h"
13#include "ksyntaxhighlighting_logging.h"
14#include "repository.h"
15#include "repository_p.h"
16#include "rule_p.h"
17#include "state.h"
18#include "state_p.h"
19#include "theme.h"
20
21using namespace KSyntaxHighlighting;
22
23AbstractHighlighterPrivate::AbstractHighlighterPrivate()
24{
25}
26
27AbstractHighlighterPrivate::~AbstractHighlighterPrivate()
28{
29}
30
31void AbstractHighlighterPrivate::ensureDefinitionLoaded()
32{
33 auto defData = DefinitionData::get(m_definition);
34 if (Q_UNLIKELY(!m_definition.isValid() && defData->repo && !m_definition.name().isEmpty())) {
35 qCDebug(Log) << "Definition became invalid, trying re-lookup.";
36 m_definition = defData->repo->definitionForName(m_definition.name());
37 defData = DefinitionData::get(m_definition);
38 }
39
40 if (Q_UNLIKELY(!defData->repo && !defData->fileName.isEmpty())) {
41 qCCritical(Log) << "Repository got deleted while a highlighter is still active!";
42 }
43
44 if (m_definition.isValid()) {
45 defData->load();
46 }
47}
48
49AbstractHighlighter::AbstractHighlighter()
50 : d_ptr(new AbstractHighlighterPrivate)
51{
52}
53
54AbstractHighlighter::AbstractHighlighter(AbstractHighlighterPrivate *dd)
55 : d_ptr(dd)
56{
57}
58
59AbstractHighlighter::~AbstractHighlighter()
60{
61 delete d_ptr;
62}
63
65{
66 return d_ptr->m_definition;
67}
68
70{
72 d->m_definition = def;
73}
74
76{
78 return d->m_theme;
79}
80
82{
84 d->m_theme = theme;
85}
86
87/**
88 * Returns the index of the first non-space character. If the line is empty,
89 * or only contains white spaces, text.size() is returned.
90 */
91static inline int firstNonSpaceChar(QStringView text)
92{
93 for (int i = 0; i < text.length(); ++i) {
94 if (!text[i].isSpace()) {
95 return i;
96 }
97 }
98 return text.size();
99}
100
102{
104
105 // verify definition, deal with no highlighting being enabled
106 d->ensureDefinitionLoaded();
107 const auto defData = DefinitionData::get(d->m_definition);
108 if (!d->m_definition.isValid() || !defData->isLoaded()) {
109 applyFormat(0, text.size(), Format());
110 return State();
111 }
112
113 // limit the cache for unification to some reasonable size
114 // we use here at the moment 64k elements to not hog too much memory
115 // and to make the clearing no big stall
116 if (defData->unify.size() > 64 * 1024)
117 defData->unify.clear();
118
119 // verify/initialize state
120 auto newState = state;
121 auto stateData = StateData::get(newState);
122 bool isSharedData = true;
123 if (Q_UNLIKELY(stateData && stateData->m_defId != defData->id)) {
124 qCDebug(Log) << "Got invalid state, resetting.";
125 stateData = nullptr;
126 }
127 if (Q_UNLIKELY(!stateData)) {
128 stateData = StateData::reset(newState);
129 stateData->push(defData->initialContext(), QStringList());
130 stateData->m_defId = defData->id;
131 isSharedData = false;
132 }
133
134 // process empty lines
135 if (Q_UNLIKELY(text.isEmpty())) {
136 /**
137 * handle line empty context switches
138 * guard against endless loops
139 * see https://phabricator.kde.org/D18509
140 */
141 int endlessLoopingCounter = 0;
142 while (!stateData->topContext()->lineEmptyContext().isStay()) {
143 /**
144 * line empty context switches
145 */
146 if (!d->switchContext(stateData, stateData->topContext()->lineEmptyContext(), QStringList(), newState, isSharedData)) {
147 /**
148 * end when trying to #pop the main context
149 */
150 break;
151 }
152
153 if (stateData->topContext()->stopEmptyLineContextSwitchLoop()) {
154 break;
155 }
156
157 // guard against endless loops
158 ++endlessLoopingCounter;
159 if (endlessLoopingCounter > 1024) {
160 qCDebug(Log) << "Endless switch context transitions for line empty context, aborting highlighting of line.";
161 break;
162 }
163 }
164 auto context = stateData->topContext();
165 applyFormat(0, 0, context->attributeFormat());
166 return *defData->unify.insert(newState);
167 }
168
169 auto &dynamicRegexpCache = RepositoryPrivate::get(defData->repo)->m_dynamicRegexpCache;
170
171 int offset = 0;
172 int beginOffset = 0;
173 bool lineContinuation = false;
174
175 /**
176 * for expensive rules like regexes we do:
177 * - match them for the complete line, as this is faster than re-trying them at all positions
178 * - store the result of the first position that matches (or -1 for no match in the full line) in the skipOffsets hash for re-use
179 * - have capturesForLastDynamicSkipOffset as guard for dynamic regexes to invalidate the cache if they might have changed
180 */
182 QStringList capturesForLastDynamicSkipOffset;
183
184 auto getSkipOffsetValue = [&skipOffsets](const Rule *r) -> int {
185 auto i = std::find_if(skipOffsets.begin(), skipOffsets.end(), [r](const auto &v) {
186 return v.first == r;
187 });
188 if (i == skipOffsets.end())
189 return 0;
190 return i->second;
191 };
192
193 auto insertSkipOffset = [&skipOffsets](const Rule *r, int i) {
194 auto it = std::find_if(skipOffsets.begin(), skipOffsets.end(), [r](const auto &v) {
195 return v.first == r;
196 });
197 if (it == skipOffsets.end()) {
198 skipOffsets.push_back({r, i});
199 } else {
200 it->second = i;
201 }
202 };
203
204 /**
205 * current active format
206 * stored as pointer to avoid deconstruction/constructions inside the internal loop
207 * the pointers are stable, the formats are either in the contexts or rules
208 */
209 auto currentFormat = &stateData->topContext()->attributeFormat();
210
211 /**
212 * cached first non-space character, needs to be computed if < 0
213 */
214 int firstNonSpace = -1;
215 int lastOffset = offset;
216 int endlessLoopingCounter = 0;
217 do {
218 /**
219 * avoid that we loop endless for some broken hl definitions
220 */
221 if (lastOffset == offset) {
222 ++endlessLoopingCounter;
223 if (endlessLoopingCounter > 1024) {
224 qCDebug(Log) << "Endless state transitions, aborting highlighting of line.";
225 break;
226 }
227 } else {
228 // ensure we made progress, clear the endlessLoopingCounter
229 Q_ASSERT(offset > lastOffset);
230 lastOffset = offset;
231 endlessLoopingCounter = 0;
232 }
233
234 /**
235 * try to match all rules in the context in order of declaration in XML
236 */
237 bool isLookAhead = false;
238 int newOffset = 0;
239 const Format *newFormat = nullptr;
240 for (const auto &ruleShared : stateData->topContext()->rules()) {
241 auto rule = ruleShared.get();
242 /**
243 * filter out rules that require a specific column
244 */
245 if ((rule->requiredColumn() >= 0) && (rule->requiredColumn() != offset)) {
246 continue;
247 }
248
249 /**
250 * filter out rules that only match for leading whitespace
251 */
252 if (rule->firstNonSpace()) {
253 /**
254 * compute the first non-space lazy
255 * avoids computing it for contexts without any such rules
256 */
257 if (firstNonSpace < 0) {
258 firstNonSpace = firstNonSpaceChar(text);
259 }
260
261 /**
262 * can we skip?
263 */
264 if (offset > firstNonSpace) {
265 continue;
266 }
267 }
268
269 int currentSkipOffset = 0;
270 if (Q_UNLIKELY(rule->hasSkipOffset())) {
271 /**
272 * shall we skip application of this rule? two cases:
273 * - rule can't match at all => currentSkipOffset < 0
274 * - rule will only match for some higher offset => currentSkipOffset > offset
275 *
276 * we need to invalidate this if we are dynamic and have different captures then last time
277 */
278 if (rule->isDynamic() && (capturesForLastDynamicSkipOffset != stateData->topCaptures())) {
279 skipOffsets.clear();
280 } else {
281 currentSkipOffset = getSkipOffsetValue(rule);
282 if (currentSkipOffset < 0 || currentSkipOffset > offset) {
283 continue;
284 }
285 }
286 }
287
288 auto newResult = rule->doMatch(text, offset, stateData->topCaptures(), dynamicRegexpCache);
289 newOffset = newResult.offset();
290
291 /**
292 * update skip offset if new one rules out any later match or is larger than current one
293 */
294 if (newResult.skipOffset() < 0 || newResult.skipOffset() > currentSkipOffset) {
295 insertSkipOffset(rule, newResult.skipOffset());
296
297 // remember new captures, if dynamic to enforce proper reset above on change!
298 if (rule->isDynamic()) {
299 capturesForLastDynamicSkipOffset = stateData->topCaptures();
300 }
301 }
302
303 if (newOffset <= offset) {
304 continue;
305 }
306
307 /**
308 * apply folding.
309 * special cases:
310 * - rule with endRegion + beginRegion: in endRegion, the length is 0
311 * - rule with lookAhead: length is 0
312 */
313 if (rule->endRegion().isValid() && rule->beginRegion().isValid()) {
314 applyFolding(offset, 0, rule->endRegion());
315 } else if (rule->endRegion().isValid()) {
316 applyFolding(offset, rule->isLookAhead() ? 0 : newOffset - offset, rule->endRegion());
317 }
318 if (rule->beginRegion().isValid()) {
319 applyFolding(offset, rule->isLookAhead() ? 0 : newOffset - offset, rule->beginRegion());
320 }
321
322 if (rule->isLookAhead()) {
323 Q_ASSERT(!rule->context().isStay());
324 d->switchContext(stateData, rule->context(), std::move(newResult.captures()), newState, isSharedData);
325 isLookAhead = true;
326 break;
327 }
328
329 d->switchContext(stateData, rule->context(), std::move(newResult.captures()), newState, isSharedData);
330 newFormat = rule->attributeFormat().isValid() ? &rule->attributeFormat() : &stateData->topContext()->attributeFormat();
331 if (newOffset == text.size() && rule->isLineContinue()) {
332 lineContinuation = true;
333 }
334 break;
335 }
336 if (isLookAhead) {
337 continue;
338 }
339
340 if (newOffset <= offset) { // no matching rule
341 if (stateData->topContext()->fallthrough()) {
342 d->switchContext(stateData, stateData->topContext()->fallthroughContext(), QStringList(), newState, isSharedData);
343 continue;
344 }
345
346 newOffset = offset + 1;
347 newFormat = &stateData->topContext()->attributeFormat();
348 }
349
350 /**
351 * if we arrive here, some new format has to be set!
352 */
353 Q_ASSERT(newFormat);
354
355 /**
356 * on format change, apply the last one and switch to new one
357 */
358 if (newFormat != currentFormat && newFormat->id() != currentFormat->id()) {
359 if (offset > 0) {
360 applyFormat(beginOffset, offset - beginOffset, *currentFormat);
361 }
362 beginOffset = offset;
363 currentFormat = newFormat;
364 }
365
366 /**
367 * we must have made progress if we arrive here!
368 */
369 Q_ASSERT(newOffset > offset);
370 offset = newOffset;
371
372 } while (offset < text.size());
373
374 /**
375 * apply format for remaining text, if any
376 */
377 if (beginOffset < offset) {
378 applyFormat(beginOffset, text.size() - beginOffset, *currentFormat);
379 }
380
381 /**
382 * handle line end context switches
383 * guard against endless loops
384 * see https://phabricator.kde.org/D18509
385 */
386 {
387 int endlessLoopingCounter = 0;
388 while (!stateData->topContext()->lineEndContext().isStay() && !lineContinuation) {
389 if (!d->switchContext(stateData, stateData->topContext()->lineEndContext(), QStringList(), newState, isSharedData)) {
390 break;
391 }
392
393 // guard against endless loops
394 ++endlessLoopingCounter;
395 if (endlessLoopingCounter > 1024) {
396 qCDebug(Log) << "Endless switch context transitions for line end context, aborting highlighting of line.";
397 break;
398 }
399 }
400 }
401
402 return *defData->unify.insert(newState);
403}
404
405bool AbstractHighlighterPrivate::switchContext(StateData *&data, const ContextSwitch &contextSwitch, QStringList &&captures, State &state, bool &isSharedData)
406{
407 const auto popCount = contextSwitch.popCount();
408 const auto context = contextSwitch.context();
409 if (popCount <= 0 && !context) {
410 return true;
411 }
412
413 // a modified state must be detached before modification
414 if (isSharedData) {
415 data = StateData::detach(state);
416 isSharedData = false;
417 }
418
419 // kill as many items as requested from the stack, will always keep the initial context alive!
420 const bool initialContextSurvived = data->pop(popCount);
421
422 // if we have a new context to add, push it
423 // then we always "succeed"
424 if (context) {
425 data->push(context, std::move(captures));
426 return true;
427 }
428
429 // else we abort, if we did try to pop the initial context
430 return initialContextSurvived;
431}
432
433void AbstractHighlighter::applyFolding(int offset, int length, FoldingRegion region)
434{
435 Q_UNUSED(offset);
436 Q_UNUSED(length);
437 Q_UNUSED(region);
438}
Abstract base class for highlighters.
virtual void applyFormat(int offset, int length, const Format &format)=0
Reimplement this to apply formats to your output.
virtual void setDefinition(const Definition &def)
Sets the syntax definition used for highlighting.
Theme theme() const
Returns the currently selected theme for highlighting.
virtual void setTheme(const Theme &theme)
Sets the theme used for highlighting.
virtual void applyFolding(int offset, int length, FoldingRegion region)
Reimplement this to apply folding to your output.
Definition definition() const
Returns the syntax definition used for highlighting.
State highlightLine(QStringView text, const State &state)
Highlight the given line.
Represents a syntax definition.
Definition definition.h:83
Represents a begin or end of a folding region.
Describes the format to be used for a specific text fragment.
Definition format.h:28
bool isValid() const
Returns true if this is a valid format.
Definition format.cpp:91
int id() const
Returns a unique identifier of this format.
Definition format.cpp:101
Opaque handle to the state of the highlighting engine.
Definition state.h:30
Color theme definition used for highlighting.
Definition theme.h:65
Syntax highlighting engine for Kate syntax definitions.
bool isEmpty() const
qsizetype length() const
qsizetype size() const
iterator begin()
iterator end()
void push_back(T &&t)
Q_D(Todo)
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:19:29 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.