KSyntaxHighlighting

abstracthighlighter.cpp
1/*
2 SPDX-FileCopyrightText: 2016 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#include "abstracthighlighter.h"
8#include "abstracthighlighter_p.h"
9#include "context_p.h"
10#include "definition_p.h"
11#include "foldingregion.h"
12#include "format.h"
13#include "ksyntaxhighlighting_logging.h"
14#include "repository.h"
15#include "repository_p.h"
16#include "rule_p.h"
17#include "state.h"
18#include "state_p.h"
19#include "theme.h"
20
21using namespace KSyntaxHighlighting;
22
23AbstractHighlighterPrivate::AbstractHighlighterPrivate()
24{
25}
26
27AbstractHighlighterPrivate::~AbstractHighlighterPrivate()
28{
29}
30
31void AbstractHighlighterPrivate::ensureDefinitionLoaded()
32{
33 auto defData = DefinitionData::get(m_definition);
34 if (Q_UNLIKELY(!m_definition.isValid())) {
35 if (defData->repo && !defData->name.isEmpty()) {
36 qCDebug(Log) << "Definition became invalid, trying re-lookup.";
37 m_definition = defData->repo->definitionForName(defData->name);
38 defData = DefinitionData::get(m_definition);
39 }
40
41 if (Q_UNLIKELY(!defData->repo && !defData->fileName.isEmpty())) {
42 qCCritical(Log) << "Repository got deleted while a highlighter is still active!";
43 }
44 }
45
46 if (m_definition.isValid()) {
47 defData->load();
48 }
49}
50
51AbstractHighlighter::AbstractHighlighter()
52 : d_ptr(new AbstractHighlighterPrivate)
53{
54}
55
56AbstractHighlighter::AbstractHighlighter(AbstractHighlighterPrivate *dd)
57 : d_ptr(dd)
58{
59}
60
61AbstractHighlighter::~AbstractHighlighter()
62{
63 delete d_ptr;
64}
65
67{
68 return d_ptr->m_definition;
69}
70
72{
74 d->m_definition = def;
75}
76
78{
80 return d->m_theme;
81}
82
84{
86 d->m_theme = theme;
87}
88
89/**
90 * Returns the index of the first non-space character. If the line is empty,
91 * or only contains white spaces, text.size() is returned.
92 */
93static inline int firstNonSpaceChar(QStringView text)
94{
95 for (int i = 0; i < text.length(); ++i) {
96 if (!text[i].isSpace()) {
97 return i;
98 }
99 }
100 return text.size();
101}
102
104{
106
107 // verify definition, deal with no highlighting being enabled
108 d->ensureDefinitionLoaded();
109 const auto defData = DefinitionData::get(d->m_definition);
110 if (!d->m_definition.isValid() || !defData->isLoaded()) {
111 applyFormat(0, text.size(), Format());
112 return State();
113 }
114
115 // limit the cache for unification to some reasonable size
116 // we use here at the moment 64k elements to not hog too much memory
117 // and to make the clearing no big stall
118 if (defData->unify.size() > 64 * 1024)
119 defData->unify.clear();
120
121 // verify/initialize state
122 auto newState = state;
123 auto stateData = StateData::get(newState);
124 bool isSharedData = true;
125 if (Q_UNLIKELY(stateData && stateData->m_defId != defData->id)) {
126 qCDebug(Log) << "Got invalid state, resetting.";
127 stateData = nullptr;
128 }
129 if (Q_UNLIKELY(!stateData)) {
130 stateData = StateData::reset(newState);
131 stateData->push(defData->initialContext(), QStringList());
132 stateData->m_defId = defData->id;
133 isSharedData = false;
134 }
135
136 // process empty lines
137 if (Q_UNLIKELY(text.isEmpty())) {
138 /**
139 * handle line empty context switches
140 * guard against endless loops
141 * see https://phabricator.kde.org/D18509
142 */
143 int endlessLoopingCounter = 0;
144 while (!stateData->topContext()->lineEmptyContext().isStay()) {
145 /**
146 * line empty context switches
147 */
148 if (!d->switchContext(stateData, stateData->topContext()->lineEmptyContext(), QStringList(), newState, isSharedData)) {
149 /**
150 * end when trying to #pop the main context
151 */
152 break;
153 }
154
155 if (stateData->topContext()->stopEmptyLineContextSwitchLoop()) {
156 break;
157 }
158
159 // guard against endless loops
160 ++endlessLoopingCounter;
161 if (endlessLoopingCounter > 1024) {
162 qCDebug(Log) << "Endless switch context transitions for line empty context, aborting highlighting of line.";
163 break;
164 }
165 }
166 auto context = stateData->topContext();
167 applyFormat(0, 0, context->attributeFormat());
168 return *defData->unify.insert(newState);
169 }
170
171 auto &dynamicRegexpCache = RepositoryPrivate::get(defData->repo)->m_dynamicRegexpCache;
172
173 int offset = 0;
174 int beginOffset = 0;
175 bool lineContinuation = false;
176
177 /**
178 * for expensive rules like regexes we do:
179 * - match them for the complete line, as this is faster than re-trying them at all positions
180 * - store the result of the first position that matches (or -1 for no match in the full line) in the skipOffsets hash for re-use
181 * - have capturesForLastDynamicSkipOffset as guard for dynamic regexes to invalidate the cache if they might have changed
182 */
184 QStringList capturesForLastDynamicSkipOffset;
185
186 auto getSkipOffsetValue = [&skipOffsets](const Rule *r) -> int {
187 auto i = std::find_if(skipOffsets.begin(), skipOffsets.end(), [r](const auto &v) {
188 return v.first == r;
189 });
190 if (i == skipOffsets.end())
191 return 0;
192 return i->second;
193 };
194
195 auto insertSkipOffset = [&skipOffsets](const Rule *r, int i) {
196 auto it = std::find_if(skipOffsets.begin(), skipOffsets.end(), [r](const auto &v) {
197 return v.first == r;
198 });
199 if (it == skipOffsets.end()) {
200 skipOffsets.push_back({r, i});
201 } else {
202 it->second = i;
203 }
204 };
205
206 /**
207 * current active format
208 * stored as pointer to avoid deconstruction/constructions inside the internal loop
209 * the pointers are stable, the formats are either in the contexts or rules
210 */
211 auto currentFormat = &stateData->topContext()->attributeFormat();
212
213 /**
214 * cached first non-space character, needs to be computed if < 0
215 */
216 int firstNonSpace = -1;
217 int lastOffset = offset;
218 int endlessLoopingCounter = 0;
219 do {
220 /**
221 * avoid that we loop endless for some broken hl definitions
222 */
223 if (lastOffset == offset) {
224 ++endlessLoopingCounter;
225 if (endlessLoopingCounter > 1024) {
226 qCDebug(Log) << "Endless state transitions, aborting highlighting of line.";
227 break;
228 }
229 } else {
230 // ensure we made progress, clear the endlessLoopingCounter
231 Q_ASSERT(offset > lastOffset);
232 lastOffset = offset;
233 endlessLoopingCounter = 0;
234 }
235
236 /**
237 * try to match all rules in the context in order of declaration in XML
238 */
239 bool isLookAhead = false;
240 int newOffset = 0;
241 const Format *newFormat = nullptr;
242 for (const auto &ruleShared : stateData->topContext()->rules()) {
243 auto rule = ruleShared.get();
244 /**
245 * filter out rules that require a specific column
246 */
247 if ((rule->requiredColumn() >= 0) && (rule->requiredColumn() != offset)) {
248 continue;
249 }
250
251 /**
252 * filter out rules that only match for leading whitespace
253 */
254 if (rule->firstNonSpace()) {
255 /**
256 * compute the first non-space lazy
257 * avoids computing it for contexts without any such rules
258 */
259 if (firstNonSpace < 0) {
260 firstNonSpace = firstNonSpaceChar(text);
261 }
262
263 /**
264 * can we skip?
265 */
266 if (offset > firstNonSpace) {
267 continue;
268 }
269 }
270
271 int currentSkipOffset = 0;
272 if (Q_UNLIKELY(rule->hasSkipOffset())) {
273 /**
274 * shall we skip application of this rule? two cases:
275 * - rule can't match at all => currentSkipOffset < 0
276 * - rule will only match for some higher offset => currentSkipOffset > offset
277 *
278 * we need to invalidate this if we are dynamic and have different captures then last time
279 */
280 if (rule->isDynamic() && (capturesForLastDynamicSkipOffset != stateData->topCaptures())) {
281 skipOffsets.clear();
282 } else {
283 currentSkipOffset = getSkipOffsetValue(rule);
284 if (currentSkipOffset < 0 || currentSkipOffset > offset) {
285 continue;
286 }
287 }
288 }
289
290 auto newResult = rule->doMatch(text, offset, stateData->topCaptures(), dynamicRegexpCache);
291 newOffset = newResult.offset();
292
293 /**
294 * update skip offset if new one rules out any later match or is larger than current one
295 */
296 if (newResult.skipOffset() < 0 || newResult.skipOffset() > currentSkipOffset) {
297 insertSkipOffset(rule, newResult.skipOffset());
298
299 // remember new captures, if dynamic to enforce proper reset above on change!
300 if (rule->isDynamic()) {
301 capturesForLastDynamicSkipOffset = stateData->topCaptures();
302 }
303 }
304
305 if (newOffset <= offset) {
306 continue;
307 }
308
309 /**
310 * apply folding.
311 * special cases:
312 * - rule with endRegion + beginRegion: in endRegion, the length is 0
313 * - rule with lookAhead: length is 0
314 */
315 if (rule->endRegion().isValid() && rule->beginRegion().isValid()) {
316 applyFolding(offset, 0, rule->endRegion());
317 } else if (rule->endRegion().isValid()) {
318 applyFolding(offset, rule->isLookAhead() ? 0 : newOffset - offset, rule->endRegion());
319 }
320 if (rule->beginRegion().isValid()) {
321 applyFolding(offset, rule->isLookAhead() ? 0 : newOffset - offset, rule->beginRegion());
322 }
323
324 if (rule->isLookAhead()) {
325 Q_ASSERT(!rule->context().isStay());
326 d->switchContext(stateData, rule->context(), std::move(newResult.captures()), newState, isSharedData);
327 isLookAhead = true;
328 break;
329 }
330
331 d->switchContext(stateData, rule->context(), std::move(newResult.captures()), newState, isSharedData);
332 newFormat = rule->attributeFormat().isValid() ? &rule->attributeFormat() : &stateData->topContext()->attributeFormat();
333 if (newOffset == text.size() && rule->isLineContinue()) {
334 lineContinuation = true;
335 }
336 break;
337 }
338 if (isLookAhead) {
339 continue;
340 }
341
342 if (newOffset <= offset) { // no matching rule
343 if (stateData->topContext()->fallthrough()) {
344 d->switchContext(stateData, stateData->topContext()->fallthroughContext(), QStringList(), newState, isSharedData);
345 continue;
346 }
347
348 newOffset = offset + 1;
349 newFormat = &stateData->topContext()->attributeFormat();
350 }
351
352 /**
353 * if we arrive here, some new format has to be set!
354 */
355 Q_ASSERT(newFormat);
356
357 /**
358 * on format change, apply the last one and switch to new one
359 */
360 if (newFormat != currentFormat && newFormat->id() != currentFormat->id()) {
361 if (offset > 0) {
362 applyFormat(beginOffset, offset - beginOffset, *currentFormat);
363 }
364 beginOffset = offset;
365 currentFormat = newFormat;
366 }
367
368 /**
369 * we must have made progress if we arrive here!
370 */
371 Q_ASSERT(newOffset > offset);
372 offset = newOffset;
373
374 } while (offset < text.size());
375
376 /**
377 * apply format for remaining text, if any
378 */
379 if (beginOffset < offset) {
380 applyFormat(beginOffset, text.size() - beginOffset, *currentFormat);
381 }
382
383 /**
384 * handle line end context switches
385 * guard against endless loops
386 * see https://phabricator.kde.org/D18509
387 */
388 {
389 int endlessLoopingCounter = 0;
390 while (!stateData->topContext()->lineEndContext().isStay() && !lineContinuation) {
391 if (!d->switchContext(stateData, stateData->topContext()->lineEndContext(), QStringList(), newState, isSharedData)) {
392 break;
393 }
394
395 // guard against endless loops
396 ++endlessLoopingCounter;
397 if (endlessLoopingCounter > 1024) {
398 qCDebug(Log) << "Endless switch context transitions for line end context, aborting highlighting of line.";
399 break;
400 }
401 }
402 }
403
404 return *defData->unify.insert(newState);
405}
406
407bool AbstractHighlighterPrivate::switchContext(StateData *&data, const ContextSwitch &contextSwitch, QStringList &&captures, State &state, bool &isSharedData)
408{
409 const auto popCount = contextSwitch.popCount();
410 const auto context = contextSwitch.context();
411 if (popCount <= 0 && !context) {
412 return true;
413 }
414
415 // a modified state must be detached before modification
416 if (isSharedData) {
417 data = StateData::detach(state);
418 isSharedData = false;
419 }
420
421 // kill as many items as requested from the stack, will always keep the initial context alive!
422 const bool initialContextSurvived = data->pop(popCount);
423
424 // if we have a new context to add, push it
425 // then we always "succeed"
426 if (context) {
427 data->push(context, std::move(captures));
428 return true;
429 }
430
431 // else we abort, if we did try to pop the initial context
432 return initialContextSurvived;
433}
434
435void AbstractHighlighter::applyFolding(int offset, int length, FoldingRegion region)
436{
437 Q_UNUSED(offset);
438 Q_UNUSED(length);
439 Q_UNUSED(region);
440}
Abstract base class for highlighters.
virtual void applyFormat(int offset, int length, const Format &format)=0
Reimplement this to apply formats to your output.
virtual void setDefinition(const Definition &def)
Sets the syntax definition used for highlighting.
Theme theme() const
Returns the currently selected theme for highlighting.
virtual void setTheme(const Theme &theme)
Sets the theme used for highlighting.
virtual void applyFolding(int offset, int length, FoldingRegion region)
Reimplement this to apply folding to your output.
Definition definition() const
Returns the syntax definition used for highlighting.
State highlightLine(QStringView text, const State &state)
Highlight the given line.
Represents a syntax definition.
Definition definition.h:83
Represents a begin or end of a folding region.
Describes the format to be used for a specific text fragment.
Definition format.h:28
bool isValid() const
Returns true if this is a valid format.
Definition format.cpp:90
int id() const
Returns a unique identifier of this format.
Definition format.cpp:100
Opaque handle to the state of the highlighting engine.
Definition state.h:30
Color theme definition used for highlighting.
Definition theme.h:65
Syntax highlighting engine for Kate syntax definitions.
bool isEmpty() const const
qsizetype length() const const
qsizetype size() const const
iterator begin()
iterator end()
void push_back(T &&t)
Q_D(Todo)
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:49:02 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.