KSyntaxHighlighting

highlightingdata.cpp
1/*
2 SPDX-FileCopyrightText: 2021 Jonathan Poelen <jonathan.poelen@gmail.com>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#include "highlightingdata_p.hpp"
8#include "ksyntaxhighlighting_logging.h"
9#include "xml_p.h"
10
11#include <QXmlStreamReader>
12#include <QStringView>
13
14using namespace KSyntaxHighlighting;
15
/**
 * Constructs a @p Data object in place at the address of @p data, using
 * brace-initialization. No previously existing object is destroyed here.
 *
 * The arguments are perfectly forwarded: rvalues are moved into the new
 * object, lvalues are copied and stay intact on the caller's side.
 *
 * @param data storage the new object is constructed into
 * @param args values handed to the braced initializer of @p Data
 */
template<class Data, class... Args>
static void initRuleData(Data &data, Args &&...args)
{
    new (&data) Data{std::forward<Args>(args)...};
}
21
22static Qt::CaseSensitivity attrToCaseSensitivity(QStringView str)
23{
24 return Xml::attrToBool(str) ? Qt::CaseInsensitive : Qt::CaseSensitive;
25}
26
27static HighlightingContextData::Rule::WordDelimiters loadAdditionalWordDelimiters(QXmlStreamReader &reader)
28{
29 return HighlightingContextData::Rule::WordDelimiters{
30 reader.attributes().value(QLatin1String("additionalDeliminator")).toString(),
31 reader.attributes().value(QLatin1String("weakDeliminator")).toString(),
32 };
33}
34
35static bool checkIsNotEmpty(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader)
36{
37 if (!str.isEmpty()) {
38 return true;
39 }
40
41 qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute is empty";
42 return false;
43}
44
45static bool checkIsChar(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader)
46{
47 if (str.size() == 1) {
48 return true;
49 }
50
51 qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute must contain exactly 1 character";
52 return false;
53}
54
55static bool loadRule(const QString &defName, HighlightingContextData::Rule &rule, QXmlStreamReader &reader)
56{
57 using Rule = HighlightingContextData::Rule;
58
59 QStringView name = reader.name();
60 const auto attrs = reader.attributes();
61 bool isIncludeRules = false;
62
63 if (name == QLatin1String("DetectChar")) {
64 const auto s = attrs.value(QLatin1String("char"));
65 if (!checkIsChar(s, "char", defName, reader)) {
66 return false;
67 }
68 const QChar c = s.at(0);
69 const bool dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic")));
70
71 initRuleData(rule.data.detectChar, c, dynamic);
72 rule.type = Rule::Type::DetectChar;
73 } else if (name == QLatin1String("RegExpr")) {
74 const auto pattern = attrs.value(QLatin1String("String"));
75 if (!checkIsNotEmpty(pattern, "String", defName, reader)) {
76 return false;
77 }
78
79 const auto isCaseInsensitive = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
80 const auto isMinimal = Xml::attrToBool(attrs.value(QLatin1String("minimal")));
81 const auto dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic")));
82
83 initRuleData(rule.data.regExpr, pattern.toString(), isCaseInsensitive, isMinimal, dynamic);
84 rule.type = Rule::Type::RegExpr;
85 } else if (name == QLatin1String("IncludeRules")) {
86 const auto context = attrs.value(QLatin1String("context"));
87 if (!checkIsNotEmpty(context, "context", defName, reader)) {
88 return false;
89 }
90 const bool includeAttribute = Xml::attrToBool(attrs.value(QLatin1String("includeAttrib")));
91
92 initRuleData(rule.data.includeRules, context.toString(), includeAttribute);
93 rule.type = Rule::Type::IncludeRules;
94 isIncludeRules = true;
95 } else if (name == QLatin1String("Detect2Chars")) {
96 const auto s1 = attrs.value(QLatin1String("char"));
97 const auto s2 = attrs.value(QLatin1String("char1"));
98 if (!checkIsChar(s1, "char", defName, reader)) {
99 return false;
100 }
101 if (!checkIsChar(s2, "char1", defName, reader)) {
102 return false;
103 }
104
105 initRuleData(rule.data.detect2Chars, s1.at(0), s2.at(0));
106 rule.type = Rule::Type::Detect2Chars;
107 } else if (name == QLatin1String("keyword")) {
108 const auto s = attrs.value(QLatin1String("String"));
109 if (!checkIsNotEmpty(s, "String", defName, reader)) {
110 return false;
111 }
112 Qt::CaseSensitivity caseSensitivityOverride = Qt::CaseInsensitive;
113 bool hasCaseSensitivityOverride = false;
114
115 /**
116 * we might overwrite the case sensitivity
117 * then we need to init the list for lookup of that sensitivity setting
118 */
119 if (attrs.hasAttribute(QLatin1String("insensitive"))) {
120 hasCaseSensitivityOverride = true;
121 caseSensitivityOverride = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
122 }
123
124 initRuleData(rule.data.keyword, s.toString(), loadAdditionalWordDelimiters(reader), caseSensitivityOverride, hasCaseSensitivityOverride);
125 rule.type = Rule::Type::Keyword;
126 } else if (name == QLatin1String("DetectSpaces")) {
127 rule.type = Rule::Type::DetectSpaces;
128 } else if (name == QLatin1String("StringDetect")) {
129 const auto string = attrs.value(QLatin1String("String"));
130 if (!checkIsNotEmpty(string, "String", defName, reader)) {
131 return false;
132 }
133 const auto caseSensitivity = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
134 const auto dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic")));
135 const bool isSensitive = (caseSensitivity == Qt::CaseSensitive);
136
137 // String can be replaced with DetectChar or AnyChar
138 if (!dynamic && string.size() == 1) {
139 QChar c = string.at(0);
140 if (isSensitive || c.toLower() == c.toUpper()) {
141 initRuleData(rule.data.detectChar, c, dynamic);
142 rule.type = Rule::Type::DetectChar;
143 } else {
144 initRuleData(rule.data.anyChar, c.toLower() + c.toUpper());
145 rule.type = Rule::Type::AnyChar;
146 }
147 }
148 // String can be replaced with Detect2Chars
149 else if (isSensitive && !dynamic && string.size() == 2) {
150 initRuleData(rule.data.detect2Chars, string.at(0), string.at(1));
151 rule.type = Rule::Type::Detect2Chars;
152 } else {
153 initRuleData(rule.data.stringDetect, string.toString(), caseSensitivity, dynamic);
154 rule.type = Rule::Type::StringDetect;
155 }
156 } else if (name == QLatin1String("WordDetect")) {
157 const auto word = attrs.value(QLatin1String("String"));
158 if (!checkIsNotEmpty(word, "String", defName, reader)) {
159 return false;
160 }
161 const auto caseSensitivity = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
162
163 initRuleData(rule.data.wordDetect, word.toString(), loadAdditionalWordDelimiters(reader), caseSensitivity);
164 rule.type = Rule::Type::WordDetect;
165 } else if (name == QLatin1String("AnyChar")) {
166 const auto chars = attrs.value(QLatin1String("String"));
167 if (!checkIsNotEmpty(chars, "String", defName, reader)) {
168 return false;
169 }
170
171 // AnyChar can be replaced with DetectChar
172 if (chars.size() == 1) {
173 initRuleData(rule.data.detectChar, chars.at(0), false);
174 rule.type = Rule::Type::DetectChar;
175 } else {
176 initRuleData(rule.data.anyChar, chars.toString());
177 rule.type = Rule::Type::AnyChar;
178 }
179 } else if (name == QLatin1String("DetectIdentifier")) {
180 rule.type = Rule::Type::DetectIdentifier;
181 } else if (name == QLatin1String("LineContinue")) {
182 const auto s = attrs.value(QLatin1String("char"));
183 const QChar c = s.isEmpty() ? QLatin1Char('\\') : s.at(0);
184
185 initRuleData(rule.data.lineContinue, c);
186 rule.type = Rule::Type::LineContinue;
187 } else if (name == QLatin1String("Int")) {
188 initRuleData(rule.data.detectInt, loadAdditionalWordDelimiters(reader));
189 rule.type = Rule::Type::Int;
190 } else if (name == QLatin1String("Float")) {
191 initRuleData(rule.data.detectFloat, loadAdditionalWordDelimiters(reader));
192 rule.type = Rule::Type::Float;
193 } else if (name == QLatin1String("HlCStringChar")) {
194 rule.type = Rule::Type::HlCStringChar;
195 } else if (name == QLatin1String("RangeDetect")) {
196 const auto s1 = attrs.value(QLatin1String("char"));
197 const auto s2 = attrs.value(QLatin1String("char1"));
198 if (!checkIsChar(s1, "char", defName, reader)) {
199 return false;
200 }
201 if (!checkIsChar(s2, "char1", defName, reader)) {
202 return false;
203 }
204
205 initRuleData(rule.data.rangeDetect, s1.at(0), s2.at(0));
206 rule.type = Rule::Type::RangeDetect;
207 } else if (name == QLatin1String("HlCHex")) {
208 initRuleData(rule.data.hlCHex, loadAdditionalWordDelimiters(reader));
209 rule.type = Rule::Type::HlCHex;
210 } else if (name == QLatin1String("HlCChar")) {
211 rule.type = Rule::Type::HlCChar;
212 } else if (name == QLatin1String("HlCOct")) {
213 initRuleData(rule.data.hlCOct, loadAdditionalWordDelimiters(reader));
214 rule.type = Rule::Type::HlCOct;
215 } else {
216 qCWarning(Log) << "Unknown rule type:" << name;
217 return false;
218 }
219
220 if (!isIncludeRules) {
221 rule.common.contextName = attrs.value(QLatin1String("context")).toString();
222 rule.common.beginRegionName = attrs.value(QLatin1String("beginRegion")).toString();
223 rule.common.endRegionName = attrs.value(QLatin1String("endRegion")).toString();
224 rule.common.firstNonSpace = Xml::attrToBool(attrs.value(QLatin1String("firstNonSpace")));
225 rule.common.lookAhead = Xml::attrToBool(attrs.value(QLatin1String("lookAhead")));
226 // attribute is only used when lookAhead is false
227 if (!rule.common.lookAhead) {
228 rule.common.attributeName = attrs.value(QLatin1String("attribute")).toString();
229 }
230 bool colOk = false;
231 rule.common.column = attrs.value(QLatin1String("column")).toInt(&colOk);
232 if (!colOk) {
233 rule.common.column = -1;
234 }
235 }
236
237 return true;
238}
239
240template<class Data1, class Data2, class Visitor>
241static void dataRuleVisit(HighlightingContextData::Rule::Type type, Data1 &&data1, Data2 &&data2, Visitor &&visitor)
242{
243 using Rule = HighlightingContextData::Rule;
244 using Type = Rule::Type;
245 switch (type) {
246 case Type::AnyChar:
247 visitor(data1.anyChar, data2.anyChar);
248 break;
249 case Type::DetectChar:
250 visitor(data1.detectChar, data2.detectChar);
251 break;
252 case Type::Detect2Chars:
253 visitor(data1.detect2Chars, data2.detect2Chars);
254 break;
255 case Type::HlCOct:
256 visitor(data1.hlCOct, data2.hlCOct);
257 break;
258 case Type::IncludeRules:
259 visitor(data1.includeRules, data2.includeRules);
260 break;
261 case Type::Int:
262 visitor(data1.detectInt, data2.detectInt);
263 break;
264 case Type::Keyword:
265 visitor(data1.keyword, data2.keyword);
266 break;
267 case Type::LineContinue:
268 visitor(data1.lineContinue, data2.lineContinue);
269 break;
270 case Type::RangeDetect:
271 visitor(data1.rangeDetect, data2.rangeDetect);
272 break;
273 case Type::RegExpr:
274 visitor(data1.regExpr, data2.regExpr);
275 break;
276 case Type::StringDetect:
277 visitor(data1.stringDetect, data2.stringDetect);
278 break;
279 case Type::WordDetect:
280 visitor(data1.wordDetect, data2.wordDetect);
281 break;
282 case Type::Float:
283 visitor(data1.detectFloat, data2.detectFloat);
284 break;
285 case Type::HlCHex:
286 visitor(data1.hlCHex, data2.hlCHex);
287 break;
288
289 case Type::HlCStringChar:
290 case Type::DetectIdentifier:
291 case Type::DetectSpaces:
292 case Type::HlCChar:
293 case Type::Unknown:;
294 }
295}
296
// Defaulted, non-throwing default constructor.
// NOTE(review): the destructor dispatches on `type`, so this presumably relies on
// `type` having a default member initializer in the header — confirm.
HighlightingContextData::Rule::Rule() noexcept = default;
298
299HighlightingContextData::Rule::Rule(Rule &&other) noexcept
300 : common(std::move(other.common))
301{
302 dataRuleVisit(other.type, data, other.data, [](auto &data1, auto &data2) {
303 using Data = std::remove_reference_t<decltype(data1)>;
304 new (&data1) Data(std::move(data2));
305 });
306 type = other.type;
307}
308
309HighlightingContextData::Rule::Rule(const Rule &other)
310 : common(other.common)
311{
312 dataRuleVisit(other.type, data, other.data, [](auto &data1, auto &data2) {
313 using Data = std::remove_reference_t<decltype(data1)>;
314 new (&data1) Data(data2);
315 });
316 type = other.type;
317}
318
319HighlightingContextData::Rule::~Rule()
320{
321 dataRuleVisit(type, data, data, [](auto &data, auto &) {
322 using Data = std::remove_reference_t<decltype(data)>;
323 data.~Data();
324 });
325}
326
327HighlightingContextData::ContextSwitch::ContextSwitch(QStringView str)
328{
329 if (str.isEmpty() || str == QStringLiteral("#stay")) {
330 return;
331 }
332
333 while (str.startsWith(QStringLiteral("#pop"))) {
334 ++m_popCount;
335 if (str.size() > 4 && str.at(4) == QLatin1Char('!')) {
336 str = str.mid(5);
337 break;
338 }
339 str = str.mid(4);
340 }
341
342 if (str.isEmpty()) {
343 return;
344 }
345
346 m_contextAndDefName = str.toString();
347 m_defNameIndex = str.indexOf(QStringLiteral("##"));
348}
349
350bool HighlightingContextData::ContextSwitch::isStay() const
351{
352 return m_popCount == -1 && m_contextAndDefName.isEmpty();
353}
354
355QStringView HighlightingContextData::ContextSwitch::contextName() const
356{
357 if (m_defNameIndex == -1) {
358 return m_contextAndDefName;
359 }
360 return QStringView(m_contextAndDefName).left(m_defNameIndex);
361}
362
363QStringView HighlightingContextData::ContextSwitch::defName() const
364{
365 if (m_defNameIndex == -1) {
366 return QStringView();
367 }
368 return QStringView(m_contextAndDefName).mid(m_defNameIndex + 2);
369}
370
371void HighlightingContextData::load(const QString &defName, QXmlStreamReader &reader)
372{
373 Q_ASSERT(reader.name() == QLatin1String("context"));
374 Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement);
375
376 name = reader.attributes().value(QLatin1String("name")).toString();
377 attribute = reader.attributes().value(QLatin1String("attribute")).toString();
378 lineEndContext = reader.attributes().value(QLatin1String("lineEndContext")).toString();
379 lineEmptyContext = reader.attributes().value(QLatin1String("lineEmptyContext")).toString();
380 fallthroughContext = reader.attributes().value(QLatin1String("fallthroughContext")).toString();
381 noIndentationBasedFolding = Xml::attrToBool(reader.attributes().value(QLatin1String("noIndentationBasedFolding")));
382 stopEmptyLineContextSwitchLoop = Xml::attrToBool(reader.attributes().value(QLatin1String("stopEmptyLineContextSwitchLoop")));
383
384 rules.reserve(8);
385
386 reader.readNext();
387 while (!reader.atEnd()) {
388 switch (reader.tokenType()) {
390 auto &rule = rules.emplace_back();
391 if (!loadRule(defName, rule, reader)) {
392 rules.pop_back();
393 }
394 // be done with this rule, skip all subelements, e.g. no longer supported sub-rules
395 reader.skipCurrentElement();
396 reader.readNext();
397 break;
398 }
400 return;
401 default:
402 reader.readNext();
403 break;
404 }
405 }
406}
Represents the raw xml data of a context and its rules.
QString attribute
attribute name, to lookup our format
Type type(const QSqlDatabase &db)
QString name(StandardShortcut id)
Syntax highlighting engine for Kate syntax definitions.
char32_t toLower(char32_t ucs4)
char32_t toUpper(char32_t ucs4)
QStringView left(qsizetype length) const const
QStringView mid(qsizetype start, qsizetype length) const const
QChar at(qsizetype n) const const
qsizetype indexOf(QChar c, qsizetype from, Qt::CaseSensitivity cs) const const
bool isEmpty() const const
qsizetype size() const const
bool startsWith(QChar ch) const const
QString toString() const const
CaseSensitivity
QStringView value(QAnyStringView namespaceUri, QAnyStringView name) const const
bool atEnd() const const
QXmlStreamAttributes attributes() const const
qint64 lineNumber() const const
QStringView name() const const
TokenType readNext()
void skipCurrentElement()
TokenType tokenType() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:19:29 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.