KSyntaxHighlighting

highlightingdata.cpp
1/*
2 SPDX-FileCopyrightText: 2021 Jonathan Poelen <jonathan.poelen@gmail.com>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#include "highlightingdata_p.hpp"
8#include "ksyntaxhighlighting_logging.h"
9#include "xml_p.h"
10
11#include <QXmlStreamReader>
12#include <QStringView>
13
14using namespace KSyntaxHighlighting;
15
/**
 * Constructs a @p Data object in place, directly inside the storage
 * referenced by @p data, by brace-initializing it from @p args.
 *
 * No destructor is run on @p data beforehand: whatever was stored there is
 * simply overwritten by the placement-new.
 *
 * The arguments are perfectly forwarded: rvalues are moved into the new
 * object, lvalues are copied and left untouched for the caller.
 *
 * @param data storage in which the new object is created
 * @param args initializers handed to Data's brace-initialization
 */
template<class Data, class... Args>
static void initRuleData(Data &data, Args &&...args)
{
    // std::forward (not std::move): an lvalue argument must not be silently
    // emptied just because it was passed through a forwarding reference.
    new (&data) Data{std::forward<Args>(args)...};
}
21
22static Qt::CaseSensitivity attrToCaseSensitivity(QStringView str)
23{
24 return Xml::attrToBool(str) ? Qt::CaseInsensitive : Qt::CaseSensitive;
25}
26
27static HighlightingContextData::Rule::WordDelimiters loadAdditionalWordDelimiters(QXmlStreamReader &reader)
28{
29 return HighlightingContextData::Rule::WordDelimiters{
30 reader.attributes().value(QLatin1String("additionalDeliminator")).toString(),
31 reader.attributes().value(QLatin1String("weakDeliminator")).toString(),
32 };
33}
34
35static bool checkIsNotEmpty(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader)
36{
37 if (!str.isEmpty()) {
38 return true;
39 }
40
41 qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute is empty";
42 return false;
43}
44
45static bool checkIsChar(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader)
46{
47 if (str.size() == 1) {
48 return true;
49 }
50
51 qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute must contain exactly 1 character";
52 return false;
53}
54
55static bool loadRule(const QString &defName, HighlightingContextData::Rule &rule, QXmlStreamReader &reader)
56{
57 using Rule = HighlightingContextData::Rule;
58
59 QStringView name = reader.name();
60 const auto attrs = reader.attributes();
61 bool isIncludeRules = false;
62
63 if (name == QLatin1String("DetectChar")) {
64 const auto s = attrs.value(QLatin1String("char"));
65 if (!checkIsChar(s, "char", defName, reader)) {
66 return false;
67 }
68 const QChar c = s.at(0);
69 const bool dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic")));
70
71 initRuleData(rule.data.detectChar, c, dynamic);
72 rule.type = Rule::Type::DetectChar;
73 } else if (name == QLatin1String("RegExpr")) {
74 const auto pattern = attrs.value(QLatin1String("String"));
75 if (!checkIsNotEmpty(pattern, "String", defName, reader)) {
76 return false;
77 }
78
79 const auto isCaseInsensitive = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
80 const auto isMinimal = Xml::attrToBool(attrs.value(QLatin1String("minimal")));
81 const auto dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic")));
82
83 initRuleData(rule.data.regExpr, pattern.toString(), isCaseInsensitive, isMinimal, dynamic);
84 rule.type = Rule::Type::RegExpr;
85 } else if (name == QLatin1String("IncludeRules")) {
86 const auto context = attrs.value(QLatin1String("context"));
87 if (!checkIsNotEmpty(context, "context", defName, reader)) {
88 return false;
89 }
90 const bool includeAttribute = Xml::attrToBool(attrs.value(QLatin1String("includeAttrib")));
91
92 initRuleData(rule.data.includeRules, context.toString(), includeAttribute);
93 rule.type = Rule::Type::IncludeRules;
94 isIncludeRules = true;
95 } else if (name == QLatin1String("Detect2Chars")) {
96 const auto s1 = attrs.value(QLatin1String("char"));
97 const auto s2 = attrs.value(QLatin1String("char1"));
98 if (!checkIsChar(s1, "char", defName, reader)) {
99 return false;
100 }
101 if (!checkIsChar(s2, "char1", defName, reader)) {
102 return false;
103 }
104
105 initRuleData(rule.data.detect2Chars, s1.at(0), s2.at(0));
106 rule.type = Rule::Type::Detect2Chars;
107 } else if (name == QLatin1String("keyword")) {
108 const auto s = attrs.value(QLatin1String("String"));
109 if (!checkIsNotEmpty(s, "String", defName, reader)) {
110 return false;
111 }
112 Qt::CaseSensitivity caseSensitivityOverride = Qt::CaseInsensitive;
113 bool hasCaseSensitivityOverride = false;
114
115 /**
116 * we might overwrite the case sensitivity
117 * then we need to init the list for lookup of that sensitivity setting
118 */
119 if (attrs.hasAttribute(QLatin1String("insensitive"))) {
120 hasCaseSensitivityOverride = true;
121 caseSensitivityOverride = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
122 }
123
124 initRuleData(rule.data.keyword, s.toString(), loadAdditionalWordDelimiters(reader), caseSensitivityOverride, hasCaseSensitivityOverride);
125 rule.type = Rule::Type::Keyword;
126 } else if (name == QLatin1String("DetectSpaces")) {
127 rule.type = Rule::Type::DetectSpaces;
128 } else if (name == QLatin1String("StringDetect")) {
129 const auto string = attrs.value(QLatin1String("String"));
130 if (!checkIsNotEmpty(string, "String", defName, reader)) {
131 return false;
132 }
133 const auto caseSensitivity = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
134 const auto dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic")));
135 const bool isSensitive = (caseSensitivity == Qt::CaseSensitive);
136
137 // String can be replaced with DetectChar or AnyChar
138 if (!dynamic && string.size() == 1) {
139 QChar c = string.at(0);
140 if (isSensitive || c.toLower() == c.toUpper()) {
141 initRuleData(rule.data.detectChar, c, dynamic);
142 rule.type = Rule::Type::DetectChar;
143 } else {
144 initRuleData(rule.data.anyChar, c.toLower() + c.toUpper());
145 rule.type = Rule::Type::AnyChar;
146 }
147 }
148 // String can be replaced with Detect2Chars
149 else if (isSensitive && !dynamic && string.size() == 2) {
150 initRuleData(rule.data.detect2Chars, string.at(0), string.at(1));
151 rule.type = Rule::Type::Detect2Chars;
152 } else {
153 initRuleData(rule.data.stringDetect, string.toString(), caseSensitivity, dynamic);
154 rule.type = Rule::Type::StringDetect;
155 }
156 } else if (name == QLatin1String("WordDetect")) {
157 const auto word = attrs.value(QLatin1String("String"));
158 if (!checkIsNotEmpty(word, "String", defName, reader)) {
159 return false;
160 }
161 const auto caseSensitivity = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
162
163 initRuleData(rule.data.wordDetect, word.toString(), loadAdditionalWordDelimiters(reader), caseSensitivity);
164 rule.type = Rule::Type::WordDetect;
165 } else if (name == QLatin1String("AnyChar")) {
166 const auto chars = attrs.value(QLatin1String("String"));
167 if (!checkIsNotEmpty(chars, "String", defName, reader)) {
168 return false;
169 }
170
171 // AnyChar can be replaced with DetectChar
172 if (chars.size() == 1) {
173 initRuleData(rule.data.detectChar, chars.at(0), false);
174 rule.type = Rule::Type::DetectChar;
175 } else {
176 initRuleData(rule.data.anyChar, chars.toString());
177 rule.type = Rule::Type::AnyChar;
178 }
179 } else if (name == QLatin1String("DetectIdentifier")) {
180 rule.type = Rule::Type::DetectIdentifier;
181 } else if (name == QLatin1String("LineContinue")) {
182 const auto s = attrs.value(QLatin1String("char"));
183 const QChar c = s.isEmpty() ? QLatin1Char('\\') : s.at(0);
184
185 initRuleData(rule.data.lineContinue, c);
186 rule.type = Rule::Type::LineContinue;
187 } else if (name == QLatin1String("Int")) {
188 initRuleData(rule.data.detectInt, loadAdditionalWordDelimiters(reader));
189 rule.type = Rule::Type::Int;
190 } else if (name == QLatin1String("Float")) {
191 initRuleData(rule.data.detectFloat, loadAdditionalWordDelimiters(reader));
192 rule.type = Rule::Type::Float;
193 } else if (name == QLatin1String("HlCStringChar")) {
194 rule.type = Rule::Type::HlCStringChar;
195 } else if (name == QLatin1String("RangeDetect")) {
196 const auto s1 = attrs.value(QLatin1String("char"));
197 const auto s2 = attrs.value(QLatin1String("char1"));
198 if (!checkIsChar(s1, "char", defName, reader)) {
199 return false;
200 }
201 if (!checkIsChar(s2, "char1", defName, reader)) {
202 return false;
203 }
204
205 initRuleData(rule.data.rangeDetect, s1.at(0), s2.at(0));
206 rule.type = Rule::Type::RangeDetect;
207 } else if (name == QLatin1String("HlCHex")) {
208 initRuleData(rule.data.hlCHex, loadAdditionalWordDelimiters(reader));
209 rule.type = Rule::Type::HlCHex;
210 } else if (name == QLatin1String("HlCChar")) {
211 rule.type = Rule::Type::HlCChar;
212 } else if (name == QLatin1String("HlCOct")) {
213 initRuleData(rule.data.hlCOct, loadAdditionalWordDelimiters(reader));
214 rule.type = Rule::Type::HlCOct;
215 } else {
216 qCWarning(Log) << "Unknown rule type:" << name;
217 return false;
218 }
219
220 if (!isIncludeRules) {
221 rule.common.contextName = attrs.value(QLatin1String("context")).toString();
222 rule.common.beginRegionName = attrs.value(QLatin1String("beginRegion")).toString();
223 rule.common.endRegionName = attrs.value(QLatin1String("endRegion")).toString();
224 rule.common.firstNonSpace = Xml::attrToBool(attrs.value(QLatin1String("firstNonSpace")));
225 rule.common.lookAhead = Xml::attrToBool(attrs.value(QLatin1String("lookAhead")));
226 // attribute is only used when lookAhead is false
227 if (!rule.common.lookAhead) {
228 rule.common.attributeName = attrs.value(QLatin1String("attribute")).toString();
229 }
230 bool colOk = false;
231 rule.common.column = attrs.value(QLatin1String("column")).toInt(&colOk);
232 if (!colOk) {
233 rule.common.column = -1;
234 }
235 }
236
237 return true;
238}
239
240template<class Data1, class Data2, class Visitor>
241static void dataRuleVisit(HighlightingContextData::Rule::Type type, Data1 &&data1, Data2 &&data2, Visitor &&visitor)
242{
243 using Rule = HighlightingContextData::Rule;
244 using Type = Rule::Type;
245 switch (type) {
246 case Type::AnyChar:
247 visitor(data1.anyChar, data2.anyChar);
248 break;
249 case Type::DetectChar:
250 visitor(data1.detectChar, data2.detectChar);
251 break;
252 case Type::Detect2Chars:
253 visitor(data1.detect2Chars, data2.detect2Chars);
254 break;
255 case Type::HlCOct:
256 visitor(data1.hlCOct, data2.hlCOct);
257 break;
258 case Type::IncludeRules:
259 visitor(data1.includeRules, data2.includeRules);
260 break;
261 case Type::Int:
262 visitor(data1.detectInt, data2.detectInt);
263 break;
264 case Type::Keyword:
265 visitor(data1.keyword, data2.keyword);
266 break;
267 case Type::LineContinue:
268 visitor(data1.lineContinue, data2.lineContinue);
269 break;
270 case Type::RangeDetect:
271 visitor(data1.rangeDetect, data2.rangeDetect);
272 break;
273 case Type::RegExpr:
274 visitor(data1.regExpr, data2.regExpr);
275 break;
276 case Type::StringDetect:
277 visitor(data1.stringDetect, data2.stringDetect);
278 break;
279 case Type::WordDetect:
280 visitor(data1.wordDetect, data2.wordDetect);
281 break;
282 case Type::Float:
283 visitor(data1.detectFloat, data2.detectFloat);
284 break;
285 case Type::HlCHex:
286 visitor(data1.hlCHex, data2.hlCHex);
287 break;
288
289 case Type::HlCStringChar:
290 case Type::DetectIdentifier:
291 case Type::DetectSpaces:
292 case Type::HlCChar:
293 case Type::Unknown:;
294 }
295}
296
297HighlightingContextData::Rule::Rule() noexcept = default;
298
299HighlightingContextData::Rule::Rule(Rule &&other) noexcept
300 : common(std::move(other.common))
301{
302 dataRuleVisit(other.type, data, other.data, [](auto &data1, auto &data2) {
303 using Data = std::remove_reference_t<decltype(data1)>;
304 new (&data1) Data(std::move(data2));
305 });
306 type = other.type;
307}
308
309HighlightingContextData::Rule::Rule(const Rule &other)
310 : common(other.common)
311{
312 dataRuleVisit(other.type, data, other.data, [](auto &data1, auto &data2) {
313 using Data = std::remove_reference_t<decltype(data1)>;
314 new (&data1) Data(data2);
315 });
316 type = other.type;
317}
318
319HighlightingContextData::Rule::~Rule()
320{
321 dataRuleVisit(type, data, data, [](auto &data, auto &) {
322 using Data = std::remove_reference_t<decltype(data)>;
323 data.~Data();
324 });
325}
326
327void HighlightingContextData::load(const QString &defName, QXmlStreamReader &reader)
328{
329 Q_ASSERT(reader.name() == QLatin1String("context"));
330 Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement);
331
332 name = reader.attributes().value(QLatin1String("name")).toString();
333 attribute = reader.attributes().value(QLatin1String("attribute")).toString();
334 lineEndContext = reader.attributes().value(QLatin1String("lineEndContext")).toString();
335 lineEmptyContext = reader.attributes().value(QLatin1String("lineEmptyContext")).toString();
336 fallthroughContext = reader.attributes().value(QLatin1String("fallthroughContext")).toString();
337 noIndentationBasedFolding = Xml::attrToBool(reader.attributes().value(QLatin1String("noIndentationBasedFolding")));
338 stopEmptyLineContextSwitchLoop = Xml::attrToBool(reader.attributes().value(QLatin1String("stopEmptyLineContextSwitchLoop")));
339
340 rules.reserve(8);
341
342 reader.readNext();
343 while (!reader.atEnd()) {
344 switch (reader.tokenType()) {
346 auto &rule = rules.emplace_back();
347 if (!loadRule(defName, rule, reader)) {
348 rules.pop_back();
349 }
350 // be done with this rule, skip all subelements, e.g. no longer supported sub-rules
351 reader.skipCurrentElement();
352 reader.readNext();
353 break;
354 }
356 return;
357 default:
358 reader.readNext();
359 break;
360 }
361 }
362}
Represents the raw xml data of a context and its rules.
QString attribute
attribute name, to lookup our format
Type type(const QSqlDatabase &db)
QString name(StandardAction id)
Syntax highlighting engine for Kate syntax definitions.
char32_t toLower(char32_t ucs4)
char32_t toUpper(char32_t ucs4)
bool isEmpty() const
qsizetype size() const
QString toString() const
CaseSensitivity
QStringView value(QAnyStringView namespaceUri, QAnyStringView name) const
bool atEnd() const
QXmlStreamAttributes attributes() const
qint64 lineNumber() const
QStringView name() const
TokenType readNext()
void skipCurrentElement()
TokenType tokenType() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:49:02 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.