KSyntaxHighlighting

highlightingdata.cpp
1 /*
2  SPDX-FileCopyrightText: 2021 Jonathan Poelen <[email protected]>
3 
4  SPDX-License-Identifier: MIT
5 */
6 
7 #include "highlightingdata_p.hpp"
8 #include "ksyntaxhighlighting_logging.h"
9 #include "xml_p.h"
10 
11 #include <QXmlStreamReader>
12 #include <QStringView>
13 
14 using namespace KSyntaxHighlighting;
15 
/**
 * Constructs a @p Data object in place, inside the storage that @p data refers to.
 *
 * @p data is treated as raw storage: placement new is used and no destructor is
 * run for whatever the storage held before, so it must not hold a live object.
 *
 * Every argument is perfectly forwarded into the brace-initialization of @p Data:
 * rvalues are moved, lvalues are copied and stay intact in the caller.
 *
 * @param data storage in which the new object is created
 * @param args initializers for @p Data, in declaration order of its members
 */
template<class Data, class... Args>
static void initRuleData(Data &data, Args &&...args)
{
    // std::forward instead of std::move: args are forwarding references, so an
    // unconditional move would silently steal from lvalues owned by the caller.
    new (&data) Data{std::forward<Args>(args)...};
}
21 
22 static Qt::CaseSensitivity attrToCaseSensitivity(QStringView str)
23 {
24  return Xml::attrToBool(str) ? Qt::CaseInsensitive : Qt::CaseSensitive;
25 }
26 
27 static HighlightingContextData::Rule::WordDelimiters loadAdditionalWordDelimiters(QXmlStreamReader &reader)
28 {
29  return HighlightingContextData::Rule::WordDelimiters{
30  reader.attributes().value(QLatin1String("additionalDeliminator")).toString(),
31  reader.attributes().value(QLatin1String("weakDeliminator")).toString(),
32  };
33 }
34 
35 static bool checkIsNotEmpty(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader)
36 {
37  if (!str.isEmpty()) {
38  return true;
39  }
40 
41  qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute is empty";
42  return false;
43 }
44 
45 static bool checkIsChar(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader)
46 {
47  if (str.size() == 1) {
48  return true;
49  }
50 
51  qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute must contain exactly 1 character";
52  return false;
53 }
54 
55 static bool loadRule(const QString &defName, HighlightingContextData::Rule &rule, QXmlStreamReader &reader)
56 {
57  using Rule = HighlightingContextData::Rule;
58 
59  QStringView name = reader.name();
60  const auto attrs = reader.attributes();
61  bool isIncludeRules = false;
62 
63  if (name == QLatin1String("DetectChar")) {
64  const auto s = attrs.value(QLatin1String("char"));
65  if (!checkIsChar(s, "char", defName, reader)) {
66  return false;
67  }
68  const QChar c = s.at(0);
69  const bool dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic")));
70 
71  initRuleData(rule.data.detectChar, c, dynamic);
72  rule.type = Rule::Type::DetectChar;
73  } else if (name == QLatin1String("RegExpr")) {
74  const auto pattern = attrs.value(QLatin1String("String"));
75  if (!checkIsNotEmpty(pattern, "String", defName, reader)) {
76  return false;
77  }
78 
79  const auto isCaseInsensitive = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
80  const auto isMinimal = Xml::attrToBool(attrs.value(QLatin1String("minimal")));
81  const auto dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic")));
82 
83  initRuleData(rule.data.regExpr, pattern.toString(), isCaseInsensitive, isMinimal, dynamic);
84  rule.type = Rule::Type::RegExpr;
85  } else if (name == QLatin1String("IncludeRules")) {
86  const auto context = attrs.value(QLatin1String("context"));
87  if (!checkIsNotEmpty(context, "context", defName, reader)) {
88  return false;
89  }
90  const bool includeAttribute = Xml::attrToBool(attrs.value(QLatin1String("includeAttrib")));
91 
92  initRuleData(rule.data.includeRules, context.toString(), includeAttribute);
93  rule.type = Rule::Type::IncludeRules;
94  isIncludeRules = true;
95  } else if (name == QLatin1String("Detect2Chars")) {
96  const auto s1 = attrs.value(QLatin1String("char"));
97  const auto s2 = attrs.value(QLatin1String("char1"));
98  if (!checkIsChar(s1, "char", defName, reader)) {
99  return false;
100  }
101  if (!checkIsChar(s2, "char1", defName, reader)) {
102  return false;
103  }
104 
105  initRuleData(rule.data.detect2Chars, s1.at(0), s2.at(0));
106  rule.type = Rule::Type::Detect2Chars;
107  } else if (name == QLatin1String("keyword")) {
108  const auto s = attrs.value(QLatin1String("String"));
109  if (!checkIsNotEmpty(s, "String", defName, reader)) {
110  return false;
111  }
112  Qt::CaseSensitivity caseSensitivityOverride = Qt::CaseInsensitive;
113  bool hasCaseSensitivityOverride = false;
114 
115  /**
116  * we might overwrite the case sensitivity
117  * then we need to init the list for lookup of that sensitivity setting
118  */
119  if (attrs.hasAttribute(QLatin1String("insensitive"))) {
120  hasCaseSensitivityOverride = true;
121  caseSensitivityOverride = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
122  }
123 
124  initRuleData(rule.data.keyword, s.toString(), loadAdditionalWordDelimiters(reader), caseSensitivityOverride, hasCaseSensitivityOverride);
125  rule.type = Rule::Type::Keyword;
126  } else if (name == QLatin1String("DetectSpaces")) {
127  rule.type = Rule::Type::DetectSpaces;
128  } else if (name == QLatin1String("StringDetect")) {
129  const auto string = attrs.value(QLatin1String("String"));
130  if (!checkIsNotEmpty(string, "String", defName, reader)) {
131  return false;
132  }
133  const auto caseSensitivity = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
134  const auto dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic")));
135  const bool isSensitive = (caseSensitivity == Qt::CaseSensitive);
136 
137  // String can be replaced with DetectChar or AnyChar
138  if (!dynamic && string.size() == 1) {
139  QChar c = string.at(0);
140  if (isSensitive || c.toLower() == c.toUpper()) {
141  initRuleData(rule.data.detectChar, c, dynamic);
142  rule.type = Rule::Type::DetectChar;
143  } else {
144  initRuleData(rule.data.anyChar, c.toLower() + c.toUpper());
145  rule.type = Rule::Type::AnyChar;
146  }
147  }
148  // String can be replaced with Detect2Chars
149  else if (isSensitive && !dynamic && string.size() == 2) {
150  initRuleData(rule.data.detect2Chars, string.at(0), string.at(1));
151  rule.type = Rule::Type::Detect2Chars;
152  } else {
153  initRuleData(rule.data.stringDetect, string.toString(), caseSensitivity, dynamic);
154  rule.type = Rule::Type::StringDetect;
155  }
156  } else if (name == QLatin1String("WordDetect")) {
157  const auto word = attrs.value(QLatin1String("String"));
158  if (!checkIsNotEmpty(word, "String", defName, reader)) {
159  return false;
160  }
161  const auto caseSensitivity = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
162 
163  initRuleData(rule.data.wordDetect, word.toString(), loadAdditionalWordDelimiters(reader), caseSensitivity);
164  rule.type = Rule::Type::WordDetect;
165  } else if (name == QLatin1String("AnyChar")) {
166  const auto chars = attrs.value(QLatin1String("String"));
167  if (!checkIsNotEmpty(chars, "String", defName, reader)) {
168  return false;
169  }
170 
171  // AnyChar can be replaced with DetectChar
172  if (chars.size() == 1) {
173  initRuleData(rule.data.detectChar, chars.at(0), false);
174  rule.type = Rule::Type::DetectChar;
175  } else {
176  initRuleData(rule.data.anyChar, chars.toString());
177  rule.type = Rule::Type::AnyChar;
178  }
179  } else if (name == QLatin1String("DetectIdentifier")) {
180  rule.type = Rule::Type::DetectIdentifier;
181  } else if (name == QLatin1String("LineContinue")) {
182  const auto s = attrs.value(QLatin1String("char"));
183  const QChar c = s.isEmpty() ? QLatin1Char('\\') : s.at(0);
184 
185  initRuleData(rule.data.lineContinue, c);
186  rule.type = Rule::Type::LineContinue;
187  } else if (name == QLatin1String("Int")) {
188  initRuleData(rule.data.detectInt, loadAdditionalWordDelimiters(reader));
189  rule.type = Rule::Type::Int;
190  } else if (name == QLatin1String("Float")) {
191  initRuleData(rule.data.detectFloat, loadAdditionalWordDelimiters(reader));
192  rule.type = Rule::Type::Float;
193  } else if (name == QLatin1String("HlCStringChar")) {
194  rule.type = Rule::Type::HlCStringChar;
195  } else if (name == QLatin1String("RangeDetect")) {
196  const auto s1 = attrs.value(QLatin1String("char"));
197  const auto s2 = attrs.value(QLatin1String("char1"));
198  if (!checkIsChar(s1, "char", defName, reader)) {
199  return false;
200  }
201  if (!checkIsChar(s2, "char1", defName, reader)) {
202  return false;
203  }
204 
205  initRuleData(rule.data.rangeDetect, s1.at(0), s2.at(0));
206  rule.type = Rule::Type::RangeDetect;
207  } else if (name == QLatin1String("HlCHex")) {
208  initRuleData(rule.data.hlCHex, loadAdditionalWordDelimiters(reader));
209  rule.type = Rule::Type::HlCHex;
210  } else if (name == QLatin1String("HlCChar")) {
211  rule.type = Rule::Type::HlCChar;
212  } else if (name == QLatin1String("HlCOct")) {
213  initRuleData(rule.data.hlCOct, loadAdditionalWordDelimiters(reader));
214  rule.type = Rule::Type::HlCOct;
215  } else {
216  qCWarning(Log) << "Unknown rule type:" << name;
217  return false;
218  }
219 
220  if (!isIncludeRules) {
221  rule.common.contextName = attrs.value(QLatin1String("context")).toString();
222  rule.common.beginRegionName = attrs.value(QLatin1String("beginRegion")).toString();
223  rule.common.endRegionName = attrs.value(QLatin1String("endRegion")).toString();
224  rule.common.firstNonSpace = Xml::attrToBool(attrs.value(QLatin1String("firstNonSpace")));
225  rule.common.lookAhead = Xml::attrToBool(attrs.value(QLatin1String("lookAhead")));
226  // attribute is only used when lookAhead is false
227  if (!rule.common.lookAhead) {
228  rule.common.attributeName = attrs.value(QLatin1String("attribute")).toString();
229  }
230  bool colOk = false;
231  rule.common.column = attrs.value(QLatin1String("column")).toInt(&colOk);
232  if (!colOk) {
233  rule.common.column = -1;
234  }
235  }
236 
237  return true;
238 }
239 
240 template<class Data1, class Data2, class Visitor>
241 static void dataRuleVisit(HighlightingContextData::Rule::Type type, Data1 &&data1, Data2 &&data2, Visitor &&visitor)
242 {
243  using Rule = HighlightingContextData::Rule;
244  using Type = Rule::Type;
245  switch (type) {
246  case Type::AnyChar:
247  visitor(data1.anyChar, data2.anyChar);
248  break;
249  case Type::DetectChar:
250  visitor(data1.detectChar, data2.detectChar);
251  break;
252  case Type::Detect2Chars:
253  visitor(data1.detect2Chars, data2.detect2Chars);
254  break;
255  case Type::HlCOct:
256  visitor(data1.hlCOct, data2.hlCOct);
257  break;
258  case Type::IncludeRules:
259  visitor(data1.includeRules, data2.includeRules);
260  break;
261  case Type::Int:
262  visitor(data1.detectInt, data2.detectInt);
263  break;
264  case Type::Keyword:
265  visitor(data1.keyword, data2.keyword);
266  break;
267  case Type::LineContinue:
268  visitor(data1.lineContinue, data2.lineContinue);
269  break;
270  case Type::RangeDetect:
271  visitor(data1.rangeDetect, data2.rangeDetect);
272  break;
273  case Type::RegExpr:
274  visitor(data1.regExpr, data2.regExpr);
275  break;
276  case Type::StringDetect:
277  visitor(data1.stringDetect, data2.stringDetect);
278  break;
279  case Type::WordDetect:
280  visitor(data1.wordDetect, data2.wordDetect);
281  break;
282  case Type::Float:
283  visitor(data1.detectFloat, data2.detectFloat);
284  break;
285  case Type::HlCHex:
286  visitor(data1.hlCHex, data2.hlCHex);
287  break;
288 
289  case Type::HlCStringChar:
290  case Type::DetectIdentifier:
291  case Type::DetectSpaces:
292  case Type::HlCChar:
293  case Type::Unknown:;
294  }
295 }
296 
/// Default constructor, defaulted out of line and declared noexcept.
297 HighlightingContextData::Rule::Rule() noexcept = default;
298 
299 HighlightingContextData::Rule::Rule(Rule &&other) noexcept
300  : common(std::move(other.common))
301 {
302  dataRuleVisit(other.type, data, other.data, [](auto &data1, auto &data2) {
303  using Data = std::remove_reference_t<decltype(data1)>;
304  new (&data1) Data(std::move(data2));
305  });
306  type = other.type;
307 }
308 
309 HighlightingContextData::Rule::Rule(const Rule &other)
310  : common(other.common)
311 {
312  dataRuleVisit(other.type, data, other.data, [](auto &data1, auto &data2) {
313  using Data = std::remove_reference_t<decltype(data1)>;
314  new (&data1) Data(data2);
315  });
316  type = other.type;
317 }
318 
319 HighlightingContextData::Rule::~Rule()
320 {
321  dataRuleVisit(type, data, data, [](auto &data, auto &) {
322  using Data = std::remove_reference_t<decltype(data)>;
323  data.~Data();
324  });
325 }
326 
327 HighlightingContextData::ContextSwitch::ContextSwitch(QStringView str)
328 {
329  if (str.isEmpty() || str == QStringLiteral("#stay")) {
330  return;
331  }
332 
333  while (str.startsWith(QStringLiteral("#pop"))) {
334  ++m_popCount;
335  if (str.size() > 4 && str.at(4) == QLatin1Char('!')) {
336  str = str.mid(5);
337  break;
338  }
339  str = str.mid(4);
340  }
341 
342  if (str.isEmpty()) {
343  return;
344  }
345 
346  m_contextAndDefName = str.toString();
347  m_defNameIndex = str.indexOf(QStringLiteral("##"));
348 }
349 
350 bool HighlightingContextData::ContextSwitch::isStay() const
351 {
352  return m_popCount == -1 && m_contextAndDefName.isEmpty();
353 }
354 
355 QStringView HighlightingContextData::ContextSwitch::contextName() const
356 {
357  if (m_defNameIndex == -1) {
358  return m_contextAndDefName;
359  }
360  return QStringView(m_contextAndDefName).left(m_defNameIndex);
361 }
362 
363 QStringView HighlightingContextData::ContextSwitch::defName() const
364 {
365  if (m_defNameIndex == -1) {
366  return QStringView();
367  }
368  return QStringView(m_contextAndDefName).mid(m_defNameIndex + 2);
369 }
370 
371 void HighlightingContextData::load(const QString &defName, QXmlStreamReader &reader)
372 {
373  Q_ASSERT(reader.name() == QLatin1String("context"));
374  Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement);
375 
376  name = reader.attributes().value(QLatin1String("name")).toString();
377  attribute = reader.attributes().value(QLatin1String("attribute")).toString();
378  lineEndContext = reader.attributes().value(QLatin1String("lineEndContext")).toString();
379  lineEmptyContext = reader.attributes().value(QLatin1String("lineEmptyContext")).toString();
380  fallthroughContext = reader.attributes().value(QLatin1String("fallthroughContext")).toString();
381  noIndentationBasedFolding = Xml::attrToBool(reader.attributes().value(QLatin1String("noIndentationBasedFolding")));
382 
383  rules.reserve(8);
384 
385  reader.readNext();
386  while (!reader.atEnd()) {
387  switch (reader.tokenType()) {
389  auto &rule = rules.emplace_back();
390  if (!loadRule(defName, rule, reader)) {
391  rules.pop_back();
392  }
393  // be done with this rule, skip all subelements, e.g. no longer supported sub-rules
394  reader.skipCurrentElement();
395  reader.readNext();
396  break;
397  }
399  return;
400  default:
401  reader.readNext();
402  break;
403  }
404  }
405 }
QChar at(qsizetype n) const
void skipCurrentElement()
QChar toLower() const
CaseSensitivity
QString pattern(Mode mode=Reading)
QStringRef value(const QString &namespaceUri, const QString &name) const
Type type(const QSqlDatabase &db)
Represents the raw xml data of a context and its rules.
QStringView mid(qsizetype start) const
QChar toUpper() const
QStringView left(qsizetype length) const
qsizetype size() const
QStringRef name() const
QString toString() const
qint64 lineNumber() const
QXmlStreamReader::TokenType readNext()
QXmlStreamReader::TokenType tokenType() const
QXmlStreamAttributes attributes() const
bool isEmpty() const
QString toString() const
QString name(StandardShortcut id)
qsizetype indexOf(QChar c, qsizetype from, Qt::CaseSensitivity cs) const
bool atEnd() const
QString attribute
attribute name, to lookup our format
bool startsWith(QStringView str, Qt::CaseSensitivity cs) const
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Sun Mar 26 2023 04:09:17 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.