KSyntaxHighlighting

katehighlightingindexer.cpp
1 /*
2  SPDX-FileCopyrightText: 2014 Christoph Cullmann <[email protected]>
3  SPDX-FileCopyrightText: 2020 Jonathan Poelen <[email protected]>
4 
5  SPDX-License-Identifier: MIT
6 */
7 
8 #include <QCborValue>
9 #include <QCoreApplication>
10 #include <QDebug>
11 #include <QFile>
12 #include <QFileInfo>
13 #include <QMutableMapIterator>
14 #include <QRegularExpression>
15 #include <QVariant>
16 #include <QXmlStreamReader>
17 
18 #ifdef QT_XMLPATTERNS_LIB
19 #include <QXmlSchema>
20 #include <QXmlSchemaValidator>
21 #endif
22 
23 #include "../lib/worddelimiters_p.h"
24 #include "../lib/xml_p.h"
25 
26 #include <array>
27 
28 using KSyntaxHighlighting::WordDelimiters;
29 using KSyntaxHighlighting::Xml::attrToBool;
30 
31 class HlFilesChecker
32 {
33 public:
34  template<typename T>
35  void setDefinition(const T &verStr, const QString &filename, const QString &name)
36  {
37  m_currentDefinition = &*m_definitions.insert(name, Definition{});
38  m_currentDefinition->languageName = name;
39  m_currentDefinition->filename = filename;
40  m_currentDefinition->kateVersionStr = verStr.toString();
41  m_currentKeywords = nullptr;
42  m_currentContext = nullptr;
43 
44  const auto idx = verStr.indexOf(QLatin1Char('.'));
45  if (idx <= 0) {
46  qWarning() << filename << "invalid kateversion" << verStr;
47  m_success = false;
48  } else {
49  m_currentDefinition->kateVersion = {verStr.left(idx).toInt(), verStr.mid(idx + 1).toInt()};
50  }
51  }
52 
53  void processElement(QXmlStreamReader &xml)
54  {
55  if (xml.isStartElement()) {
56  if (m_currentContext) {
57  m_currentContext->rules.push_back(Context::Rule{});
58  auto &rule = m_currentContext->rules.back();
59  m_success = rule.parseElement(m_currentDefinition->filename, xml) && m_success;
60  m_currentContext->hasDynamicRule = m_currentContext->hasDynamicRule || rule.dynamic == XmlBool::True;
61  } else if (m_currentKeywords) {
62  m_success = m_currentKeywords->items.parseElement(m_currentDefinition->filename, xml) && m_success;
63  } else if (xml.name() == QStringLiteral("context")) {
64  processContextElement(xml);
65  } else if (xml.name() == QStringLiteral("list")) {
66  processListElement(xml);
67  } else if (xml.name() == QStringLiteral("keywords")) {
68  m_success = m_currentDefinition->parseKeywords(xml) && m_success;
69  } else if (xml.name() == QStringLiteral("emptyLine")) {
70  m_success = parseEmptyLine(m_currentDefinition->filename, xml) && m_success;
71  } else if (xml.name() == QStringLiteral("itemData")) {
72  m_success = m_currentDefinition->itemDatas.parseElement(m_currentDefinition->filename, xml) && m_success;
73  }
74  } else if (xml.isEndElement()) {
75  if (m_currentContext && xml.name() == QStringLiteral("context")) {
76  m_currentContext = nullptr;
77  } else if (m_currentKeywords && xml.name() == QStringLiteral("list")) {
78  m_currentKeywords = nullptr;
79  }
80  }
81  }
82 
83  //! Resolve context attribute and include tag
84  void resolveContexts()
85  {
87  while (def.hasNext()) {
88  def.next();
89  auto &definition = def.value();
90  auto &contexts = definition.contexts;
91 
92  if (contexts.isEmpty()) {
93  qWarning() << definition.filename << "has no context";
94  m_success = false;
95  continue;
96  }
97 
98  auto markAsUsedContext = [](ContextName &contextName) {
99  if (!contextName.stay && contextName.context) {
100  contextName.context->isOnlyIncluded = false;
101  }
102  };
103 
104  QMutableMapIterator<QString, Context> contextIt(contexts);
105  while (contextIt.hasNext()) {
106  contextIt.next();
107  auto &context = contextIt.value();
108  resolveContextName(definition, context, context.lineEndContext, context.line);
109  resolveContextName(definition, context, context.lineEmptyContext, context.line);
110  resolveContextName(definition, context, context.fallthroughContext, context.line);
111  markAsUsedContext(context.lineEndContext);
112  markAsUsedContext(context.lineEmptyContext);
113  markAsUsedContext(context.fallthroughContext);
114  for (auto &rule : context.rules) {
115  rule.parentContext = &context;
116  resolveContextName(definition, context, rule.context, rule.line);
117  if (rule.type != Context::Rule::Type::IncludeRules) {
118  markAsUsedContext(rule.context);
119  } else if (rule.includeAttrib == XmlBool::True && rule.context.context) {
120  rule.context.context->referencedWithIncludeAttrib = true;
121  }
122  }
123  }
124 
125  auto *firstContext = &*definition.contexts.find(definition.firstContextName);
126  firstContext->isOnlyIncluded = false;
127  definition.firstContext = firstContext;
128  }
129 
130  resolveIncludeRules();
131  }
132 
133  bool check() const
134  {
135  bool success = m_success;
136 
137  const auto usedContexts = extractUsedContexts();
138 
139  QMap<const Definition *, const Definition *> maxVersionByDefinitions;
141 
142  QMapIterator<QString, Definition> def(m_definitions);
143  while (def.hasNext()) {
144  def.next();
145  const auto &definition = def.value();
146  const auto &filename = definition.filename;
147 
148  auto *maxDef = maxKateVersionDefinition(definition, maxVersionByDefinitions);
149  if (maxDef != &definition) {
150  qWarning() << definition.filename << "depends on a language" << maxDef->languageName << "in version" << maxDef->kateVersionStr
151  << ". Please, increase kateversion.";
152  success = false;
153  }
154 
155  QSet<const Keywords *> referencedKeywords;
156  QSet<ItemDatas::Style> usedAttributeNames;
157  QSet<ItemDatas::Style> ignoredAttributeNames;
158  success = checkKeywordsList(definition, referencedKeywords) && success;
159  success =
160  checkContexts(definition, referencedKeywords, usedAttributeNames, ignoredAttributeNames, usedContexts, unreachableIncludedRules) && success;
161 
162  // search for non-existing itemDatas.
163  const auto invalidNames = usedAttributeNames - definition.itemDatas.styleNames;
164  for (const auto &styleName : invalidNames) {
165  qWarning() << filename << "line" << styleName.line << "reference of non-existing itemData attributes:" << styleName.name;
166  success = false;
167  }
168 
169  // search for existing itemDatas, but unusable.
170  const auto ignoredNames = ignoredAttributeNames - usedAttributeNames;
171  for (const auto &styleName : ignoredNames) {
172  qWarning() << filename << "line" << styleName.line << "attribute" << styleName.name
173  << "is never used. All uses are with lookAhead=true or <IncludeRules/>";
174  success = false;
175  }
176 
177  // search for unused itemDatas.
178  auto unusedNames = definition.itemDatas.styleNames - usedAttributeNames;
179  unusedNames -= ignoredNames;
180  for (const auto &styleName : std::as_const(unusedNames)) {
181  qWarning() << filename << "line" << styleName.line << "unused itemData:" << styleName.name;
182  success = false;
183  }
184  }
185 
186  QMutableMapIterator<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRuleIt(unreachableIncludedRules);
187  while (unreachableIncludedRuleIt.hasNext()) {
188  unreachableIncludedRuleIt.next();
189  IncludedRuleUnreachableBy &unreachableRulesBy = unreachableIncludedRuleIt.value();
190  if (unreachableRulesBy.alwaysUnreachable) {
191  auto *rule = unreachableIncludedRuleIt.key();
192 
193  if (!rule->parentContext->isOnlyIncluded) {
194  continue;
195  }
196 
197  // remove duplicates rules
199  auto &unreachableBy = unreachableRulesBy.unreachableBy;
200  unreachableBy.erase(std::remove_if(unreachableBy.begin(),
201  unreachableBy.end(),
202  [&](const RuleAndInclude &ruleAndInclude) {
203  if (rules.contains(ruleAndInclude.rule)) {
204  return true;
205  }
206  rules.insert(ruleAndInclude.rule);
207  return false;
208  }),
209  unreachableBy.end());
210 
212  message.reserve(128);
213  for (auto &ruleAndInclude : std::as_const(unreachableBy)) {
214  message += QStringLiteral("line ");
215  message += QString::number(ruleAndInclude.rule->line);
216  message += QStringLiteral(" [");
217  message += ruleAndInclude.rule->parentContext->name;
218  if (rule->filename != ruleAndInclude.rule->filename) {
219  message += QStringLiteral(" (");
220  message += ruleAndInclude.rule->filename;
221  message += QLatin1Char(')');
222  }
223  if (ruleAndInclude.includeRules) {
224  message += QStringLiteral(" via line ");
225  message += QString::number(ruleAndInclude.includeRules->line);
226  }
227  message += QStringLiteral("], ");
228  }
229  message.chop(2);
230 
231  qWarning() << rule->filename << "line" << rule->line << "no IncludeRule can reach this rule, hidden by" << message;
232  success = false;
233  }
234  }
235 
236  return success;
237  }
238 
239 private:
//! Tri-state value of a boolean XML attribute.
//! A value-initialized XmlBool is \c Unspecified.
enum class XmlBool {
    Unspecified = 0,
    False = 1,
    True = 2,
};
245 
246  struct Context;
247 
248  struct ContextName {
249  QString name;
250  int popCount = 0;
251  bool stay = false;
252 
253  Context *context = nullptr;
254  };
255 
256  struct Parser {
257  const QString &filename;
258  QXmlStreamReader &xml;
259  QXmlStreamAttribute &attr;
260  bool success;
261 
262  //! Read a string type attribute, \c success = \c false when \p str is not empty
263  //! \return \c true when attr.name() == attrName, otherwise false
264  bool extractString(QString &str, const QString &attrName)
265  {
266  if (attr.name() != attrName) {
267  return false;
268  }
269 
270  str = attr.value().toString();
271  if (str.isEmpty()) {
272  qWarning() << filename << "line" << xml.lineNumber() << attrName << "attribute is empty";
273  success = false;
274  }
275 
276  return true;
277  }
278 
279  //! Read a bool type attribute, \c success = \c false when \p xmlBool is not \c XmlBool::Unspecified.
280  //! \return \c true when attr.name() == attrName, otherwise false
281  bool extractXmlBool(XmlBool &xmlBool, const QString &attrName)
282  {
283  if (attr.name() != attrName) {
284  return false;
285  }
286 
287  xmlBool = attr.value().isNull() ? XmlBool::Unspecified : attrToBool(attr.value()) ? XmlBool::True : XmlBool::False;
288 
289  return true;
290  }
291 
292  //! Read a positive integer type attribute, \c success = \c false when \p positive is already greater than or equal to 0
293  //! \return \c true when attr.name() == attrName, otherwise false
294  bool extractPositive(int &positive, const QString &attrName)
295  {
296  if (attr.name() != attrName) {
297  return false;
298  }
299 
300  bool ok = true;
301  positive = attr.value().toInt(&ok);
302 
303  if (!ok || positive < 0) {
304  qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a positive integer:" << attr.value();
305  success = false;
306  }
307 
308  return true;
309  }
310 
311  //! Read a color, \c success = \c false when \p color is already greater than or equal to 0
312  //! \return \c true when attr.name() == attrName, otherwise false
313  bool checkColor(const QString &attrName)
314  {
315  if (attr.name() != attrName) {
316  return false;
317  }
318 
319  const auto value = attr.value();
320  if (value.isEmpty() /*|| QColor(value).isValid()*/) {
321  qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a color:" << value;
322  success = false;
323  }
324 
325  return true;
326  }
327 
328  //! Read a QChar, \c success = \c false when \p c is not \c '\0' or does not have one char
329  //! \return \c true when attr.name() == attrName, otherwise false
330  bool extractChar(QChar &c, const QString &attrName)
331  {
332  if (attr.name() != attrName) {
333  return false;
334  }
335 
336  if (attr.value().size() == 1) {
337  c = attr.value()[0];
338  } else {
339  c = QLatin1Char('_');
340  qWarning() << filename << "line" << xml.lineNumber() << attrName << "must contain exactly one char:" << attr.value();
341  success = false;
342  }
343 
344  return true;
345  }
346 
347  //! \return parsing status when \p isExtracted is \c true, otherwise \c false
348  bool checkIfExtracted(bool isExtracted)
349  {
350  if (isExtracted) {
351  return success;
352  }
353 
354  qWarning() << filename << "line" << xml.lineNumber() << "unknown attribute:" << attr.name();
355  return false;
356  }
357  };
358 
359  struct Keywords {
360  struct Items {
361  struct Item {
362  QString content;
363  int line;
364 
365  friend uint qHash(const Item &item, uint seed = 0)
366  {
367  return qHash(item.content, seed);
368  }
369 
370  friend bool operator==(const Item &item0, const Item &item1)
371  {
372  return item0.content == item1.content;
373  }
374  };
375 
376  QVector<Item> keywords;
377  QSet<Item> includes;
378 
379  bool parseElement(const QString &filename, QXmlStreamReader &xml)
380  {
381  bool success = true;
382 
383  const int line = xml.lineNumber();
384  QString content = xml.readElementText();
385 
386  if (content.isEmpty()) {
387  qWarning() << filename << "line" << line << "is empty:" << xml.name();
388  success = false;
389  }
390 
391  if (xml.name() == QStringLiteral("include")) {
392  includes.insert({content, line});
393  } else if (xml.name() == QStringLiteral("item")) {
394  keywords.append({content, line});
395  } else {
396  qWarning() << filename << "line" << line << "invalid element:" << xml.name();
397  success = false;
398  }
399 
400  return success;
401  }
402  };
403 
404  QString name;
405  Items items;
406  int line;
407 
408  bool parseElement(const QString &filename, QXmlStreamReader &xml)
409  {
410  line = xml.lineNumber();
411 
412  bool success = true;
413  for (auto &attr : xml.attributes()) {
414  Parser parser{filename, xml, attr, success};
415 
416  const bool isExtracted = parser.extractString(name, QStringLiteral("name"));
417 
418  success = parser.checkIfExtracted(isExtracted);
419  }
420  return success;
421  }
422  };
423 
424  struct Context {
425  struct Rule {
426  enum class Type {
427  Unknown,
428  AnyChar,
429  Detect2Chars,
430  DetectChar,
431  DetectIdentifier,
432  DetectSpaces,
433  Float,
434  HlCChar,
435  HlCHex,
436  HlCOct,
437  HlCStringChar,
438  IncludeRules,
439  Int,
440  LineContinue,
441  RangeDetect,
442  RegExpr,
443  StringDetect,
444  WordDetect,
445  keyword,
446  };
447 
448  Type type{};
449 
450  bool isDotRegex = false;
451  int line = -1;
452 
453  // commonAttributes
454  QString attribute;
455  ContextName context;
456  QString beginRegion;
457  QString endRegion;
458  int column = -1;
459  XmlBool lookAhead{};
460  XmlBool firstNonSpace{};
461 
462  // StringDetect, WordDetect, keyword
463  XmlBool insensitive{};
464 
465  // DetectChar, StringDetect, RegExpr, keyword
466  XmlBool dynamic{};
467 
468  // Regex
469  XmlBool minimal{};
470 
471  // IncludeRule
472  XmlBool includeAttrib{};
473 
474  // DetectChar, Detect2Chars, LineContinue, RangeDetect
475  QChar char0;
476  // Detect2Chars, RangeDetect
477  QChar char1;
478 
479  // AnyChar, DetectChar, StringDetect, RegExpr, WordDetect, keyword
480  QString string;
481  // RegExpr without .* as suffix
482  QString sanitizedString;
483 
484  // Float, HlCHex, HlCOct, Int, WordDetect, keyword
485  QString additionalDeliminator;
486  QString weakDeliminator;
487 
488  // rules included by IncludeRules (without IncludeRule)
489  QVector<const Rule *> includedRules;
490 
491  // IncludeRules included by IncludeRules
492  QSet<const Rule *> includedIncludeRules;
493 
494  Context const *parentContext = nullptr;
495 
496  QString filename;
497 
498  bool parseElement(const QString &filename, QXmlStreamReader &xml)
499  {
500  this->filename = filename;
501  line = xml.lineNumber();
502 
503  using Pair = QPair<QString, Type>;
504  static const auto pairs = {
505  Pair{QStringLiteral("AnyChar"), Type::AnyChar},
506  Pair{QStringLiteral("Detect2Chars"), Type::Detect2Chars},
507  Pair{QStringLiteral("DetectChar"), Type::DetectChar},
508  Pair{QStringLiteral("DetectIdentifier"), Type::DetectIdentifier},
509  Pair{QStringLiteral("DetectSpaces"), Type::DetectSpaces},
510  Pair{QStringLiteral("Float"), Type::Float},
511  Pair{QStringLiteral("HlCChar"), Type::HlCChar},
512  Pair{QStringLiteral("HlCHex"), Type::HlCHex},
513  Pair{QStringLiteral("HlCOct"), Type::HlCOct},
514  Pair{QStringLiteral("HlCStringChar"), Type::HlCStringChar},
515  Pair{QStringLiteral("IncludeRules"), Type::IncludeRules},
516  Pair{QStringLiteral("Int"), Type::Int},
517  Pair{QStringLiteral("LineContinue"), Type::LineContinue},
518  Pair{QStringLiteral("RangeDetect"), Type::RangeDetect},
519  Pair{QStringLiteral("RegExpr"), Type::RegExpr},
520  Pair{QStringLiteral("StringDetect"), Type::StringDetect},
521  Pair{QStringLiteral("WordDetect"), Type::WordDetect},
522  Pair{QStringLiteral("keyword"), Type::keyword},
523  };
524 
525  for (auto pair : pairs) {
526  if (xml.name() == pair.first) {
527  type = pair.second;
528  bool success = parseAttributes(filename, xml);
529  success = checkMandoryAttributes(filename, xml) && success;
530  if (success && type == Type::RegExpr) {
531  // ., (.) followed by *, +, {1} or nothing
532  static const QRegularExpression isDot(QStringLiteral(R"(^\(?\.(?:[*+][*+?]?|[*+]|\{1\})?\$?$)"));
533  // remove "(?:" and ")"
534  static const QRegularExpression removeParentheses(QStringLiteral(R"(\((?:\?:)?|\))"));
535  // remove parentheses on a copy of string
536  auto reg = QString(string).replace(removeParentheses, QString());
537  isDotRegex = reg.contains(isDot);
538 
539  // Remove .* and .*$ suffix.
540  static const QRegularExpression allSuffix(QStringLiteral("(?<!\\\\)[.][*][?+]?[$]?$"));
541  sanitizedString = string;
542  sanitizedString.replace(allSuffix, QString());
543  // string is a catch-all, do not sanitize
544  if (sanitizedString.isEmpty() || sanitizedString == QStringLiteral("^")) {
545  sanitizedString = string;
546  }
547  }
548  return success;
549  }
550  }
551 
552  qWarning() << filename << "line" << xml.lineNumber() << "unknown element:" << xml.name();
553  return false;
554  }
555 
556  private:
557  bool parseAttributes(const QString &filename, QXmlStreamReader &xml)
558  {
559  bool success = true;
560 
561  for (auto &attr : xml.attributes()) {
562  Parser parser{filename, xml, attr, success};
563 
564  // clang-format off
565  const bool isExtracted
566  = parser.extractString(attribute, QStringLiteral("attribute"))
567  || parser.extractString(context.name, QStringLiteral("context"))
568  || parser.extractXmlBool(lookAhead, QStringLiteral("lookAhead"))
569  || parser.extractXmlBool(firstNonSpace, QStringLiteral("firstNonSpace"))
570  || parser.extractString(beginRegion, QStringLiteral("beginRegion"))
571  || parser.extractString(endRegion, QStringLiteral("endRegion"))
572  || parser.extractPositive(column, QStringLiteral("column"))
573  || ((type == Type::RegExpr
574  || type == Type::StringDetect
575  || type == Type::WordDetect
576  || type == Type::keyword
577  ) && parser.extractXmlBool(insensitive, QStringLiteral("insensitive")))
578  || ((type == Type::DetectChar
579  || type == Type::RegExpr
580  || type == Type::StringDetect
581  || type == Type::keyword
582  ) && parser.extractXmlBool(dynamic, QStringLiteral("dynamic")))
583  || ((type == Type::RegExpr)
584  && parser.extractXmlBool(minimal, QStringLiteral("minimal")))
585  || ((type == Type::DetectChar
586  || type == Type::Detect2Chars
587  || type == Type::LineContinue
588  || type == Type::RangeDetect
589  ) && parser.extractChar(char0, QStringLiteral("char")))
590  || ((type == Type::Detect2Chars
591  || type == Type::RangeDetect
592  ) && parser.extractChar(char1, QStringLiteral("char1")))
593  || ((type == Type::AnyChar
594  || type == Type::RegExpr
595  || type == Type::StringDetect
596  || type == Type::WordDetect
597  || type == Type::keyword
598  ) && parser.extractString(string, QStringLiteral("String")))
599  || ((type == Type::IncludeRules)
600  && parser.extractXmlBool(includeAttrib, QStringLiteral("includeAttrib")))
601  || ((type == Type::Float
602  || type == Type::HlCHex
603  || type == Type::HlCOct
604  || type == Type::Int
605  || type == Type::keyword
606  || type == Type::WordDetect
607  ) && (parser.extractString(additionalDeliminator, QStringLiteral("additionalDeliminator"))
608  || parser.extractString(weakDeliminator, QStringLiteral("weakDeliminator"))))
609  ;
610  // clang-format on
611 
612  success = parser.checkIfExtracted(isExtracted);
613 
614  if (type == Type::LineContinue && char0 == QLatin1Char('\0')) {
615  char0 = QLatin1Char('\\');
616  }
617  }
618 
619  return success;
620  }
621 
622  bool checkMandoryAttributes(const QString &filename, QXmlStreamReader &xml)
623  {
624  QString missingAttr;
625 
626  switch (type) {
627  case Type::Unknown:
628  return false;
629 
630  case Type::AnyChar:
631  case Type::RegExpr:
632  case Type::StringDetect:
633  case Type::WordDetect:
634  case Type::keyword:
635  missingAttr = string.isEmpty() ? QStringLiteral("String") : QString();
636  break;
637 
638  case Type::DetectChar:
639  missingAttr = !char0.unicode() ? QStringLiteral("char") : QString();
640  break;
641 
642  case Type::Detect2Chars:
643  case Type::RangeDetect:
644  missingAttr = !char0.unicode() && !char1.unicode() ? QStringLiteral("char and char1")
645  : !char0.unicode() ? QStringLiteral("char")
646  : !char1.unicode() ? QStringLiteral("char1")
647  : QString();
648  break;
649 
650  case Type::IncludeRules:
651  missingAttr = context.name.isEmpty() ? QStringLiteral("context") : QString();
652  break;
653 
654  case Type::DetectIdentifier:
655  case Type::DetectSpaces:
656  case Type::Float:
657  case Type::HlCChar:
658  case Type::HlCHex:
659  case Type::HlCOct:
660  case Type::HlCStringChar:
661  case Type::Int:
662  case Type::LineContinue:
663  break;
664  }
665 
666  if (!missingAttr.isEmpty()) {
667  qWarning() << filename << "line" << xml.lineNumber() << "missing attribute:" << missingAttr;
668  return false;
669  }
670 
671  return true;
672  }
673  };
674 
675  int line;
676  // becomes false when a context (except includeRule) refers to it
677  bool isOnlyIncluded = true;
678  // becomes true when an includedRule refers to it with includeAttrib=true
679  bool referencedWithIncludeAttrib = false;
680  bool hasDynamicRule = false;
681  QString name;
682  QString attribute;
683  ContextName lineEndContext;
684  ContextName lineEmptyContext;
685  ContextName fallthroughContext;
686  QVector<Rule> rules;
687  XmlBool dynamic{};
688  XmlBool fallthrough{};
689 
690  bool parseElement(const QString &filename, QXmlStreamReader &xml)
691  {
692  line = xml.lineNumber();
693 
694  bool success = true;
695 
696  for (auto &attr : xml.attributes()) {
697  Parser parser{filename, xml, attr, success};
698  XmlBool noIndentationBasedFolding{};
699 
700  const bool isExtracted = parser.extractString(name, QStringLiteral("name")) || parser.extractString(attribute, QStringLiteral("attribute"))
701  || parser.extractString(lineEndContext.name, QStringLiteral("lineEndContext"))
702  || parser.extractString(lineEmptyContext.name, QStringLiteral("lineEmptyContext"))
703  || parser.extractString(fallthroughContext.name, QStringLiteral("fallthroughContext"))
704  || parser.extractXmlBool(dynamic, QStringLiteral("dynamic")) || parser.extractXmlBool(fallthrough, QStringLiteral("fallthrough"))
705  || parser.extractXmlBool(noIndentationBasedFolding, QStringLiteral("noIndentationBasedFolding"));
706 
707  success = parser.checkIfExtracted(isExtracted);
708  }
709 
710  if (name.isEmpty()) {
711  qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: name";
712  success = false;
713  }
714 
715  if (attribute.isEmpty()) {
716  qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: attribute";
717  success = false;
718  }
719 
720  if (lineEndContext.name.isEmpty()) {
721  qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: lineEndContext";
722  success = false;
723  }
724 
725  return success;
726  }
727  };
728 
//! Two-part version number, ordered by major then minor revision.
struct Version {
    int majorRevision;
    int minorRevision;

    Version(int majorRevision = 0, int minorRevision = 0)
        : majorRevision(majorRevision)
        , minorRevision(minorRevision)
    {
    }

    //! \return \c true when this version is strictly older than \p version
    bool operator<(const Version &version) const
    {
        if (majorRevision != version.majorRevision) {
            return majorRevision < version.majorRevision;
        }
        return minorRevision < version.minorRevision;
    }
};
744 
745  struct ItemDatas {
746  struct Style {
747  QString name;
748  int line;
749 
750  friend uint qHash(const Style &style, uint seed = 0)
751  {
752  return qHash(style.name, seed);
753  }
754 
755  friend bool operator==(const Style &style0, const Style &style1)
756  {
757  return style0.name == style1.name;
758  }
759  };
760 
761  QSet<Style> styleNames;
762 
763  bool parseElement(const QString &filename, QXmlStreamReader &xml)
764  {
765  bool success = true;
766 
767  QString name;
768  QString defStyleNum;
769  XmlBool boolean;
770 
771  for (auto &attr : xml.attributes()) {
772  Parser parser{filename, xml, attr, success};
773 
774  const bool isExtracted = parser.extractString(name, QStringLiteral("name")) || parser.extractString(defStyleNum, QStringLiteral("defStyleNum"))
775  || parser.extractXmlBool(boolean, QStringLiteral("bold")) || parser.extractXmlBool(boolean, QStringLiteral("italic"))
776  || parser.extractXmlBool(boolean, QStringLiteral("underline")) || parser.extractXmlBool(boolean, QStringLiteral("strikeOut"))
777  || parser.extractXmlBool(boolean, QStringLiteral("spellChecking")) || parser.checkColor(QStringLiteral("color"))
778  || parser.checkColor(QStringLiteral("selColor")) || parser.checkColor(QStringLiteral("backgroundColor"))
779  || parser.checkColor(QStringLiteral("selBackgroundColor"));
780 
781  success = parser.checkIfExtracted(isExtracted);
782  }
783 
784  if (!name.isEmpty()) {
785  const auto len = styleNames.size();
786  styleNames.insert({name, int(xml.lineNumber())});
787  if (len == styleNames.size()) {
788  qWarning() << filename << "line" << xml.lineNumber() << "itemData duplicate:" << name;
789  success = false;
790  }
791  }
792 
793  return success;
794  }
795  };
796 
797  struct Definition {
798  QMap<QString, Keywords> keywordsList;
799  QMap<QString, Context> contexts;
800  ItemDatas itemDatas;
801  QString firstContextName;
802  const Context *firstContext = nullptr;
803  QString filename;
804  WordDelimiters wordDelimiters;
805  XmlBool casesensitive{};
806  Version kateVersion{};
807  QString kateVersionStr;
808  QString languageName;
809  QSet<const Definition *> referencedDefinitions;
810 
811  // Parse <keywords ...>
812  bool parseKeywords(QXmlStreamReader &xml)
813  {
814  wordDelimiters.append(xml.attributes().value(QStringLiteral("additionalDeliminator")));
815  wordDelimiters.remove(xml.attributes().value(QStringLiteral("weakDeliminator")));
816  return true;
817  }
818  };
819 
820  // Parse <context>
821  void processContextElement(QXmlStreamReader &xml)
822  {
823  Context context;
824  m_success = context.parseElement(m_currentDefinition->filename, xml) && m_success;
825  if (m_currentDefinition->firstContextName.isEmpty()) {
826  m_currentDefinition->firstContextName = context.name;
827  }
828  if (m_currentDefinition->contexts.contains(context.name)) {
829  qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate context:" << context.name;
830  m_success = false;
831  }
832  m_currentContext = &*m_currentDefinition->contexts.insert(context.name, context);
833  }
834 
835  // Parse <list name="...">
836  void processListElement(QXmlStreamReader &xml)
837  {
838  Keywords keywords;
839  m_success = keywords.parseElement(m_currentDefinition->filename, xml) && m_success;
840  if (m_currentDefinition->keywordsList.contains(keywords.name)) {
841  qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate list:" << keywords.name;
842  m_success = false;
843  }
844  m_currentKeywords = &*m_currentDefinition->keywordsList.insert(keywords.name, keywords);
845  }
846 
847  const Definition *maxKateVersionDefinition(const Definition &definition, QMap<const Definition *, const Definition *> &maxVersionByDefinitions) const
848  {
849  auto it = maxVersionByDefinitions.find(&definition);
850  if (it != maxVersionByDefinitions.end()) {
851  return it.value();
852  } else {
853  auto it = maxVersionByDefinitions.insert(&definition, &definition);
854  for (const auto &referencedDef : definition.referencedDefinitions) {
855  auto *maxDef = maxKateVersionDefinition(*referencedDef, maxVersionByDefinitions);
856  if (it.value()->kateVersion < maxDef->kateVersion) {
857  it.value() = maxDef;
858  }
859  }
860  return it.value();
861  }
862  }
863 
864  // Initialize the referenced rules (Rule::includedRules)
865  void resolveIncludeRules()
866  {
867  QSet<const Context *> usedContexts;
868  QVector<const Context *> contexts;
869 
870  QMutableMapIterator<QString, Definition> def(m_definitions);
871  while (def.hasNext()) {
872  def.next();
873  auto &definition = def.value();
874  QMutableMapIterator<QString, Context> contextIt(definition.contexts);
875  while (contextIt.hasNext()) {
876  contextIt.next();
877  auto &currentContext = contextIt.value();
878  for (auto &rule : currentContext.rules) {
879  if (rule.type != Context::Rule::Type::IncludeRules) {
880  continue;
881  }
882 
883  if (rule.context.stay) {
884  qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself";
885  m_success = false;
886  continue;
887  }
888 
889  if (rule.context.popCount) {
890  qWarning() << definition.filename << "line" << rule.line << "IncludeRules with #pop prefix";
891  m_success = false;
892  }
893 
894  if (!rule.context.context) {
895  m_success = false;
896  continue;
897  }
898 
899  // resolve includedRules and includedIncludeRules
900 
901  usedContexts.clear();
902  usedContexts.insert(rule.context.context);
903  contexts.clear();
904  contexts.append(rule.context.context);
905 
906  for (int i = 0; i < contexts.size(); ++i) {
907  currentContext.hasDynamicRule = contexts[i]->hasDynamicRule;
908  for (const auto &includedRule : contexts[i]->rules) {
909  if (includedRule.type != Context::Rule::Type::IncludeRules) {
910  rule.includedRules.append(&includedRule);
911  } else if (&rule == &includedRule) {
912  qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself by recursivity";
913  m_success = false;
914  } else {
915  rule.includedIncludeRules.insert(&includedRule);
916 
917  if (includedRule.includedRules.isEmpty()) {
918  const auto *context = includedRule.context.context;
919  if (context && !usedContexts.contains(context)) {
920  contexts.append(context);
921  usedContexts.insert(context);
922  }
923  } else {
924  rule.includedRules.append(includedRule.includedRules);
925  }
926  }
927  }
928  }
929  }
930  }
931  }
932  }
933 
934  //! Recursively extracts the contexts used from the first context of the definitions.
935  //! This method detects groups of contexts which are only used among themselves.
936  QSet<const Context *> extractUsedContexts() const
937  {
938  QSet<const Context *> usedContexts;
939  QVector<const Context *> contexts;
940 
941  QMapIterator<QString, Definition> def(m_definitions);
942  while (def.hasNext()) {
943  def.next();
944  const auto &definition = def.value();
945 
946  if (definition.firstContext) {
947  usedContexts.insert(definition.firstContext);
948  contexts.clear();
949  contexts.append(definition.firstContext);
950 
951  for (int i = 0; i < contexts.size(); ++i) {
952  auto appendContext = [&](const Context *context) {
953  if (context && !usedContexts.contains(context)) {
954  contexts.append(context);
955  usedContexts.insert(context);
956  }
957  };
958 
959  const auto *context = contexts[i];
960  appendContext(context->lineEndContext.context);
961  appendContext(context->lineEmptyContext.context);
962  appendContext(context->fallthroughContext.context);
963 
964  for (auto &rule : context->rules) {
965  appendContext(rule.context.context);
966  }
967  }
968  }
969  }
970 
971  return usedContexts;
972  }
973 
974  struct RuleAndInclude {
975  const Context::Rule *rule;
976  const Context::Rule *includeRules;
977 
978  explicit operator bool() const
979  {
980  return rule;
981  }
982  };
983 
984  struct IncludedRuleUnreachableBy {
985  QVector<RuleAndInclude> unreachableBy;
986  bool alwaysUnreachable = true;
987  };
988 
989  //! Check contexts and rules
990  bool checkContexts(const Definition &definition,
991  QSet<const Keywords *> &referencedKeywords,
992  QSet<ItemDatas::Style> &usedAttributeNames,
993  QSet<ItemDatas::Style> &ignoredAttributeNames,
994  const QSet<const Context *> &usedContexts,
995  QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
996  {
997  bool success = true;
998 
999  QMapIterator<QString, Context> contextIt(definition.contexts);
1000  while (contextIt.hasNext()) {
1001  contextIt.next();
1002 
1003  const auto &context = contextIt.value();
1004  const auto &filename = definition.filename;
1005 
1006  if (!usedContexts.contains(&context)) {
1007  qWarning() << filename << "line" << context.line << "unused context:" << context.name;
1008  success = false;
1009  continue;
1010  }
1011 
1012  if (context.name.startsWith(QStringLiteral("#pop"))) {
1013  qWarning() << filename << "line" << context.line << "the context name must not start with '#pop':" << context.name;
1014  success = false;
1015  }
1016 
1017  if (!context.attribute.isEmpty() && (!context.isOnlyIncluded || context.referencedWithIncludeAttrib)) {
1018  usedAttributeNames.insert({context.attribute, context.line});
1019  }
1020 
1021  success = checkfallthrough(definition, context) && success;
1022  success = checkUreachableRules(definition.filename, context, unreachableIncludedRules) && success;
1023  success = suggestRuleMerger(definition.filename, context) && success;
1024 
1025  for (const auto &rule : context.rules) {
1026  if (!rule.attribute.isEmpty()) {
1027  if (rule.lookAhead != XmlBool::True) {
1028  usedAttributeNames.insert({rule.attribute, rule.line});
1029  } else {
1030  ignoredAttributeNames.insert({rule.attribute, rule.line});
1031  }
1032  }
1033  success = checkLookAhead(rule) && success;
1034  success = checkStringDetect(rule) && success;
1035  success = checkKeyword(definition, rule, referencedKeywords) && success;
1036  success = checkRegExpr(filename, rule, context) && success;
1037  success = checkDelimiters(definition, rule) && success;
1038  }
1039  }
1040 
1041  return success;
1042  }
1043 
1044  //! Check that a regular expression in a RegExpr rule:
1045  //! - isValid()
1046  //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1047  //! - dynamic=true but no place holder used?
1048  //! - is not . with lookAhead="1"
1049  //! - is not ^... without column ou firstNonSpace attribute
1050  //! - is not equivalent to DetectSpaces, DetectChar, Detect2Chars, StringDetect, DetectIdentifier, RangeDetect
1051  //! - has no unused captures
1052  //! - has no unnecessary quantifier with lookAhead
1053  bool checkRegExpr(const QString &filename, const Context::Rule &rule, const Context &context) const
1054  {
1055  if (rule.type == Context::Rule::Type::RegExpr) {
1056  const QRegularExpression regexp(rule.string);
1057  if (!checkRegularExpression(rule.filename, regexp, rule.line)) {
1058  return false;
1059  }
1060 
1061  // dynamic == true and no place holder?
1062  if (rule.dynamic == XmlBool::True) {
1063  static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1064  if (!rule.string.contains(placeHolder)) {
1065  qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1066  return false;
1067  }
1068  }
1069 
1070  auto reg = (rule.lookAhead == XmlBool::True) ? rule.sanitizedString : rule.string;
1071  if (rule.lookAhead == XmlBool::True) {
1072  static const QRegularExpression removeAllSuffix(QStringLiteral(
1073  R"(((?<!\\)\\(?:[DSWdsw]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4})|(?<!\\)[^])}\\]|(?=\\)\\\\)[*][?+]?$)"));
1074  reg.replace(removeAllSuffix, QString());
1075  }
1076 
1077  reg.replace(QStringLiteral("{1}"), QString());
1078 
1079  // is DetectSpaces
1080  // optional ^ then \s, [\s], [\t ], [ \t] possibly in (...) or (?:...) followed by *, +
1081  static const QRegularExpression isDetectSpaces(
1082  QStringLiteral(R"(^\^?(?:\((?:\?:)?)?\^?(?:\\s|\[(?:\\s| (?:\t|\\t)|(?:\t|\\t) )\])\)?(?:[*+][*+?]?|[*+])?\)?\)?$)"));
1083  if (rule.string.contains(isDetectSpaces)) {
1084  char const *extraMsg = rule.string.contains(QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1085  qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectSpaces / DetectChar / AnyChar" << extraMsg << ":"
1086  << rule.string;
1087  return false;
1088  }
1089 
1090 #define REG_ESCAPE_CHAR R"(\\(?:[^0BDPSWbdpswoux]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4}))"
1091 #define REG_CHAR "(?:" REG_ESCAPE_CHAR "|\\[(?:" REG_ESCAPE_CHAR "|.)\\]|[^[.^])"
1092 
1093  // is RangeDetect
1094  static const QRegularExpression isRange(QStringLiteral("^\\^?" REG_CHAR "(?:"
1095  "\\.\\*[?*]?" REG_CHAR "|"
1096  "\\[\\^(" REG_ESCAPE_CHAR "|.)\\]\\*[?*]?\\1"
1097  ")$"));
1098  if ((rule.lookAhead == XmlBool::True || rule.minimal == XmlBool::True || rule.string.contains(QStringLiteral(".*?"))
1099  || rule.string.contains(QStringLiteral("[^")))
1100  && reg.contains(isRange)) {
1101  qWarning() << filename << "line" << rule.line << "RegExpr should be replaced by RangeDetect:" << rule.string;
1102  return false;
1103  }
1104 
1105  // is LineContinue
1106  static const QRegularExpression isLineContinue(QStringLiteral("^\\^?" REG_CHAR "\\$$"));
1107  if (reg.contains(isLineContinue)) {
1108  auto extra = (reg[0] == QLatin1Char('^')) ? "with column=\"0\"" : "";
1109  qWarning() << filename << "line" << rule.line << "RegExpr should be replaced by LineContinue:" << rule.string << extra;
1110  return false;
1111  }
1112 
1113  // replace \c, \xhhh, \x{hhh...}, \0dd, \o{ddd}, \uhhhh, with _
1114  static const QRegularExpression sanitize1(QStringLiteral(REG_ESCAPE_CHAR));
1115  reg.replace(sanitize1, QStringLiteral("_"));
1116 
1117 #undef REG_CHAR
1118 #undef REG_ESCAPE_CHAR
1119 
1120  // use minimal or lazy operator
1121  static const QRegularExpression isMinimal(QStringLiteral("(?![.][*+?][$]?[)]*$)[.][*+?][^?+]"));
1122  static const QRegularExpression hasNotGreedy(QStringLiteral("[*+?][?+]"));
1123 
1124  if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(isMinimal) && !reg.contains(hasNotGreedy)
1125  && (!rule.context.context || !rule.context.context->hasDynamicRule || regexp.captureCount() == 0)
1126  && (reg.back() != QLatin1Char('$') || reg.contains(QLatin1Char('|')))) {
1127  qWarning() << filename << "line" << rule.line
1128  << "RegExpr should be have minimal=\"1\" or use lazy operator (i.g, '.*' -> '.*?'):" << rule.string;
1129  return false;
1130  }
1131 
1132  // replace [:...:] with ___
1133  static const QRegularExpression sanitize2(QStringLiteral(R"(\[:\w+:\])"));
1134  reg.replace(sanitize2, QStringLiteral("___"));
1135 
1136  // replace [ccc...], [special] with ...
1137  static const QRegularExpression sanitize3(QStringLiteral(R"(\[(?:\^\]?[^]]*|\]?[^]\\]*?\\.[^]]*|\][^]]{2,}|[^]]{3,})\]|(\[\]?[^]]*\]))"));
1138  reg.replace(sanitize3, QStringLiteral("...\\1"));
1139 
1140  // replace [c] with _
1141  static const QRegularExpression sanitize4(QStringLiteral(R"(\[.\])"));
1142  reg.replace(sanitize4, QStringLiteral("_"));
1143 
1144  const int len = reg.size();
1145  // replace [cC] with _
1146  static const QRegularExpression toInsensitive(QStringLiteral(R"(\[(?:([^]])\1)\])"));
1147  reg = reg.toUpper();
1148  reg.replace(toInsensitive, QString());
1149 
1150  // is StringDetect
1151  // ignore (?:, ) and {n}
1152  static const QRegularExpression isStringDetect(QStringLiteral(R"(^\^?(?:[^|\\?*+$^[{(.]|{(?!\d+,\d*}|,\d+})|\(\?:)+$)"));
1153  if (reg.contains(isStringDetect)) {
1154  char const *extraMsg = rule.string.contains(QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1155  qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by StringDetect / Detect2Chars / DetectChar" << extraMsg
1156  << ":" << rule.string;
1157  if (len != reg.size()) {
1158  qWarning() << rule.filename << "line" << rule.line << "insensitive=\"1\" missing:" << rule.string;
1159  }
1160  return false;
1161  }
1162 
1163  // column="0" or firstNonSpace="1"
1164  if (rule.column == -1 && rule.firstNonSpace != XmlBool::True) {
1165  // ^ without |
1166  // (^sas*) -> ok
1167  // (^sa|s*) -> ko
1168  // (^(sa|s*)) -> ok
1169  auto first = std::as_const(reg).begin();
1170  auto last = std::as_const(reg).end();
1171  int depth = 0;
1172 
1173  while (QLatin1Char('(') == *first) {
1174  ++depth;
1175  ++first;
1176  if (QLatin1Char('?') == *first || QLatin1Char(':') == first[1]) {
1177  first += 2;
1178  }
1179  }
1180 
1181  if (QLatin1Char('^') == *first) {
1182  const int bolDepth = depth;
1183  bool replace = true;
1184 
1185  while (++first != last) {
1186  if (QLatin1Char('(') == *first) {
1187  ++depth;
1188  } else if (QLatin1Char(')') == *first) {
1189  --depth;
1190  if (depth < bolDepth) {
1191  // (^a)? === (^a|) -> ko
1192  if (first + 1 != last && QStringLiteral("*?").contains(first[1])) {
1193  replace = false;
1194  break;
1195  }
1196  }
1197  } else if (QLatin1Char('|') == *first) {
1198  // ignore '|' within subgroup
1199  if (depth <= bolDepth) {
1200  replace = false;
1201  break;
1202  }
1203  }
1204  }
1205 
1206  if (replace) {
1207  qWarning() << rule.filename << "line" << rule.line << "column=\"0\" or firstNonSpace=\"1\" missing with RegExpr:" << rule.string;
1208  return false;
1209  }
1210  }
1211  }
1212 
1213  // add ^ with column=0
1214  if (rule.column == 0 && !rule.isDotRegex) {
1215  bool hasStartOfLine = false;
1216  auto first = std::as_const(reg).begin();
1217  auto last = std::as_const(reg).end();
1218  for (; first != last; ++first) {
1219  if (*first == QLatin1Char('^')) {
1220  hasStartOfLine = true;
1221  break;
1222  } else if (*first == QLatin1Char('(')) {
1223  if (last - first >= 3 && first[1] == QLatin1Char('?') && first[2] == QLatin1Char(':')) {
1224  first += 2;
1225  }
1226  } else {
1227  break;
1228  }
1229  }
1230 
1231  if (!hasStartOfLine) {
1232  qWarning() << rule.filename << "line" << rule.line
1233  << "start of line missing in the pattern with column=\"0\" (i.e. abc -> ^abc):" << rule.string;
1234  return false;
1235  }
1236  }
1237 
1238  bool useCapture = false;
1239 
1240  // detection of unnecessary capture
1241  if (regexp.captureCount()) {
1242  auto maximalCapture = [](const QString(&referenceNames)[9], const QString &s) {
1243  int maxCapture = 9;
1244  while (maxCapture && !s.contains(referenceNames[maxCapture - 1])) {
1245  --maxCapture;
1246  }
1247  return maxCapture;
1248  };
1249 
1250  int maxCaptureUsed = 0;
1251  // maximal dynamic reference
1252  if (rule.context.context && !rule.context.stay) {
1253  for (const auto &nextRule : rule.context.context->rules) {
1254  if (nextRule.dynamic == XmlBool::True) {
1255  static const QString cap[]{
1256  QStringLiteral("%1"),
1257  QStringLiteral("%2"),
1258  QStringLiteral("%3"),
1259  QStringLiteral("%4"),
1260  QStringLiteral("%5"),
1261  QStringLiteral("%6"),
1262  QStringLiteral("%7"),
1263  QStringLiteral("%8"),
1264  QStringLiteral("%9"),
1265  };
1266  int maxDynamicCapture = maximalCapture(cap, nextRule.string);
1267  maxCaptureUsed = std::max(maxCaptureUsed, maxDynamicCapture);
1268  }
1269  }
1270  }
1271 
1272  static const QString num1[]{
1273  QStringLiteral("\\1"),
1274  QStringLiteral("\\2"),
1275  QStringLiteral("\\3"),
1276  QStringLiteral("\\4"),
1277  QStringLiteral("\\5"),
1278  QStringLiteral("\\6"),
1279  QStringLiteral("\\7"),
1280  QStringLiteral("\\8"),
1281  QStringLiteral("\\9"),
1282  };
1283  static const QString num2[]{
1284  QStringLiteral("\\g1"),
1285  QStringLiteral("\\g2"),
1286  QStringLiteral("\\g3"),
1287  QStringLiteral("\\g4"),
1288  QStringLiteral("\\g5"),
1289  QStringLiteral("\\g6"),
1290  QStringLiteral("\\g7"),
1291  QStringLiteral("\\g8"),
1292  QStringLiteral("\\g9"),
1293  };
1294  const int maxBackReference = std::max(maximalCapture(num1, rule.string), maximalCapture(num1, rule.string));
1295 
1296  const int maxCapture = std::max(maxCaptureUsed, maxBackReference);
1297 
1298  if (maxCapture && regexp.captureCount() > maxCapture) {
1299  qWarning() << rule.filename << "line" << rule.line << "RegExpr with" << regexp.captureCount() << "captures but only" << maxCapture
1300  << "are used. Please, replace '(...)' with '(?:...)':" << rule.string;
1301  return false;
1302  }
1303 
1304  useCapture = maxCapture;
1305  }
1306 
1307  if (!useCapture) {
1308  // is DetectIdentifier
1309  static const QRegularExpression isInsensitiveDetectIdentifier(
1310  QStringLiteral(R"(^(\((\?:)?)?\[((a-z|_){2}|(A-Z|_){2})\]([+][*?]?)?\[((0-9|a-z|_){3}|(0-9|A-Z|_){3})\][*][*?]?(\))?$)"));
1311  static const QRegularExpression isSensitiveDetectIdentifier(
1312  QStringLiteral(R"(^(\((\?:)?)?\[(a-z|A-Z|_){3}\]([+][*?]?)?\[(0-9|a-z|A-Z|_){4}\][*][*?]?(\))?$)"));
1313  auto &isDetectIdentifier = (rule.insensitive == XmlBool::True) ? isInsensitiveDetectIdentifier : isSensitiveDetectIdentifier;
1314  if (rule.string.contains(isDetectIdentifier)) {
1315  qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectIdentifier:" << rule.string;
1316  return false;
1317  }
1318  }
1319 
1320  if (rule.isDotRegex) {
1321  // search next rule with same column or firstNonSpace
1322  int i = &rule - context.rules.data() + 1;
1323  const bool hasColumn = (rule.column != -1);
1324  const bool hasFirstNonSpace = (rule.firstNonSpace == XmlBool::True);
1325  const bool isSpecial = (hasColumn || hasFirstNonSpace);
1326  for (; i < context.rules.size(); ++i) {
1327  auto &rule2 = context.rules[i];
1328  if (rule2.type == Context::Rule::Type::IncludeRules && isSpecial) {
1329  i = context.rules.size();
1330  break;
1331  }
1332 
1333  const bool hasColumn2 = (rule2.column != -1);
1334  const bool hasFirstNonSpace2 = (rule2.firstNonSpace == XmlBool::True);
1335  if ((!isSpecial && !hasColumn2 && !hasFirstNonSpace2) || (hasColumn && rule.column == rule2.column)
1336  || (hasFirstNonSpace && hasFirstNonSpace2)) {
1337  break;
1338  }
1339  }
1340 
1341  auto ruleFilename = (filename == rule.filename) ? QString() : QStringLiteral("in ") + rule.filename;
1342  if (i == context.rules.size()) {
1343  if (rule.lookAhead == XmlBool::True && rule.firstNonSpace != XmlBool::True && rule.column == -1 && rule.beginRegion.isEmpty()
1344  && rule.endRegion.isEmpty() && !useCapture) {
1345  qWarning() << filename << "context line" << context.line << ": RegExpr line" << rule.line << ruleFilename
1346  << "should be replaced by fallthroughContext:" << rule.string;
1347  }
1348  } else {
1349  auto &nextRule = context.rules[i];
1350  auto nextRuleFilename = (filename == nextRule.filename) ? QString() : QStringLiteral("in ") + nextRule.filename;
1351  qWarning() << filename << "context line" << context.line << "contains unreachable element line" << nextRule.line << nextRuleFilename
1352  << "because a dot RegExpr is used line" << rule.line << ruleFilename;
1353  }
1354 
1355  // unnecessary quantifier
1356  static const QRegularExpression unnecessaryQuantifier1(QStringLiteral(R"([*+?]([.][*+?]{0,2})?$)"));
1357  static const QRegularExpression unnecessaryQuantifier2(QStringLiteral(R"([*+?]([.][*+?]{0,2})?[)]*$)"));
1358  auto &unnecessaryQuantifier = useCapture ? unnecessaryQuantifier1 : unnecessaryQuantifier2;
1359  if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(unnecessaryQuantifier)) {
1360  qWarning() << filename << "line" << rule.line
1361  << "Last quantifier is not necessary (i.g., 'xyz*' -> 'xy', 'xyz+.' -> 'xyz.'):" << rule.string;
1362  return false;
1363  }
1364  }
1365  }
1366 
1367  return true;
1368  }
1369 
1370  // Parse and check <emptyLine>
1371  bool parseEmptyLine(const QString &filename, QXmlStreamReader &xml)
1372  {
1373  bool success = true;
1374 
1375  QString pattern;
1376  XmlBool casesensitive{};
1377 
1378  for (auto &attr : xml.attributes()) {
1379  Parser parser{filename, xml, attr, success};
1380 
1381  const bool isExtracted =
1382  parser.extractString(pattern, QStringLiteral("regexpr")) || parser.extractXmlBool(casesensitive, QStringLiteral("casesensitive"));
1383 
1384  success = parser.checkIfExtracted(isExtracted);
1385  }
1386 
1387  if (pattern.isEmpty()) {
1388  qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: regexpr";
1389  success = false;
1390  } else {
1391  success = checkRegularExpression(filename, QRegularExpression(pattern), xml.lineNumber());
1392  }
1393 
1394  return success;
1395  }
1396 
1397  //! Check that a regular expression:
1398  //! - isValid()
1399  //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1400  bool checkRegularExpression(const QString &filename, const QRegularExpression &regexp, int line) const
1401  {
1402  const auto pattern = regexp.pattern();
1403 
1404  // validate regexp
1405  if (!regexp.isValid()) {
1406  qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem:" << regexp.errorString() << "at offset"
1407  << regexp.patternErrorOffset();
1408  return false;
1409  }
1410 
1411  // catch possible case typos: [A-z] or [a-Z]
1412  const int azOffset = std::max(pattern.indexOf(QStringLiteral("A-z")), pattern.indexOf(QStringLiteral("a-Z")));
1413  if (azOffset >= 0) {
1414  qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem: [a-Z] or [A-z] at offset" << azOffset;
1415  return false;
1416  }
1417 
1418  return true;
1419  }
1420 
1421  //! Search for rules with lookAhead="true" and context="#stay".
1422  //! This would cause an infinite loop.
1423  bool checkfallthrough(const Definition &definition, const Context &context) const
1424  {
1425  bool success = true;
1426 
1427  if (!context.fallthroughContext.name.isEmpty()) {
1428  if (context.fallthroughContext.stay) {
1429  qWarning() << definition.filename << "line" << context.line << "possible infinite loop due to fallthroughContext=\"#stay\" in context "
1430  << context.name;
1431  success = false;
1432  }
1433 
1434  const bool mandatoryFallthroughAttribute = definition.kateVersion < Version{5, 62};
1435  if (context.fallthrough == XmlBool::True && !mandatoryFallthroughAttribute) {
1436  qWarning() << definition.filename << "line" << context.line << "fallthrough attribute is unnecessary with kateversion >= 5.62 in context"
1437  << context.name;
1438  success = false;
1439  } else if (context.fallthrough != XmlBool::True && mandatoryFallthroughAttribute) {
1440  qWarning() << definition.filename << "line" << context.line
1441  << "fallthroughContext attribute without fallthrough=\"1\" attribute is only valid with kateversion >= 5.62 in context"
1442  << context.name;
1443  success = false;
1444  }
1445  }
1446 
1447  return success;
1448  }
1449 
1450  //! Search for additionalDeliminator/weakDeliminator which has no effect.
1451  bool checkDelimiters(const Definition &definition, const Context::Rule &rule) const
1452  {
1453  if (rule.additionalDeliminator.isEmpty() && rule.weakDeliminator.isEmpty()) {
1454  return true;
1455  }
1456 
1457  bool success = true;
1458 
1459  if (definition.kateVersion < Version{5, 79}) {
1460  qWarning() << definition.filename << "line" << rule.line
1461  << "additionalDeliminator and weakDeliminator are only available since version \"5.79\". Please, increase kateversion.";
1462  success = false;
1463  }
1464 
1465  for (QChar c : rule.additionalDeliminator) {
1466  if (!definition.wordDelimiters.contains(c)) {
1467  return success;
1468  }
1469  }
1470 
1471  for (QChar c : rule.weakDeliminator) {
1472  if (definition.wordDelimiters.contains(c)) {
1473  return success;
1474  }
1475  }
1476 
1477  qWarning() << rule.filename << "line" << rule.line << "unnecessary use of additionalDeliminator and/or weakDeliminator" << rule.string;
1478  return false;
1479  }
1480 
1481  //! Search for rules with lookAhead="true" and context="#stay".
1482  //! This would cause an infinite loop.
1483  bool checkKeyword(const Definition &definition, const Context::Rule &rule, QSet<const Keywords *> &referencedKeywords) const
1484  {
1485  if (rule.type == Context::Rule::Type::keyword) {
1486  auto it = definition.keywordsList.find(rule.string);
1487  if (it != definition.keywordsList.end()) {
1488  referencedKeywords.insert(&*it);
1489  } else {
1490  qWarning() << rule.filename << "line" << rule.line << "reference of non-existing keyword list:" << rule.string;
1491  return false;
1492  }
1493  }
1494  return true;
1495  }
1496 
1497  //! Search for rules with lookAhead="true" and context="#stay".
1498  //! This would cause an infinite loop.
1499  bool checkLookAhead(const Context::Rule &rule) const
1500  {
1501  if (rule.lookAhead == XmlBool::True && rule.context.stay) {
1502  qWarning() << rule.filename << "line" << rule.line << "infinite loop: lookAhead with context #stay";
1503  }
1504  return true;
1505  }
1506 
1507  //! Check that StringDetect contains more that 2 characters
1508  //! Fix with following command:
1509  //! \code
1510  //! sed -E
1511  //! '/StringDetect/{/dynamic="(1|true)|insensitive="(1|true)/!{s/StringDetect(.*)String="(.|&lt;|&gt;|&quot;|&amp;)(.|&lt;|&gt;|&quot;|&amp;)"/Detect2Chars\1char="\2"
1512  //! char1="\3"/;t;s/StringDetect(.*)String="(.|&lt;|&gt;|&quot;|&amp;)"/DetectChar\1char="\2"/}}' -i file.xml...
1513  //! \endcode
1514  bool checkStringDetect(const Context::Rule &rule) const
1515  {
1516  if (rule.type == Context::Rule::Type::StringDetect) {
1517  // dynamic == true and no place holder?
1518  if (rule.dynamic == XmlBool::True) {
1519  static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1520  if (!rule.string.contains(placeHolder)) {
1521  qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1522  return false;
1523  }
1524  }
1525  }
1526  return true;
1527  }
1528 
1529  //! Check <include> and delimiter in a keyword list
1530  bool checkKeywordsList(const Definition &definition, QSet<const Keywords *> &referencedKeywords) const
1531  {
1532  bool success = true;
1533 
1534  bool includeNotSupport = (definition.kateVersion < Version{5, 53});
1535  QMapIterator<QString, Keywords> keywordsIt(definition.keywordsList);
1536  while (keywordsIt.hasNext()) {
1537  keywordsIt.next();
1538 
1539  for (const auto &include : keywordsIt.value().items.includes) {
1540  if (includeNotSupport) {
1541  qWarning() << definition.filename << "line" << include.line
1542  << "<include> is only available since version \"5.53\". Please, increase kateversion.";
1543  success = false;
1544  }
1545  success = checkKeywordInclude(definition, include, referencedKeywords) && success;
1546  }
1547 
1548  // Check that keyword list items do not have deliminator character
1549 #if 0
1550  for (const auto& keyword : keywordsIt.value().items.keywords) {
1551  for (QChar c : keyword.content) {
1552  if (definition.wordDelimiters.contains(c)) {
1553  qWarning() << definition.filename << "line" << keyword.line << "keyword with delimiter:" << c << "in" << keyword.content;
1554  success = false;
1555  }
1556  }
1557  }
1558 #endif
1559  }
1560 
1561  return success;
1562  }
1563 
1564  //! Search for non-existing keyword include.
1565  bool checkKeywordInclude(const Definition &definition, const Keywords::Items::Item &include, QSet<const Keywords *> &referencedKeywords) const
1566  {
1567  bool containsKeywordName = true;
1568  int const idx = include.content.indexOf(QStringLiteral("##"));
1569  if (idx == -1) {
1570  auto it = definition.keywordsList.find(include.content);
1571  containsKeywordName = (it != definition.keywordsList.end());
1572  if (containsKeywordName) {
1573  referencedKeywords.insert(&*it);
1574  }
1575  } else {
1576  auto defName = include.content.mid(idx + 2);
1577  auto listName = include.content.left(idx);
1578  auto it = m_definitions.find(defName);
1579  if (it == m_definitions.end()) {
1580  qWarning() << definition.filename << "line" << include.line << "unknown definition in" << include.content;
1581  return false;
1582  }
1583  containsKeywordName = it->keywordsList.contains(listName);
1584  }
1585 
1586  if (!containsKeywordName) {
1587  qWarning() << definition.filename << "line" << include.line << "unknown keyword name in" << include.content;
1588  }
1589 
1590  return containsKeywordName;
1591  }
1592 
1593  //! Check if a rule is hidden by another
1594  //! - rule hidden by DetectChar or AnyChar
1595  //! - DetectSpaces, AnyChar, Int, Float with all their characters hidden by DetectChar or AnyChar
1596  //! - StringDetect, WordDetect, RegExpr with as prefix Detect2Chars or other strings
1597  //! - duplicate rule (Int, Float, keyword with same String, etc)
1598  //! - Rule hidden by a dot regex
1599  bool checkUreachableRules(const QString &filename,
1600  const Context &context,
1601  QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1602  {
1603  if (context.isOnlyIncluded) {
1604  return true;
1605  }
1606 
1607  struct Rule4 {
1608  RuleAndInclude setRule(const Context::Rule &rule, const Context::Rule *includeRules = nullptr)
1609  {
1610  auto set = [&](RuleAndInclude &ruleAndInclude) {
1611  auto old = ruleAndInclude;
1612  ruleAndInclude = {&rule, includeRules};
1613  return old;
1614  };
1615 
1616  if (rule.firstNonSpace == XmlBool::True) {
1617  return set(firstNonSpace);
1618  } else if (rule.column == 0) {
1619  return set(column0);
1620  } else if (rule.column > 0) {
1621  return set(columnGreaterThan0[rule.column]);
1622  } else {
1623  return set(normal);
1624  }
1625  }
1626 
1627  private:
1628  RuleAndInclude normal;
1629  RuleAndInclude column0;
1630  QMap<int, RuleAndInclude> columnGreaterThan0;
1631  RuleAndInclude firstNonSpace;
1632  };
1633 
1634  // Associate QChar with RuleAndInclude
1635  struct CharTable {
1636  /// Search RuleAndInclude associated with @p c.
1637  RuleAndInclude find(QChar c) const
1638  {
1639  if (c.unicode() < 128) {
1640  return m_asciiMap[c.unicode()];
1641  }
1642  auto it = m_utf8Map.find(c);
1643  return it == m_utf8Map.end() ? RuleAndInclude{nullptr, nullptr} : it.value();
1644  }
1645 
1646  /// Search RuleAndInclude associated with the characters of @p s.
1647  /// \return an empty QVector when at least one character is not found.
1649  {
1650  QVector<RuleAndInclude> result;
1651 
1652  for (QChar c : s) {
1653  if (!find(c)) {
1654  return result;
1655  }
1656  }
1657 
1658  for (QChar c : s) {
1659  result.append(find(c));
1660  }
1661 
1662  return result;
1663  }
1664 
1665  /// Associates @p c with a rule.
1666  void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1667  {
1668  if (c.unicode() < 128) {
1669  m_asciiMap[c.unicode()] = {&rule, includeRule};
1670  } else {
1671  m_utf8Map[c] = {&rule, includeRule};
1672  }
1673  }
1674 
1675  /// Associates each character of @p s with a rule.
1676  void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1677  {
1678  for (QChar c : s) {
1679  append(c, rule, includeRule);
1680  }
1681  }
1682 
1683  private:
1684  RuleAndInclude m_asciiMap[127]{};
1685  QMap<QChar, RuleAndInclude> m_utf8Map;
1686  };
1687 
1688  struct Char4Tables {
1689  CharTable chars;
1690  CharTable charsColumn0;
1691  QMap<int, CharTable> charsColumnGreaterThan0;
1692  CharTable charsFirstNonSpace;
1693  };
1694 
1695  // View on Char4Tables members
1696  struct CharTableArray {
1697  // Append Char4Tables members that satisfies firstNonSpace and column.
1698  // Char4Tables::char is always added.
1699  CharTableArray(Char4Tables &tables, const Context::Rule &rule)
1700  {
1701  if (rule.firstNonSpace == XmlBool::True) {
1702  appendTable(tables.charsFirstNonSpace);
1703  }
1704 
1705  if (rule.column == 0) {
1706  appendTable(tables.charsColumn0);
1707  } else if (rule.column > 0) {
1708  appendTable(tables.charsColumnGreaterThan0[rule.column]);
1709  }
1710 
1711  appendTable(tables.chars);
1712  }
1713 
1714  // Removes Char4Tables::chars when the rule contains firstNonSpace or column
1715  void removeNonSpecialWhenSpecial()
1716  {
1717  if (m_size > 1) {
1718  --m_size;
1719  }
1720  }
1721 
1722  /// Search RuleAndInclude associated with @p c.
1723  RuleAndInclude find(QChar c) const
1724  {
1725  for (int i = 0; i < m_size; ++i) {
1726  if (auto ruleAndInclude = m_charTables[i]->find(c)) {
1727  return ruleAndInclude;
1728  }
1729  }
1730  return RuleAndInclude{nullptr, nullptr};
1731  }
1732 
1733  /// Search RuleAndInclude associated with the characters of @p s.
1734  /// \return an empty QVector when at least one character is not found.
1736  {
1737  for (int i = 0; i < m_size; ++i) {
1738  auto result = m_charTables[i]->find(s);
1739  if (result.size()) {
1740  while (++i < m_size) {
1741  result.append(m_charTables[i]->find(s));
1742  }
1743  return result;
1744  }
1745  }
1746  return QVector<RuleAndInclude>();
1747  }
1748 
1749  /// Associates @p c with a rule.
1750  void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1751  {
1752  for (int i = 0; i < m_size; ++i) {
1753  m_charTables[i]->append(c, rule, includeRule);
1754  }
1755  }
1756 
1757  /// Associates each character of @p s with a rule.
1758  void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1759  {
1760  for (int i = 0; i < m_size; ++i) {
1761  m_charTables[i]->append(s, rule, includeRule);
1762  }
1763  }
1764 
1765  private:
1766  void appendTable(CharTable &t)
1767  {
1768  m_charTables[m_size] = &t;
1769  ++m_size;
1770  }
1771 
1772  CharTable *m_charTables[3];
1773  int m_size = 0;
1774  };
1775 
1776  struct ObservableRule {
1777  const Context::Rule *rule;
1778  const Context::Rule *includeRules;
1779 
1780  bool hasResolvedIncludeRules() const
1781  {
1782  return rule == includeRules;
1783  }
1784  };
1785 
1786  // Iterates over all the rules, including those in includedRules
1787  struct RuleIterator {
1788  RuleIterator(const QVector<ObservableRule> &rules, const ObservableRule &endRule)
1789  : m_end(&endRule - rules.data())
1790  , m_rules(rules)
1791  {
1792  }
1793 
1794  /// \return next rule or nullptr
1795  const Context::Rule *next()
1796  {
1797  // if in includedRules
1798  if (m_includedRules) {
1799  ++m_i2;
1800  if (m_i2 != m_includedRules->size()) {
1801  return (*m_includedRules)[m_i2];
1802  }
1803  ++m_i;
1804  m_includedRules = nullptr;
1805  }
1806 
1807  // if is a includedRules
1808  while (m_i < m_end && m_rules[m_i].rule->type == Context::Rule::Type::IncludeRules) {
1809  if (!m_rules[m_i].includeRules && m_rules[m_i].rule->includedRules.size()) {
1810  m_i2 = 0;
1811  m_includedRules = &m_rules[m_i].rule->includedRules;
1812  return (*m_includedRules)[m_i2];
1813  }
1814  ++m_i;
1815  }
1816 
1817  if (m_i < m_end) {
1818  ++m_i;
1819  return m_rules[m_i - 1].rule;
1820  }
1821 
1822  return nullptr;
1823  }
1824 
1825  /// \return current IncludeRules or nullptr
1826  const Context::Rule *currentIncludeRules() const
1827  {
1828  return m_includedRules ? m_rules[m_i].rule : m_rules[m_i].includeRules;
1829  }
1830 
1831  private:
1832  int m_i = 0;
1833  int m_i2;
1834  int m_end;
1835  const QVector<ObservableRule> &m_rules;
1836  const QVector<const Context::Rule *> *m_includedRules = nullptr;
1837  };
1838 
1839  // Dot regex container that satisfies firstNonSpace and column.
1840  struct DotRegex {
1841  /// Append a dot regex rule.
1842  void append(const Context::Rule &rule, const Context::Rule *includedRule)
1843  {
1844  auto array = extractDotRegexes(rule);
1845  if (array[0]) {
1846  *array[0] = {&rule, includedRule};
1847  }
1848  if (array[1]) {
1849  *array[1] = {&rule, includedRule};
1850  }
1851  }
1852 
1853  /// Search dot regex which hides @p rule
1854  RuleAndInclude find(const Context::Rule &rule)
1855  {
1856  auto array = extractDotRegexes(rule);
1857  if (array[0]) {
1858  return *array[0];
1859  }
1860  if (array[1]) {
1861  return *array[1];
1862  }
1863  return RuleAndInclude{};
1864  }
1865 
1866  private:
1867  using Array = std::array<RuleAndInclude *, 2>;
1868 
1869  Array extractDotRegexes(const Context::Rule &rule)
1870  {
1871  Array ret{};
1872 
1873  if (rule.firstNonSpace != XmlBool::True && rule.column == -1) {
1874  ret[0] = &dotRegex;
1875  } else {
1876  if (rule.firstNonSpace == XmlBool::True) {
1877  ret[0] = &dotRegexFirstNonSpace;
1878  }
1879 
1880  if (rule.column == 0) {
1881  ret[1] = &dotRegexColumn0;
1882  } else if (rule.column > 0) {
1883  ret[1] = &dotRegexColumnGreaterThan0[rule.column];
1884  }
1885  }
1886 
1887  return ret;
1888  }
1889 
1890  RuleAndInclude dotRegex{};
1891  RuleAndInclude dotRegexColumn0{};
1892  QMap<int, RuleAndInclude> dotRegexColumnGreaterThan0{};
1893  RuleAndInclude dotRegexFirstNonSpace{};
1894  };
1895 
1896  bool success = true;
1897 
1898  // characters of DetectChar/AnyChar
1899  Char4Tables detectChars;
1900  // characters of dynamic DetectChar
1901  Char4Tables dynamicDetectChars;
1902  // characters of LineContinue
1903  Char4Tables lineContinueChars;
1904 
1905  Rule4 intRule{};
1906  Rule4 floatRule{};
1907  Rule4 hlCCharRule{};
1908  Rule4 hlCOctRule{};
1909  Rule4 hlCHexRule{};
1910  Rule4 hlCStringCharRule{};
1911  Rule4 detectIdentifierRule{};
1912 
1913  // Contains includedRules and included includedRules
1914  QMap<Context const *, RuleAndInclude> includeContexts;
1915 
1916  DotRegex dotRegex;
1917 
1918  QVector<ObservableRule> observedRules;
1919  observedRules.reserve(context.rules.size());
1920  for (const Context::Rule &rule : context.rules) {
1921  const Context::Rule *includeRule = nullptr;
1922  if (rule.type == Context::Rule::Type::IncludeRules) {
1923  auto *context = rule.context.context;
1924  if (context && context->isOnlyIncluded) {
1925  includeRule = &rule;
1926  }
1927  }
1928 
1929  observedRules.push_back({&rule, includeRule});
1930  if (includeRule) {
1931  for (const Context::Rule *rule2 : rule.includedRules) {
1932  observedRules.push_back({rule2, includeRule});
1933  }
1934  }
1935  }
1936 
1937  for (auto &observedRule : observedRules) {
1938  const Context::Rule &rule = *observedRule.rule;
1939  bool isUnreachable = false;
1940  QVector<RuleAndInclude> unreachableBy;
1941 
1942  // declare rule as unreachable if ruleAndInclude is not empty
1943  auto updateUnreachable1 = [&](RuleAndInclude ruleAndInclude) {
1944  if (ruleAndInclude) {
1945  isUnreachable = true;
1946  unreachableBy.append(ruleAndInclude);
1947  }
1948  };
1949 
1950  // declare rule as unreachable if ruleAndIncludes is not empty
1951  auto updateUnreachable2 = [&](const QVector<RuleAndInclude> &ruleAndIncludes) {
1952  if (!ruleAndIncludes.isEmpty()) {
1953  isUnreachable = true;
1954  unreachableBy.append(ruleAndIncludes);
1955  }
1956  };
1957 
1958  // check if rule2.firstNonSpace/column is compatible with those of rule
1959  auto isCompatible = [&rule](Context::Rule const &rule2) {
1960  return (rule2.firstNonSpace != XmlBool::True && rule2.column == -1) || (rule.column == rule2.column && rule.column != -1)
1961  || (rule.firstNonSpace == rule2.firstNonSpace && rule.firstNonSpace == XmlBool::True);
1962  };
1963 
1964  updateUnreachable1(dotRegex.find(rule));
1965 
1966  switch (rule.type) {
1967  // checks if hidden by DetectChar/AnyChar
1968  // then add the characters to detectChars
1969  case Context::Rule::Type::AnyChar: {
1970  auto tables = CharTableArray(detectChars, rule);
1971  updateUnreachable2(tables.find(rule.string));
1972  tables.removeNonSpecialWhenSpecial();
1973  tables.append(rule.string, rule);
1974  break;
1975  }
1976 
1977  // check if is hidden by DetectChar/AnyChar
1978  // then add the characters to detectChars or dynamicDetectChars
1979  case Context::Rule::Type::DetectChar: {
1980  auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
1981  auto tables = CharTableArray(chars4, rule);
1982  updateUnreachable1(tables.find(rule.char0));
1983  tables.removeNonSpecialWhenSpecial();
1984  tables.append(rule.char0, rule);
1985  break;
1986  }
1987 
1988  // check if hidden by DetectChar/AnyChar
1989  // then add spaces characters to detectChars
1990  case Context::Rule::Type::DetectSpaces: {
1991  auto tables = CharTableArray(detectChars, rule);
1992  updateUnreachable2(tables.find(QStringLiteral(" \t")));
1993  tables.removeNonSpecialWhenSpecial();
1994  tables.append(QLatin1Char(' '), rule);
1995  tables.append(QLatin1Char('\t'), rule);
1996  break;
1997  }
1998 
1999  // check if hidden by DetectChar/AnyChar
2000  case Context::Rule::Type::HlCChar:
2001  updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('\'')));
2002  updateUnreachable1(hlCCharRule.setRule(rule));
2003  break;
2004 
2005  // check if hidden by DetectChar/AnyChar
2006  case Context::Rule::Type::HlCHex:
2007  updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('0')));
2008  updateUnreachable1(hlCHexRule.setRule(rule));
2009  break;
2010 
2011  // check if hidden by DetectChar/AnyChar
2012  case Context::Rule::Type::HlCOct:
2013  updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('0')));
2014  updateUnreachable1(hlCOctRule.setRule(rule));
2015  break;
2016 
2017  // check if hidden by DetectChar/AnyChar
2018  case Context::Rule::Type::HlCStringChar:
2019  updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('\\')));
2020  updateUnreachable1(hlCStringCharRule.setRule(rule));
2021  break;
2022 
2023  // check if hidden by DetectChar/AnyChar
2024  case Context::Rule::Type::Int:
2025  updateUnreachable2(CharTableArray(detectChars, rule).find(QStringLiteral("0123456789")));
2026  updateUnreachable1(intRule.setRule(rule));
2027  break;
2028 
2029  // check if hidden by DetectChar/AnyChar
2030  case Context::Rule::Type::Float:
2031  updateUnreachable2(CharTableArray(detectChars, rule).find(QStringLiteral("0123456789.")));
2032  updateUnreachable1(floatRule.setRule(rule));
2033  break;
2034 
2035  // check if hidden by another DetectIdentifier rule
2036  case Context::Rule::Type::DetectIdentifier:
2037  updateUnreachable1(detectIdentifierRule.setRule(rule));
2038  break;
2039 
2040  // check if hidden by DetectChar/AnyChar or another LineContinue
2041  case Context::Rule::Type::LineContinue: {
2042  updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2043 
2044  auto tables = CharTableArray(lineContinueChars, rule);
2045  updateUnreachable1(tables.find(rule.char0));
2046  tables.removeNonSpecialWhenSpecial();
2047  tables.append(rule.char0, rule);
2048  break;
2049  }
2050 
2051  // check if hidden by DetectChar/AnyChar or another Detect2Chars/RangeDetect
2052  case Context::Rule::Type::Detect2Chars:
2053  case Context::Rule::Type::RangeDetect:
2054  updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2055  if (!isUnreachable) {
2056  RuleIterator ruleIterator(observedRules, observedRule);
2057  while (const auto *rulePtr = ruleIterator.next()) {
2058  if (isUnreachable) {
2059  break;
2060  }
2061  const auto &rule2 = *rulePtr;
2062  if (rule2.type == rule.type && isCompatible(rule2) && rule.char0 == rule2.char0 && rule.char1 == rule2.char1) {
2063  updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2064  }
2065  }
2066  }
2067  break;
2068 
2069  case Context::Rule::Type::RegExpr: {
2070  if (rule.isDotRegex) {
2071  dotRegex.append(rule, nullptr);
2072  break;
2073  }
2074 
2075  // check that `rule` does not have another RegExpr as a prefix
2076  RuleIterator ruleIterator(observedRules, observedRule);
2077  while (const auto *rulePtr = ruleIterator.next()) {
2078  if (isUnreachable) {
2079  break;
2080  }
2081  const auto &rule2 = *rulePtr;
2082  if (rule2.type == Context::Rule::Type::RegExpr && isCompatible(rule2) && rule.insensitive == rule2.insensitive
2083  && rule.dynamic == rule2.dynamic && rule.sanitizedString.startsWith(rule2.sanitizedString)) {
2084  bool add = (rule.sanitizedString.startsWith(rule2.string) || rule.sanitizedString.size() < rule2.sanitizedString.size() + 2);
2085  if (!add) {
2086  // \s.* (sanitized = \s) is considered hiding \s*\S
2087  // we check the quantifiers to see if this is the case
2088  auto c1 = rule.sanitizedString[rule2.sanitizedString.size()].unicode();
2089  auto c2 = rule.sanitizedString[rule2.sanitizedString.size() + 1].unicode();
2090  auto c3 = rule2.sanitizedString.back().unicode();
2091  if (c3 == '*' || c3 == '?' || c3 == '+') {
2092  add = true;
2093  } else if (c1 == '*' || c1 == '?') {
2094  add = !((c2 == '?' || c2 == '+') || (rule.sanitizedString.size() >= rule2.sanitizedString.size() + 3));
2095  } else {
2096  add = true;
2097  }
2098  }
2099  if (add) {
2100  updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2101  }
2102  }
2103  }
2104 
2105  Q_FALLTHROUGH();
2106  }
2107  // check if a rule does not have another rule as a prefix
2108  case Context::Rule::Type::WordDetect:
2109  case Context::Rule::Type::StringDetect: {
2110  // check that dynamic `rule` does not have another dynamic StringDetect as a prefix
2111  if (rule.type == Context::Rule::Type::StringDetect && rule.dynamic == XmlBool::True) {
2112  RuleIterator ruleIterator(observedRules, observedRule);
2113  while (const auto *rulePtr = ruleIterator.next()) {
2114  if (isUnreachable) {
2115  break;
2116  }
2117 
2118  const auto &rule2 = *rulePtr;
2119  if (rule2.type != Context::Rule::Type::StringDetect || rule2.dynamic != XmlBool::True || !isCompatible(rule2)) {
2120  continue;
2121  }
2122 
2123  const bool isSensitive = (rule2.insensitive == XmlBool::True);
2124  const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2125  if ((isSensitive || rule.insensitive != XmlBool::True) && rule.string.startsWith(rule2.string, caseSensitivity)) {
2126  updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2127  }
2128  }
2129  }
2130 
2131  // string used for comparison and truncated from "dynamic" part
2132  QStringView s = rule.string;
2133 
2134  // truncate to '%' with dynamic rules
2135  if (rule.dynamic == XmlBool::True) {
2136  static const QRegularExpression dynamicPosition(QStringLiteral(R"(^(?:[^%]*|%(?![1-9]))*)"));
2137  auto result = dynamicPosition.match(rule.string);
2138  s = s.left(result.capturedLength());
2139  }
2140 
2141  QString sanitizedRegex;
2142  // truncate to special character with RegExpr.
2143  // If regexp contains '|', `s` becomes empty.
2144  if (rule.type == Context::Rule::Type::RegExpr) {
2145  static const QRegularExpression regularChars(QStringLiteral(R"(^(?:[^.?*+^$[{(\\|]+|\\[-.?*+^$[\]{}()\\|]+|\[[^^\\]\])+)"));
2146  static const QRegularExpression sanitizeChars(QStringLiteral(R"(\\([-.?*+^$[\]{}()\\|])|\[([^^\\])\])"));
2147  const qsizetype result = regularChars.match(rule.string).capturedLength();
2148  const qsizetype pos = qMin(result, s.size());
2149  if (rule.string.indexOf(QLatin1Char('|'), pos) < pos) {
2150  sanitizedRegex = rule.string.left(qMin(result, s.size()));
2151  sanitizedRegex.replace(sanitizeChars, QStringLiteral("\\1"));
2152  s = sanitizedRegex;
2153  } else {
2154  s = QStringView();
2155  }
2156  }
2157 
2158  // check if hidden by DetectChar/AnyChar
2159  if (s.size() > 0) {
2160  auto t = CharTableArray(detectChars, rule);
2161  if (rule.insensitive != XmlBool::True) {
2162  updateUnreachable1(t.find(s[0]));
2163  } else {
2164  QChar c2[]{s[0].toLower(), s[0].toUpper()};
2165  updateUnreachable2(t.find(QStringView(c2, 2)));
2166  }
2167  }
2168 
2169  // check if Detect2Chars, StringDetect, WordDetect is not a prefix of s
2170  if (s.size() > 0 && !isUnreachable) {
2171  // combination of uppercase and lowercase
2172  RuleAndInclude detect2CharsInsensitives[]{{}, {}, {}, {}};
2173 
2174  RuleIterator ruleIterator(observedRules, observedRule);
2175  while (const auto *rulePtr = ruleIterator.next()) {
2176  if (isUnreachable) {
2177  break;
2178  }
2179  const auto &rule2 = *rulePtr;
2180  const bool isSensitive = (rule2.insensitive == XmlBool::True);
2181  const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2182 
2183  switch (rule2.type) {
2184  // check that it is not a detectChars prefix
2185  case Context::Rule::Type::Detect2Chars:
2186  if (isCompatible(rule2) && s.size() >= 2) {
2187  if (rule.insensitive != XmlBool::True) {
2188  if (rule2.char0 == s[0] && rule2.char1 == s[1]) {
2189  updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2190  }
2191  } else {
2192  // when the string is case insensitive,
2193  // all 4 upper/lower case combinations must be found
2194  auto set = [&](RuleAndInclude &x, QChar c1, QChar c2) {
2195  if (!x && rule2.char0 == c1 && rule2.char0 == c2) {
2196  x = {&rule2, ruleIterator.currentIncludeRules()};
2197  }
2198  };
2199  set(detect2CharsInsensitives[0], s[0].toLower(), s[1].toLower());
2200  set(detect2CharsInsensitives[1], s[0].toLower(), s[1].toUpper());
2201  set(detect2CharsInsensitives[2], s[0].toUpper(), s[1].toUpper());
2202  set(detect2CharsInsensitives[3], s[0].toUpper(), s[1].toLower());
2203 
2204  if (detect2CharsInsensitives[0] && detect2CharsInsensitives[1] && detect2CharsInsensitives[2]
2205  && detect2CharsInsensitives[3]) {
2206  isUnreachable = true;
2207  unreachableBy.append(detect2CharsInsensitives[0]);
2208  unreachableBy.append(detect2CharsInsensitives[1]);
2209  unreachableBy.append(detect2CharsInsensitives[2]);
2210  unreachableBy.append(detect2CharsInsensitives[3]);
2211  }
2212  }
2213  }
2214  break;
2215 
2216  // check that it is not a StringDetect prefix
2217  case Context::Rule::Type::StringDetect:
2218  if (isCompatible(rule2) && rule2.dynamic != XmlBool::True && (isSensitive || rule.insensitive != XmlBool::True)
2219  && s.startsWith(rule2.string, caseSensitivity)) {
2220  updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2221  }
2222  break;
2223 
2224  // check if a WordDetect is hidden by another WordDetect
2225  case Context::Rule::Type::WordDetect:
2226  if (rule.type == Context::Rule::Type::WordDetect && isCompatible(rule2) && (isSensitive || rule.insensitive != XmlBool::True)
2227  && 0 == rule.string.compare(rule2.string, caseSensitivity)) {
2228  updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2229  }
2230  break;
2231 
2232  default:;
2233  }
2234  }
2235  }
2236 
2237  break;
2238  }
2239 
2240  // check if hidden by another keyword rule
2241  case Context::Rule::Type::keyword: {
2242  RuleIterator ruleIterator(observedRules, observedRule);
2243  while (const auto *rulePtr = ruleIterator.next()) {
2244  if (isUnreachable) {
2245  break;
2246  }
2247  const auto &rule2 = *rulePtr;
2248  if (rule2.type == Context::Rule::Type::keyword && isCompatible(rule2) && rule.string == rule2.string) {
2249  updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2250  }
2251  }
2252  // TODO check that all keywords are hidden by another rules
2253  break;
2254  }
2255 
2256  // add characters in those used but without checking if they are already.
2257  // <DetectChar char="}" />
2258  // <includedRules .../> <- reference an another <DetectChar char="}" /> who will not be checked
2259  // <includedRules .../> <- reference a <DetectChar char="{" /> who will be added
2260  // <DetectChar char="{" /> <- hidden by previous rule
2261  case Context::Rule::Type::IncludeRules:
2262  if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2263  break;
2264  }
2265 
2266  if (auto &ruleAndInclude = includeContexts[rule.context.context]) {
2267  updateUnreachable1(ruleAndInclude);
2268  } else {
2269  ruleAndInclude.rule = &rule;
2270  }
2271 
2272  for (const auto *rulePtr : rule.includedIncludeRules) {
2273  includeContexts.insert(rulePtr->context.context, RuleAndInclude{rulePtr, &rule});
2274  }
2275 
2276  if (observedRule.includeRules) {
2277  break;
2278  }
2279 
2280  for (const auto *rulePtr : rule.includedRules) {
2281  const auto &rule2 = *rulePtr;
2282  switch (rule2.type) {
2283  case Context::Rule::Type::AnyChar: {
2284  auto tables = CharTableArray(detectChars, rule2);
2285  tables.removeNonSpecialWhenSpecial();
2286  tables.append(rule2.string, rule2, &rule);
2287  break;
2288  }
2289 
2290  case Context::Rule::Type::DetectChar: {
2291  auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2292  auto tables = CharTableArray(chars4, rule2);
2293  tables.removeNonSpecialWhenSpecial();
2294  tables.append(rule2.char0, rule2, &rule);
2295  break;
2296  }
2297 
2298  case Context::Rule::Type::DetectSpaces: {
2299  auto tables = CharTableArray(detectChars, rule2);
2300  tables.removeNonSpecialWhenSpecial();
2301  tables.append(QLatin1Char(' '), rule2, &rule);
2302  tables.append(QLatin1Char('\t'), rule2, &rule);
2303  break;
2304  }
2305 
2306  case Context::Rule::Type::HlCChar:
2307  hlCCharRule.setRule(rule2, &rule);
2308  break;
2309 
2310  case Context::Rule::Type::HlCHex:
2311  hlCHexRule.setRule(rule2, &rule);
2312  break;
2313 
2314  case Context::Rule::Type::HlCOct:
2315  hlCOctRule.setRule(rule2, &rule);
2316  break;
2317 
2318  case Context::Rule::Type::HlCStringChar:
2319  hlCStringCharRule.setRule(rule2, &rule);
2320  break;
2321 
2322  case Context::Rule::Type::Int:
2323  intRule.setRule(rule2, &rule);
2324  break;
2325 
2326  case Context::Rule::Type::Float:
2327  floatRule.setRule(rule2, &rule);
2328  break;
2329 
2330  case Context::Rule::Type::LineContinue: {
2331  auto tables = CharTableArray(lineContinueChars, rule2);
2332  tables.removeNonSpecialWhenSpecial();
2333  tables.append(rule2.char0, rule2, &rule);
2334  break;
2335  }
2336 
2337  case Context::Rule::Type::RegExpr:
2338  if (rule2.isDotRegex) {
2339  dotRegex.append(rule2, &rule);
2340  }
2341  break;
2342 
2343  case Context::Rule::Type::WordDetect:
2344  case Context::Rule::Type::StringDetect:
2345  case Context::Rule::Type::Detect2Chars:
2346  case Context::Rule::Type::IncludeRules:
2347  case Context::Rule::Type::DetectIdentifier:
2348  case Context::Rule::Type::keyword:
2349  case Context::Rule::Type::Unknown:
2350  case Context::Rule::Type::RangeDetect:
2351  break;
2352  }
2353  }
2354  break;
2355 
2356  case Context::Rule::Type::Unknown:
2357  break;
2358  }
2359 
2360  if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2361  auto &unreachableIncludedRule = unreachableIncludedRules[&rule];
2362  if (isUnreachable && unreachableIncludedRule.alwaysUnreachable) {
2363  unreachableIncludedRule.unreachableBy.append(unreachableBy);
2364  } else {
2365  unreachableIncludedRule.alwaysUnreachable = false;
2366  }
2367  } else if (isUnreachable) {
2368  success = false;
2369  QString message;
2370  message.reserve(128);
2371  for (auto &ruleAndInclude : unreachableBy) {
2372  message += QStringLiteral("line ");
2373  if (ruleAndInclude.includeRules) {
2374  message += QString::number(ruleAndInclude.includeRules->line);
2375  message += QStringLiteral(" [by '");
2376  message += ruleAndInclude.includeRules->context.name;
2377  message += QStringLiteral("' line ");
2378  message += QString::number(ruleAndInclude.rule->line);
2379  if (ruleAndInclude.includeRules->filename != ruleAndInclude.rule->filename) {
2380  message += QStringLiteral(" (");
2381  message += ruleAndInclude.rule->filename;
2382  message += QLatin1Char(')');
2383  }
2384  message += QLatin1Char(']');
2385  } else {
2386  message += QString::number(ruleAndInclude.rule->line);
2387  }
2388  message += QStringLiteral(", ");
2389  }
2390  message.chop(2);
2391  qWarning() << filename << "line" << rule.line << "unreachable rule by" << message;
2392  }
2393  }
2394 
2395  return success;
2396  }
2397 
2398  //! Proposes to merge certain rule sequences
2399  //! - several DetectChar/AnyChar into AnyChar
2400  //! - several RegExpr into one RegExpr
2401  bool suggestRuleMerger(const QString &filename, const Context &context) const
2402  {
2403  bool success = true;
2404 
2405  if (context.rules.isEmpty()) {
2406  return success;
2407  }
2408 
2409  auto it = context.rules.begin();
2410  const auto end = context.rules.end() - 1;
2411 
2412  for (; it < end; ++it) {
2413  auto &rule1 = *it;
2414  auto &rule2 = it[1];
2415 
2416  auto isCommonCompatible = [&] {
2417  if (rule1.lookAhead != rule2.lookAhead) {
2418  return false;
2419  }
2420  // ignore attribute when lookAhead is true
2421  if (rule1.lookAhead != XmlBool::True && rule1.attribute != rule2.attribute) {
2422  return false;
2423  }
2424  // clang-format off
2425  return rule1.beginRegion == rule2.beginRegion
2426  && rule1.endRegion == rule2.endRegion
2427  && rule1.firstNonSpace == rule2.firstNonSpace
2428  && rule1.context.context == rule2.context.context
2429  && rule1.context.popCount == rule2.context.popCount;
2430  // clang-format on
2431  };
2432 
2433  switch (rule1.type) {
2434  // request to merge AnyChar/DetectChar
2435  case Context::Rule::Type::AnyChar:
2436  case Context::Rule::Type::DetectChar:
2437  if ((rule2.type == Context::Rule::Type::AnyChar || rule2.type == Context::Rule::Type::DetectChar) && isCommonCompatible()
2438  && rule1.column == rule2.column) {
2439  qWarning() << filename << "line" << rule2.line << "can be merged as AnyChar with the previous rule";
2440  success = false;
2441  }
2442  break;
2443 
2444  // request to merge multiple RegExpr
2445  case Context::Rule::Type::RegExpr:
2446  if (rule2.type == Context::Rule::Type::RegExpr && isCommonCompatible() && rule1.dynamic == rule2.dynamic
2447  && (rule1.column == rule2.column || (rule1.column <= 0 && rule2.column <= 0))) {
2448  qWarning() << filename << "line" << rule2.line << "can be merged with the previous rule";
2449  success = false;
2450  }
2451  break;
2452 
2453  case Context::Rule::Type::DetectSpaces:
2454  case Context::Rule::Type::HlCChar:
2455  case Context::Rule::Type::HlCHex:
2456  case Context::Rule::Type::HlCOct:
2457  case Context::Rule::Type::HlCStringChar:
2458  case Context::Rule::Type::Int:
2459  case Context::Rule::Type::Float:
2460  case Context::Rule::Type::LineContinue:
2461  case Context::Rule::Type::WordDetect:
2462  case Context::Rule::Type::StringDetect:
2463  case Context::Rule::Type::Detect2Chars:
2464  case Context::Rule::Type::IncludeRules:
2465  case Context::Rule::Type::DetectIdentifier:
2466  case Context::Rule::Type::keyword:
2467  case Context::Rule::Type::Unknown:
2468  case Context::Rule::Type::RangeDetect:
2469  break;
2470  }
2471  }
2472 
2473  return success;
2474  }
2475 
2476  //! Initialize the referenced context (ContextName::context)
2477  //! Some input / output examples are:
2478  //! - "#stay" -> ""
2479  //! - "#pop" -> ""
2480  //! - "Comment" -> "Comment"
2481  //! - "#pop!Comment" -> "Comment"
2482  //! - "##ISO C++" -> ""
2483  //! - "Comment##ISO C++"-> "Comment" in ISO C++
2484  void resolveContextName(Definition &definition, Context &context, ContextName &contextName, int line)
2485  {
2486  QStringView name = contextName.name;
2487  if (name.isEmpty()) {
2488  contextName.stay = true;
2489  } else if (name.startsWith(QStringLiteral("#stay"))) {
2490  name = name.mid(5);
2491  contextName.stay = true;
2492  contextName.context = &context;
2493  if (!name.isEmpty()) {
2494  qWarning() << definition.filename << "line" << line << "invalid context in" << context.name;
2495  m_success = false;
2496  }
2497  } else {
2498  while (name.startsWith(QStringLiteral("#pop"))) {
2499  name = name.mid(4);
2500  ++contextName.popCount;
2501  }
2502 
2503  if (contextName.popCount && !name.isEmpty()) {
2504  if (name.startsWith(QLatin1Char('!')) && name.size() > 1) {
2505  name = name.mid(1);
2506  } else {
2507  qWarning() << definition.filename << "line" << line << "'!' missing between '#pop' and context name" << context.name;
2508  m_success = false;
2509  }
2510  }
2511 
2512  if (!name.isEmpty()) {
2513  const int idx = name.indexOf(QStringLiteral("##"));
2514  if (idx == -1) {
2515  auto it = definition.contexts.find(name.toString());
2516  if (it != definition.contexts.end()) {
2517  contextName.context = &*it;
2518  }
2519  } else {
2520  auto defName = name.mid(idx + 2);
2521  auto it = m_definitions.find(defName.toString());
2522  if (it != m_definitions.end()) {
2523  auto listName = name.left(idx).toString();
2524  definition.referencedDefinitions.insert(&*it);
2525  auto ctxIt = it->contexts.find(listName.isEmpty() ? it->firstContextName : listName);
2526  if (ctxIt != it->contexts.end()) {
2527  contextName.context = &*ctxIt;
2528  }
2529  } else {
2530  qWarning() << definition.filename << "line" << line << "unknown definition in" << context.name;
2531  m_success = false;
2532  }
2533  }
2534 
2535  if (!contextName.context) {
2536  qWarning() << definition.filename << "line" << line << "unknown context" << name << "in" << context.name;
2537  m_success = false;
2538  }
2539  }
2540  }
2541  }
2542 
2543  QMap<QString, Definition> m_definitions;
2544  Definition *m_currentDefinition = nullptr;
2545  Keywords *m_currentKeywords = nullptr;
2546  Context *m_currentContext = nullptr;
2547  bool m_success = true;
2548 };
2549 
2550 namespace
2551 {
2552 QStringList readListing(const QString &fileName)
2553 {
2554  QFile file(fileName);
2555  if (!file.open(QIODevice::ReadOnly)) {
2556  return QStringList();
2557  }
2558 
2559  QXmlStreamReader xml(&file);
2560  QStringList listing;
2561  while (!xml.atEnd()) {
2562  xml.readNext();
2563 
2564  // add only .xml files, no .json or stuff
2565  if (xml.isCharacters() && xml.text().contains(QLatin1String(".xml"))) {
2566  listing.append(xml.text().toString());
2567  }
2568  }
2569 
2570  if (xml.hasError()) {
2571  qWarning() << "XML error while reading" << fileName << " - " << qPrintable(xml.errorString()) << "@ offset" << xml.characterOffset();
2572  listing.clear();
2573  }
2574 
2575  return listing;
2576 }
2577 
2578 /**
2579  * check if the "extensions" attribute have valid wildcards
2580  * @param extensions extensions string to check
2581  * @return valid?
2582  */
2583 bool checkExtensions(QStringView extensions)
2584 {
2585  // get list of extensions
2586  const QList<QStringView> extensionParts = extensions.split(QLatin1Char(';'), Qt::SkipEmptyParts);
2587 
2588  // ok if empty
2589  if (extensionParts.isEmpty()) {
2590  return true;
2591  }
2592 
2593  // check that only valid wildcard things are inside the parts
2594  for (const auto &extension : extensionParts) {
2595  for (const auto c : extension) {
2596  // eat normal things
2597  if (c.isDigit() || c.isLetter()) {
2598  continue;
2599  }
2600 
2601  // allow some special characters
2602  if (c == QLatin1Char('.') || c == QLatin1Char('-') || c == QLatin1Char('_') || c == QLatin1Char('+')) {
2603  continue;
2604  }
2605 
2606  // only allowed wildcard things: '?' and '*'
2607  if (c == QLatin1Char('?') || c == QLatin1Char('*')) {
2608  continue;
2609  }
2610 
2611  qWarning() << "invalid character" << c << "seen in extensions wildcard";
2612  return false;
2613  }
2614  }
2615 
2616  // all checks passed
2617  return true;
2618 }
2619 
2620 }
2621 
2622 int main(int argc, char *argv[])
2623 {
2624  // get app instance
2625  QCoreApplication app(argc, argv);
2626 
2627  // ensure enough arguments are passed
2628  if (app.arguments().size() < 3) {
2629  return 1;
2630  }
2631 
2632 #ifdef QT_XMLPATTERNS_LIB
2633  // open schema
2634  QXmlSchema schema;
2635  if (!schema.load(QUrl::fromLocalFile(app.arguments().at(2)))) {
2636  return 2;
2637  }
2638 #endif
2639 
2640  const QString hlFilenamesListing = app.arguments().value(3);
2641  if (hlFilenamesListing.isEmpty()) {
2642  return 1;
2643  }
2644 
2645  QStringList hlFilenames = readListing(hlFilenamesListing);
2646  if (hlFilenames.isEmpty()) {
2647  qWarning("Failed to read %s", qPrintable(hlFilenamesListing));
2648  return 3;
2649  }
2650 
2651  // text attributes
2652  const QStringList textAttributes = QStringList() << QStringLiteral("name") << QStringLiteral("section") << QStringLiteral("mimetype")
2653  << QStringLiteral("extensions") << QStringLiteral("style") << QStringLiteral("author")
2654  << QStringLiteral("license") << QStringLiteral("indenter");
2655 
2656  // index all given highlightings
2657  HlFilesChecker filesChecker;
2658  QVariantMap hls;
2659  int anyError = 0;
2660  for (const QString &hlFilename : std::as_const(hlFilenames)) {
2661  QFile hlFile(hlFilename);
2662  if (!hlFile.open(QIODevice::ReadOnly)) {
2663  qWarning("Failed to open %s", qPrintable(hlFilename));
2664  anyError = 3;
2665  continue;
2666  }
2667 
2668 #ifdef QT_XMLPATTERNS_LIB
2669  // validate against schema
2670  QXmlSchemaValidator validator(schema);
2671  if (!validator.validate(&hlFile, QUrl::fromLocalFile(hlFile.fileName()))) {
2672  anyError = 4;
2673  continue;
2674  }
2675 #endif
2676 
2677  // read the needed attributes from toplevel language tag
2678  hlFile.reset();
2679  QXmlStreamReader xml(&hlFile);
2680  if (xml.readNextStartElement()) {
2681  if (xml.name() != QLatin1String("language")) {
2682  anyError = 5;
2683  continue;
2684  }
2685  } else {
2686  anyError = 6;
2687  continue;
2688  }
2689 
2690  // map to store hl info
2691  QVariantMap hl;
2692 
2693  // transfer text attributes
2694  for (const QString &attribute : std::as_const(textAttributes)) {
2695  hl[attribute] = xml.attributes().value(attribute).toString();
2696  }
2697 
2698  // check if extensions have the right format
2699  if (!checkExtensions(hl[QStringLiteral("extensions")].toString())) {
2700  qWarning() << hlFilename << "'extensions' wildcards invalid:" << hl[QStringLiteral("extensions")].toString();
2701  anyError = 23;
2702  }
2703 
2704  // numerical attributes
2705  hl[QStringLiteral("version")] = xml.attributes().value(QLatin1String("version")).toInt();
2706  hl[QStringLiteral("priority")] = xml.attributes().value(QLatin1String("priority")).toInt();
2707 
2708  // add boolean one
2709  hl[QStringLiteral("hidden")] = attrToBool(xml.attributes().value(QLatin1String("hidden")));
2710 
2711  // remember hl
2712  hls[QFileInfo(hlFile).fileName()] = hl;
2713 
2714  const QString hlName = hl[QStringLiteral("name")].toString();
2715 
2716  filesChecker.setDefinition(xml.attributes().value(QStringLiteral("kateversion")), hlFilename, hlName);
2717 
2718  // scan for broken regex or keywords with spaces
2719  while (!xml.atEnd()) {
2720  xml.readNext();
2721  filesChecker.processElement(xml);
2722  }
2723 
2724  if (xml.hasError()) {
2725  anyError = 33;
2726  qWarning() << hlFilename << "-" << xml.errorString() << "@ offset" << xml.characterOffset();
2727  }
2728  }
2729 
2730  filesChecker.resolveContexts();
2731 
2732  if (!filesChecker.check()) {
2733  anyError = 7;
2734  }
2735 
2736  // bail out if any problem was seen
2737  if (anyError) {
2738  return anyError;
2739  }
2740 
2741  // create outfile, after all has worked!
2742  QFile outFile(app.arguments().at(1));
2743  if (!outFile.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
2744  return 9;
2745  }
2746 
2747  // write out json
2748  outFile.write(QCborValue::fromVariant(QVariant(hls)).toCbor());
2749 
2750  // be done
2751  return 0;
2752 }
void append(const T &value)
QString pattern() const const
int toInt(bool *ok, int base) const const
QString errorString() const const
bool remove(const T &value)
QString number(int n, int base)
QString errorString() const const
int size() const const
CaseInsensitive
QString pattern(Mode mode=Reading)
QStringRef value(const QString &namespaceUri, const QString &name) const const
QSet::iterator erase(QSet::iterator pos)
Type type(const QSqlDatabase &db)
bool isNull() const const
QStringRef text() const const
bool readNextStartElement()
void append(const T &value)
void push_back(const T &value)
bool isDigit() const const
void chop(int n)
void reserve(int size)
bool isLetter() const const
bool isEndElement() const const
int patternErrorOffset() const const
QStringView left(qsizetype length) const const
int size() const const
qsizetype size() const const
QStringRef name() const const
QMap::iterator insert(const Key &key, const T &value)
QMap::iterator end()
bool operator==(const Qt3DRender::QGraphicsApiFilter &reference, const Qt3DRender::QGraphicsApiFilter &sample)
void clear()
qint64 lineNumber() const const
QMap::iterator find(const Key &key)
char * toString(const T &value)
SkipEmptyParts
bool isEmpty() const const
QUrl fromLocalFile(const QString &localFile)
QXmlStreamReader::TokenType readNext()
QCborValue fromVariant(const QVariant &variant)
bool load(const QUrl &source)
const QList< QKeySequence > & replace()
QStringRef name() const const
int toInt(bool *ok, int base) const const
KCALENDARCORE_EXPORT uint qHash(const KCalendarCore::Period &key)
bool isEmpty() const const
QString readElementText(QXmlStreamReader::ReadElementTextBehaviour behaviour)
QXmlStreamAttributes attributes() const const
int indexOf(QChar ch, int from, Qt::CaseSensitivity cs) const const
void reserve(int size)
QString fileName() const const
const QList< QKeySequence > & find()
bool contains(const T &value) const const
QString & replace(int position, int n, QChar after)
bool startsWith(const QString &s, Qt::CaseSensitivity cs) const const
KGuiItem add()
QString toString() const const
unsigned int version()
void clear()
QString left(int n) const const
QString name(StandardShortcut id)
int size() const const
const QList< QKeySequence > & next()
void clear()
QSet::iterator insert(const T &value)
int size() const const
bool isStartElement() const const
bool atEnd() const const
bool contains(QChar ch, Qt::CaseSensitivity cs) const const
QStringRef value() const const
QString mid(int position, int n) const const
bool startsWith(QStringView str, Qt::CaseSensitivity cs) const const
bool contains(const QString &str, Qt::CaseSensitivity cs) const const
QString message
const QList< QKeySequence > & end()
bool hasError() const const
Keywords
bool isCharacters() const const
bool isValid() const const
ushort unicode() const const
qint64 characterOffset() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Sun Mar 26 2023 04:09:17 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.