KSyntaxHighlighting

katehighlightingindexer.cpp
1/*
2 SPDX-FileCopyrightText: 2014 Christoph Cullmann <cullmann@kde.org>
3 SPDX-FileCopyrightText: 2020 Jonathan Poelen <jonathan.poelen@gmail.com>
4
5 SPDX-License-Identifier: MIT
6*/
7
8#include <QCborValue>
9#include <QCoreApplication>
10#include <QDebug>
11#include <QFile>
12#include <QFileInfo>
13#include <QMutableMapIterator>
14#include <QRegularExpression>
15#include <QScopeGuard>
16#include <QVariant>
17#include <QXmlStreamReader>
18
19#ifdef HAS_XERCESC
20
21#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
22
23#include <xercesc/parsers/SAX2XMLReaderImpl.hpp>
24
25#include <xercesc/sax/ErrorHandler.hpp>
26#include <xercesc/sax/SAXParseException.hpp>
27
28#include <xercesc/util/PlatformUtils.hpp>
29#include <xercesc/util/XMLString.hpp>
30#include <xercesc/util/XMLUni.hpp>
31
32#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
33#include <xercesc/validators/common/Grammar.hpp>
34
35using namespace xercesc;
36
37/*
38 * Ideas taken from:
39 *
40 * author : Boris Kolpackov <boris@codesynthesis.com>
41 * copyright : not copyrighted - public domain
42 *
43 * This program uses Xerces-C++ SAX2 parser to load a set of schema files
44 * and then to validate a set of XML documents against these schemas. To
45 * build this program you will need Xerces-C++ 3.0.0 or later. For more
46 * information, see:
47 *
48 * http://www.codesynthesis.com/~boris/blog/2010/03/15/validating-external-schemas-xerces-cxx/
49 */
50
51/**
52 * Error handler object used during xml schema validation.
53 */
54class CustomErrorHandler : public ErrorHandler
55{
56public:
57 /**
58 * Constructor
59 * @param messages Pointer to the error message string to fill.
60 */
61 CustomErrorHandler(QString *messages)
62 : m_messages(messages)
63 {
64 }
65
66 /**
67 * Check global success/fail state.
68 * @return True if there was a failure, false otherwise.
69 */
70 bool failed() const
71 {
72 return m_failed;
73 }
74
75private:
76 /**
77 * Severity classes for error messages.
78 */
79 enum severity { s_warning, s_error, s_fatal };
80
81 /**
82 * Wrapper for warning exceptions.
83 * @param e Exception to handle.
84 */
85 void warning(const SAXParseException &e) override
86 {
87 m_failed = true; // be strict, warnings are evil, too!
88 handle(e, s_warning);
89 }
90
91 /**
92 * Wrapper for error exceptions.
93 * @param e Exception to handle.
94 */
95 void error(const SAXParseException &e) override
96 {
97 m_failed = true;
98 handle(e, s_error);
99 }
100
101 /**
102 * Wrapper for fatal error exceptions.
103 * @param e Exception to handle.
104 */
105 void fatalError(const SAXParseException &e) override
106 {
107 m_failed = true;
108 handle(e, s_fatal);
109 }
110
111 /**
112 * Reset the error status to "no error".
113 */
114 void resetErrors() override
115 {
116 m_failed = false;
117 }
118
119 /**
120 * Generic handler for error/warning/fatal error message exceptions.
121 * @param e Exception to handle.
122 * @param s Enum value encoding the message severtity.
123 */
124 void handle(const SAXParseException &e, severity s)
125 {
126 // get id to print
127 const XMLCh *xid(e.getPublicId());
128 if (!xid)
129 xid = e.getSystemId();
130
131 m_messages << QString::fromUtf16(xid) << ":" << e.getLineNumber() << ":" << e.getColumnNumber() << " " << (s == s_warning ? "warning: " : "error: ")
132 << QString::fromUtf16(e.getMessage()) << Qt::endl;
133 }
134
135private:
136 /**
137 * Storage for created error messages in this handler.
138 */
139 QTextStream m_messages;
140
141 /**
142 * Global error state. True if there was an error, false otherwise.
143 */
144 bool m_failed = false;
145};
146
147void init_parser(SAX2XMLReaderImpl &parser)
148{
149 // Commonly useful configuration.
150 //
151 parser.setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
152 parser.setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);
153 parser.setFeature(XMLUni::fgSAX2CoreValidation, true);
154
155 // Enable validation.
156 //
157 parser.setFeature(XMLUni::fgXercesSchema, true);
158 parser.setFeature(XMLUni::fgXercesSchemaFullChecking, true);
159 parser.setFeature(XMLUni::fgXercesValidationErrorAsFatal, true);
160
161 // Use the loaded grammar during parsing.
162 //
163 parser.setFeature(XMLUni::fgXercesUseCachedGrammarInParse, true);
164
165 // Don't load schemas from any other source (e.g., from XML document's
166 // xsi:schemaLocation attributes).
167 //
168 parser.setFeature(XMLUni::fgXercesLoadSchema, false);
169
170 // Xerces-C++ 3.1.0 is the first version with working multi import
171 // support.
172 //
173 parser.setFeature(XMLUni::fgXercesHandleMultipleImports, true);
174}
175
176#endif
177
178#include "../lib/worddelimiters_p.h"
179#include "../lib/xml_p.h"
180
181#include <array>
182
183using KSyntaxHighlighting::WordDelimiters;
184using KSyntaxHighlighting::Xml::attrToBool;
185
186class HlFilesChecker
187{
188public:
189 template<typename T>
190 void setDefinition(const T &verStr, const QString &filename, const QString &name)
191 {
192 m_currentDefinition = &*m_definitions.insert(name, Definition{});
193 m_currentDefinition->languageName = name;
194 m_currentDefinition->filename = filename;
195 m_currentDefinition->kateVersionStr = verStr.toString();
196 m_currentKeywords = nullptr;
197 m_currentContext = nullptr;
198
199 const auto idx = verStr.indexOf(QLatin1Char('.'));
200 if (idx <= 0) {
201 qWarning() << filename << "invalid kateversion" << verStr;
202 m_success = false;
203 } else {
204 m_currentDefinition->kateVersion = {verStr.left(idx).toInt(), verStr.mid(idx + 1).toInt()};
205 }
206 }
207
208 void processElement(QXmlStreamReader &xml)
209 {
210 if (xml.isStartElement()) {
211 if (m_currentContext) {
212 m_currentContext->rules.push_back(Context::Rule{});
213 auto &rule = m_currentContext->rules.back();
214 m_success = rule.parseElement(m_currentDefinition->filename, xml) && m_success;
215 m_currentContext->hasDynamicRule = m_currentContext->hasDynamicRule || rule.dynamic == XmlBool::True;
216 } else if (m_currentKeywords) {
217 m_success = m_currentKeywords->items.parseElement(m_currentDefinition->filename, xml) && m_success;
218 } else if (xml.name() == QStringLiteral("context")) {
219 processContextElement(xml);
220 } else if (xml.name() == QStringLiteral("list")) {
221 processListElement(xml);
222 } else if (xml.name() == QStringLiteral("keywords")) {
223 m_success = m_currentDefinition->parseKeywords(xml) && m_success;
224 } else if (xml.name() == QStringLiteral("emptyLine")) {
225 m_success = parseEmptyLine(m_currentDefinition->filename, xml) && m_success;
226 } else if (xml.name() == QStringLiteral("itemData")) {
227 m_success = m_currentDefinition->itemDatas.parseElement(m_currentDefinition->filename, xml) && m_success;
228 }
229 } else if (xml.isEndElement()) {
230 if (m_currentContext && xml.name() == QStringLiteral("context")) {
231 m_currentContext = nullptr;
232 } else if (m_currentKeywords && xml.name() == QStringLiteral("list")) {
233 m_currentKeywords = nullptr;
234 }
235 }
236 }
237
238 //! Resolve context attribute and include tag
239 void resolveContexts()
240 {
242 while (def.hasNext()) {
243 def.next();
244 auto &definition = def.value();
245 auto &contexts = definition.contexts;
246
247 if (contexts.isEmpty()) {
248 qWarning() << definition.filename << "has no context";
249 m_success = false;
250 continue;
251 }
252
253 auto markAsUsedContext = [](ContextName &contextName) {
254 if (!contextName.stay && contextName.context) {
255 contextName.context->isOnlyIncluded = false;
256 }
257 };
258
259 QMutableMapIterator<QString, Context> contextIt(contexts);
260 while (contextIt.hasNext()) {
261 contextIt.next();
262 auto &context = contextIt.value();
263 resolveContextName(definition, context, context.lineEndContext, context.line);
264 resolveContextName(definition, context, context.lineEmptyContext, context.line);
265 resolveContextName(definition, context, context.fallthroughContext, context.line);
266 markAsUsedContext(context.lineEndContext);
267 markAsUsedContext(context.lineEmptyContext);
268 markAsUsedContext(context.fallthroughContext);
269 for (auto &rule : context.rules) {
270 rule.parentContext = &context;
271 resolveContextName(definition, context, rule.context, rule.line);
272 if (rule.type != Context::Rule::Type::IncludeRules) {
273 markAsUsedContext(rule.context);
274 } else if (rule.includeAttrib == XmlBool::True && rule.context.context) {
275 rule.context.context->referencedWithIncludeAttrib = true;
276 }
277 }
278 }
279
280 auto *firstContext = &*definition.contexts.find(definition.firstContextName);
281 firstContext->isOnlyIncluded = false;
282 definition.firstContext = firstContext;
283 }
284
285 resolveIncludeRules();
286 }
287
288 bool check() const
289 {
290 bool success = m_success;
291
292 const auto usedContexts = extractUsedContexts();
293
294 QMap<const Definition *, const Definition *> maxVersionByDefinitions;
296
297 QMapIterator<QString, Definition> def(m_definitions);
298 while (def.hasNext()) {
299 def.next();
300 const auto &definition = def.value();
301 const auto &filename = definition.filename;
302
303 auto *maxDef = maxKateVersionDefinition(definition, maxVersionByDefinitions);
304 if (maxDef != &definition) {
305 qWarning() << definition.filename << "depends on a language" << maxDef->languageName << "in version" << maxDef->kateVersionStr
306 << ". Please, increase kateversion.";
307 success = false;
308 }
309
310 QSet<ItemDatas::Style> usedAttributeNames;
311 QSet<ItemDatas::Style> ignoredAttributeNames;
312 success = checkKeywordsList(definition) && success;
313 success = checkContexts(definition, usedAttributeNames, ignoredAttributeNames, usedContexts, unreachableIncludedRules) && success;
314
315 // search for non-existing itemDatas.
316 const auto invalidNames = usedAttributeNames - definition.itemDatas.styleNames;
317 for (const auto &styleName : invalidNames) {
318 qWarning() << filename << "line" << styleName.line << "reference of non-existing itemData attributes:" << styleName.name;
319 success = false;
320 }
321
322 // search for existing itemDatas, but unusable.
323 const auto ignoredNames = ignoredAttributeNames - usedAttributeNames;
324 for (const auto &styleName : ignoredNames) {
325 qWarning() << filename << "line" << styleName.line << "attribute" << styleName.name
326 << "is never used. All uses are with lookAhead=true or <IncludeRules/>";
327 success = false;
328 }
329
330 // search for unused itemDatas.
331 auto unusedNames = definition.itemDatas.styleNames - usedAttributeNames;
332 unusedNames -= ignoredNames;
333 for (const auto &styleName : std::as_const(unusedNames)) {
334 qWarning() << filename << "line" << styleName.line << "unused itemData:" << styleName.name;
335 success = false;
336 }
337 }
338
339 QMutableMapIterator<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRuleIt(unreachableIncludedRules);
340 while (unreachableIncludedRuleIt.hasNext()) {
341 unreachableIncludedRuleIt.next();
342 IncludedRuleUnreachableBy &unreachableRulesBy = unreachableIncludedRuleIt.value();
343 if (unreachableRulesBy.alwaysUnreachable) {
344 auto *rule = unreachableIncludedRuleIt.key();
345
346 if (!rule->parentContext->isOnlyIncluded) {
347 continue;
348 }
349
350 // remove duplicates rules
352 auto &unreachableBy = unreachableRulesBy.unreachableBy;
353 unreachableBy.erase(std::remove_if(unreachableBy.begin(),
354 unreachableBy.end(),
355 [&](const RuleAndInclude &ruleAndInclude) {
356 if (rules.contains(ruleAndInclude.rule)) {
357 return true;
358 }
359 rules.insert(ruleAndInclude.rule);
360 return false;
361 }),
362 unreachableBy.end());
363
364 QString message;
365 message.reserve(128);
366 for (auto &ruleAndInclude : std::as_const(unreachableBy)) {
367 message += QStringLiteral("line ");
368 message += QString::number(ruleAndInclude.rule->line);
369 message += QStringLiteral(" [");
370 message += ruleAndInclude.rule->parentContext->name;
371 if (rule->filename != ruleAndInclude.rule->filename) {
372 message += QStringLiteral(" (");
373 message += ruleAndInclude.rule->filename;
374 message += QLatin1Char(')');
375 }
376 if (ruleAndInclude.includeRules) {
377 message += QStringLiteral(" via line ");
378 message += QString::number(ruleAndInclude.includeRules->line);
379 }
380 message += QStringLiteral("], ");
381 }
382 message.chop(2);
383
384 qWarning() << rule->filename << "line" << rule->line << "no IncludeRule can reach this rule, hidden by" << message;
385 success = false;
386 }
387 }
388
389 return success;
390 }
391
392private:
//! Tri-state value of an optional boolean XML attribute.
enum class XmlBool {
    // Must stay the first enumerator: members declared as "XmlBool x{}" are
    // value-initialised and therefore start in this state.
    Unspecified,
    False,
    True,
};
398
399 struct Context;
400
401 struct ContextName {
403 int popCount = 0;
404 bool stay = false;
405
406 Context *context = nullptr;
407 };
408
409 struct Parser {
410 const QString &filename;
411 QXmlStreamReader &xml;
413 bool success;
414
415 //! Read a string type attribute, \c success = \c false when \p str is not empty
416 //! \return \c true when attr.name() == attrName, otherwise false
417 bool extractString(QString &str, const QString &attrName)
418 {
419 if (attr.name() != attrName) {
420 return false;
421 }
422
423 str = attr.value().toString();
424 if (str.isEmpty()) {
425 qWarning() << filename << "line" << xml.lineNumber() << attrName << "attribute is empty";
426 success = false;
427 }
428
429 return true;
430 }
431
432 //! Read a bool type attribute, \c success = \c false when \p xmlBool is not \c XmlBool::Unspecified.
433 //! \return \c true when attr.name() == attrName, otherwise false
434 bool extractXmlBool(XmlBool &xmlBool, const QString &attrName)
435 {
436 if (attr.name() != attrName) {
437 return false;
438 }
439
440 xmlBool = attr.value().isNull() ? XmlBool::Unspecified : attrToBool(attr.value()) ? XmlBool::True : XmlBool::False;
441
442 return true;
443 }
444
445 //! Read a positive integer type attribute, \c success = \c false when \p positive is already greater than or equal to 0
446 //! \return \c true when attr.name() == attrName, otherwise false
447 bool extractPositive(int &positive, const QString &attrName)
448 {
449 if (attr.name() != attrName) {
450 return false;
451 }
452
453 bool ok = true;
454 positive = attr.value().toInt(&ok);
455
456 if (!ok || positive < 0) {
457 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a positive integer:" << attr.value();
458 success = false;
459 }
460
461 return true;
462 }
463
464 //! Read a color, \c success = \c false when \p color is already greater than or equal to 0
465 //! \return \c true when attr.name() == attrName, otherwise false
466 bool checkColor(const QString &attrName)
467 {
468 if (attr.name() != attrName) {
469 return false;
470 }
471
472 const auto value = attr.value();
473 if (value.isEmpty() /*|| QColor(value).isValid()*/) {
474 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a color:" << value;
475 success = false;
476 }
477
478 return true;
479 }
480
481 //! Read a QChar, \c success = \c false when \p c is not \c '\0' or does not have one char
482 //! \return \c true when attr.name() == attrName, otherwise false
483 bool extractChar(QChar &c, const QString &attrName)
484 {
485 if (attr.name() != attrName) {
486 return false;
487 }
488
489 if (attr.value().size() == 1) {
490 c = attr.value()[0];
491 } else {
492 c = QLatin1Char('_');
493 qWarning() << filename << "line" << xml.lineNumber() << attrName << "must contain exactly one char:" << attr.value();
494 success = false;
495 }
496
497 return true;
498 }
499
500 //! \return parsing status when \p isExtracted is \c true, otherwise \c false
501 bool checkIfExtracted(bool isExtracted)
502 {
503 if (isExtracted) {
504 return success;
505 }
506
507 qWarning() << filename << "line" << xml.lineNumber() << "unknown attribute:" << attr.name();
508 return false;
509 }
510 };
511
512 struct Keywords {
513 struct Items {
514 struct Item {
515 QString content;
516 int line;
517
518 friend size_t qHash(const Item &item, size_t seed = 0)
519 {
520 return qHash(item.content, seed);
521 }
522
523 friend bool operator==(const Item &item0, const Item &item1)
524 {
525 return item0.content == item1.content;
526 }
527 };
528
529 QList<Item> keywords;
530 QSet<Item> includes;
531
532 bool parseElement(const QString &filename, QXmlStreamReader &xml)
533 {
534 bool success = true;
535
536 const int line = xml.lineNumber();
537 QString content = xml.readElementText();
538
539 if (content.isEmpty()) {
540 qWarning() << filename << "line" << line << "is empty:" << xml.name();
541 success = false;
542 }
543
544 if (xml.name() == QStringLiteral("include")) {
545 includes.insert({content, line});
546 } else if (xml.name() == QStringLiteral("item")) {
547 keywords.append({content, line});
548 } else {
549 qWarning() << filename << "line" << line << "invalid element:" << xml.name();
550 success = false;
551 }
552
553 return success;
554 }
555 };
556
558 Items items;
559 int line;
560
561 bool parseElement(const QString &filename, QXmlStreamReader &xml)
562 {
563 line = xml.lineNumber();
564
565 bool success = true;
566 for (auto &attr : xml.attributes()) {
567 Parser parser{filename, xml, attr, success};
568
569 const bool isExtracted = parser.extractString(name, QStringLiteral("name"));
570
571 success = parser.checkIfExtracted(isExtracted);
572 }
573 return success;
574 }
575 };
576
577 struct Context {
578 struct Rule {
579 enum class Type {
580 Unknown,
581 AnyChar,
582 Detect2Chars,
583 DetectChar,
584 DetectIdentifier,
585 DetectSpaces,
586 Float,
587 HlCChar,
588 HlCHex,
589 HlCOct,
590 HlCStringChar,
591 IncludeRules,
592 Int,
593 LineContinue,
594 RangeDetect,
595 RegExpr,
596 StringDetect,
597 WordDetect,
598 keyword,
599 };
600
601 Type type{};
602
603 bool isDotRegex = false;
604 int line = -1;
605
606 // commonAttributes
607 QString attribute;
608 ContextName context;
609 QString beginRegion;
610 QString endRegion;
611 int column = -1;
612 XmlBool lookAhead{};
613 XmlBool firstNonSpace{};
614
615 // StringDetect, WordDetect, keyword
616 XmlBool insensitive{};
617
618 // DetectChar, StringDetect, RegExpr, keyword
619 XmlBool dynamic{};
620
621 // Regex
622 XmlBool minimal{};
623
624 // IncludeRule
625 XmlBool includeAttrib{};
626
627 // DetectChar, Detect2Chars, LineContinue, RangeDetect
628 QChar char0;
629 // Detect2Chars, RangeDetect
630 QChar char1;
631
632 // AnyChar, DetectChar, StringDetect, RegExpr, WordDetect, keyword
633 QString string;
634 // RegExpr without .* as suffix
635 QString sanitizedString;
636
637 // Float, HlCHex, HlCOct, Int, WordDetect, keyword
638 QString additionalDeliminator;
639 QString weakDeliminator;
640
641 // rules included by IncludeRules (without IncludeRule)
642 QList<const Rule *> includedRules;
643
644 // IncludeRules included by IncludeRules
645 QSet<const Rule *> includedIncludeRules;
646
647 Context const *parentContext = nullptr;
648
649 QString filename;
650
651 bool parseElement(const QString &filename, QXmlStreamReader &xml)
652 {
653 this->filename = filename;
654 line = xml.lineNumber();
655
656 using Pair = QPair<QString, Type>;
657 static const auto pairs = {
658 Pair{QStringLiteral("AnyChar"), Type::AnyChar},
659 Pair{QStringLiteral("Detect2Chars"), Type::Detect2Chars},
660 Pair{QStringLiteral("DetectChar"), Type::DetectChar},
661 Pair{QStringLiteral("DetectIdentifier"), Type::DetectIdentifier},
662 Pair{QStringLiteral("DetectSpaces"), Type::DetectSpaces},
663 Pair{QStringLiteral("Float"), Type::Float},
664 Pair{QStringLiteral("HlCChar"), Type::HlCChar},
665 Pair{QStringLiteral("HlCHex"), Type::HlCHex},
666 Pair{QStringLiteral("HlCOct"), Type::HlCOct},
667 Pair{QStringLiteral("HlCStringChar"), Type::HlCStringChar},
668 Pair{QStringLiteral("IncludeRules"), Type::IncludeRules},
669 Pair{QStringLiteral("Int"), Type::Int},
670 Pair{QStringLiteral("LineContinue"), Type::LineContinue},
671 Pair{QStringLiteral("RangeDetect"), Type::RangeDetect},
672 Pair{QStringLiteral("RegExpr"), Type::RegExpr},
673 Pair{QStringLiteral("StringDetect"), Type::StringDetect},
674 Pair{QStringLiteral("WordDetect"), Type::WordDetect},
675 Pair{QStringLiteral("keyword"), Type::keyword},
676 };
677
678 for (auto pair : pairs) {
679 if (xml.name() == pair.first) {
680 type = pair.second;
681 bool success = parseAttributes(filename, xml);
682 success = checkMandoryAttributes(filename, xml) && success;
683 if (success && type == Type::RegExpr) {
684 // ., (.) followed by *, +, {1} or nothing
685 static const QRegularExpression isDot(QStringLiteral(R"(^\‍(?\.(?:[*+][*+?]?|[*+]|\{1\})?\$?$)"));
686 // remove "(?:" and ")"
687 static const QRegularExpression removeParentheses(QStringLiteral(R"(\‍((?:\?:)?|\))"));
688 // remove parentheses on a copy of string
689 auto reg = QString(string).replace(removeParentheses, QString());
690 isDotRegex = reg.contains(isDot);
691
692 // Remove .* and .*$ suffix.
693 static const QRegularExpression allSuffix(QStringLiteral("(?<!\\\\)[.][*][?+]?[$]?$"));
694 sanitizedString = string;
695 sanitizedString.replace(allSuffix, QString());
696 // string is a catch-all, do not sanitize
697 if (sanitizedString.isEmpty() || sanitizedString == QStringLiteral("^")) {
698 sanitizedString = string;
699 }
700 }
701 return success;
702 }
703 }
704
705 qWarning() << filename << "line" << xml.lineNumber() << "unknown element:" << xml.name();
706 return false;
707 }
708
709 private:
710 bool parseAttributes(const QString &filename, QXmlStreamReader &xml)
711 {
712 bool success = true;
713
714 for (auto &attr : xml.attributes()) {
715 Parser parser{filename, xml, attr, success};
716
717 // clang-format off
718 const bool isExtracted
719 = parser.extractString(attribute, QStringLiteral("attribute"))
720 || parser.extractString(context.name, QStringLiteral("context"))
721 || parser.extractXmlBool(lookAhead, QStringLiteral("lookAhead"))
722 || parser.extractXmlBool(firstNonSpace, QStringLiteral("firstNonSpace"))
723 || parser.extractString(beginRegion, QStringLiteral("beginRegion"))
724 || parser.extractString(endRegion, QStringLiteral("endRegion"))
725 || parser.extractPositive(column, QStringLiteral("column"))
726 || ((type == Type::RegExpr
727 || type == Type::StringDetect
728 || type == Type::WordDetect
729 || type == Type::keyword
730 ) && parser.extractXmlBool(insensitive, QStringLiteral("insensitive")))
731 || ((type == Type::DetectChar
732 || type == Type::RegExpr
733 || type == Type::StringDetect
734 || type == Type::keyword
735 ) && parser.extractXmlBool(dynamic, QStringLiteral("dynamic")))
736 || ((type == Type::RegExpr)
737 && parser.extractXmlBool(minimal, QStringLiteral("minimal")))
738 || ((type == Type::DetectChar
739 || type == Type::Detect2Chars
740 || type == Type::LineContinue
741 || type == Type::RangeDetect
742 ) && parser.extractChar(char0, QStringLiteral("char")))
743 || ((type == Type::Detect2Chars
744 || type == Type::RangeDetect
745 ) && parser.extractChar(char1, QStringLiteral("char1")))
746 || ((type == Type::AnyChar
747 || type == Type::RegExpr
748 || type == Type::StringDetect
749 || type == Type::WordDetect
750 || type == Type::keyword
751 ) && parser.extractString(string, QStringLiteral("String")))
752 || ((type == Type::IncludeRules)
753 && parser.extractXmlBool(includeAttrib, QStringLiteral("includeAttrib")))
754 || ((type == Type::Float
755 || type == Type::HlCHex
756 || type == Type::HlCOct
757 || type == Type::Int
758 || type == Type::keyword
759 || type == Type::WordDetect
760 ) && (parser.extractString(additionalDeliminator, QStringLiteral("additionalDeliminator"))
761 || parser.extractString(weakDeliminator, QStringLiteral("weakDeliminator"))))
762 ;
763 // clang-format on
764
765 success = parser.checkIfExtracted(isExtracted);
766
767 if (type == Type::LineContinue && char0 == QLatin1Char('\0')) {
768 char0 = QLatin1Char('\\');
769 }
770 }
771
772 return success;
773 }
774
775 bool checkMandoryAttributes(const QString &filename, QXmlStreamReader &xml)
776 {
777 QString missingAttr;
778
779 switch (type) {
780 case Type::Unknown:
781 return false;
782
783 case Type::AnyChar:
784 case Type::RegExpr:
785 case Type::StringDetect:
786 case Type::WordDetect:
787 case Type::keyword:
788 missingAttr = string.isEmpty() ? QStringLiteral("String") : QString();
789 break;
790
791 case Type::DetectChar:
792 missingAttr = !char0.unicode() ? QStringLiteral("char") : QString();
793 break;
794
795 case Type::Detect2Chars:
796 case Type::RangeDetect:
797 missingAttr = !char0.unicode() && !char1.unicode() ? QStringLiteral("char and char1")
798 : !char0.unicode() ? QStringLiteral("char")
799 : !char1.unicode() ? QStringLiteral("char1")
800 : QString();
801 break;
802
803 case Type::IncludeRules:
804 missingAttr = context.name.isEmpty() ? QStringLiteral("context") : QString();
805 break;
806
807 case Type::DetectIdentifier:
808 case Type::DetectSpaces:
809 case Type::Float:
810 case Type::HlCChar:
811 case Type::HlCHex:
812 case Type::HlCOct:
813 case Type::HlCStringChar:
814 case Type::Int:
815 case Type::LineContinue:
816 break;
817 }
818
819 if (!missingAttr.isEmpty()) {
820 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute:" << missingAttr;
821 return false;
822 }
823
824 return true;
825 }
826 };
827
828 int line;
829 // becomes false when a context (except includeRule) refers to it
830 bool isOnlyIncluded = true;
831 // becomes true when an includedRule refers to it with includeAttrib=true
832 bool referencedWithIncludeAttrib = false;
833 bool hasDynamicRule = false;
835 QString attribute;
836 ContextName lineEndContext;
837 ContextName lineEmptyContext;
838 ContextName fallthroughContext;
839 QList<Rule> rules;
840 XmlBool dynamic{};
841 XmlBool fallthrough{};
842 XmlBool stopEmptyLineContextSwitchLoop{};
843
844 bool parseElement(const QString &filename, QXmlStreamReader &xml)
845 {
846 line = xml.lineNumber();
847
848 bool success = true;
849
850 for (auto &attr : xml.attributes()) {
851 Parser parser{filename, xml, attr, success};
852 XmlBool noIndentationBasedFolding{};
853
854 // clang-format off
855 const bool isExtracted = parser.extractString(name, QStringLiteral("name"))
856 || parser.extractString(attribute, QStringLiteral("attribute"))
857 || parser.extractString(lineEndContext.name, QStringLiteral("lineEndContext"))
858 || parser.extractString(lineEmptyContext.name, QStringLiteral("lineEmptyContext"))
859 || parser.extractString(fallthroughContext.name, QStringLiteral("fallthroughContext"))
860 || parser.extractXmlBool(dynamic, QStringLiteral("dynamic"))
861 || parser.extractXmlBool(fallthrough, QStringLiteral("fallthrough"))
862 || parser.extractXmlBool(stopEmptyLineContextSwitchLoop, QStringLiteral("stopEmptyLineContextSwitchLoop"))
863 || parser.extractXmlBool(noIndentationBasedFolding, QStringLiteral("noIndentationBasedFolding"));
864 // clang-format on
865
866 success = parser.checkIfExtracted(isExtracted);
867 }
868
869 if (name.isEmpty()) {
870 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: name";
871 success = false;
872 }
873
874 if (attribute.isEmpty()) {
875 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: attribute";
876 success = false;
877 }
878
879 return success;
880 }
881 };
882
//! Two-part revision number, ordered by major revision first, then minor.
struct Version {
    int majorRevision;
    int minorRevision;

    //! Both parts default to 0.
    Version(int majorRevision = 0, int minorRevision = 0)
        : majorRevision(majorRevision)
        , minorRevision(minorRevision)
    {
    }

    //! \return \c true when this version is strictly older than \p version
    bool operator<(const Version &version) const
    {
        if (majorRevision != version.majorRevision) {
            return majorRevision < version.majorRevision;
        }
        return minorRevision < version.minorRevision;
    }
};
898
899 struct ItemDatas {
900 struct Style {
902 int line;
903
904 friend size_t qHash(const Style &style, size_t seed = 0)
905 {
906 return qHash(style.name, seed);
907 }
908
909 friend bool operator==(const Style &style0, const Style &style1)
910 {
911 return style0.name == style1.name;
912 }
913 };
914
915 QSet<Style> styleNames;
916
917 bool parseElement(const QString &filename, QXmlStreamReader &xml)
918 {
919 bool success = true;
920
922 QString defStyleNum;
923 XmlBool boolean;
924
925 for (auto &attr : xml.attributes()) {
926 Parser parser{filename, xml, attr, success};
927
928 const bool isExtracted = parser.extractString(name, QStringLiteral("name")) || parser.extractString(defStyleNum, QStringLiteral("defStyleNum"))
929 || parser.extractXmlBool(boolean, QStringLiteral("bold")) || parser.extractXmlBool(boolean, QStringLiteral("italic"))
930 || parser.extractXmlBool(boolean, QStringLiteral("underline")) || parser.extractXmlBool(boolean, QStringLiteral("strikeOut"))
931 || parser.extractXmlBool(boolean, QStringLiteral("spellChecking")) || parser.checkColor(QStringLiteral("color"))
932 || parser.checkColor(QStringLiteral("selColor")) || parser.checkColor(QStringLiteral("backgroundColor"))
933 || parser.checkColor(QStringLiteral("selBackgroundColor"));
934
935 success = parser.checkIfExtracted(isExtracted);
936 }
937
938 if (!name.isEmpty()) {
939 const auto len = styleNames.size();
940 styleNames.insert({name, int(xml.lineNumber())});
941 if (len == styleNames.size()) {
942 qWarning() << filename << "line" << xml.lineNumber() << "itemData duplicate:" << name;
943 success = false;
944 }
945 }
946
947 return success;
948 }
949 };
950
951 struct Definition {
952 QMap<QString, Keywords> keywordsList;
953 QMap<QString, Context> contexts;
954 ItemDatas itemDatas;
955 QString firstContextName;
956 const Context *firstContext = nullptr;
957 QString filename;
958 WordDelimiters wordDelimiters;
959 Version kateVersion{};
960 QString kateVersionStr;
961 QString languageName;
962 QSet<const Definition *> referencedDefinitions;
963
964 // Parse <keywords ...>
965 bool parseKeywords(QXmlStreamReader &xml)
966 {
967 wordDelimiters.append(xml.attributes().value(QStringLiteral("additionalDeliminator")));
968 wordDelimiters.remove(xml.attributes().value(QStringLiteral("weakDeliminator")));
969 return true;
970 }
971 };
972
973 // Parse <context>
974 void processContextElement(QXmlStreamReader &xml)
975 {
976 Context context;
977 m_success = context.parseElement(m_currentDefinition->filename, xml) && m_success;
978 if (m_currentDefinition->firstContextName.isEmpty()) {
979 m_currentDefinition->firstContextName = context.name;
980 }
981 if (m_currentDefinition->contexts.contains(context.name)) {
982 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate context:" << context.name;
983 m_success = false;
984 }
985 m_currentContext = &*m_currentDefinition->contexts.insert(context.name, context);
986 }
987
988 // Parse <list name="...">
989 void processListElement(QXmlStreamReader &xml)
990 {
991 Keywords keywords;
992 m_success = keywords.parseElement(m_currentDefinition->filename, xml) && m_success;
993 if (m_currentDefinition->keywordsList.contains(keywords.name)) {
994 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate list:" << keywords.name;
995 m_success = false;
996 }
997 m_currentKeywords = &*m_currentDefinition->keywordsList.insert(keywords.name, keywords);
998 }
999
1000 const Definition *maxKateVersionDefinition(const Definition &definition, QMap<const Definition *, const Definition *> &maxVersionByDefinitions) const
1001 {
1002 auto it = maxVersionByDefinitions.find(&definition);
1003 if (it != maxVersionByDefinitions.end()) {
1004 return it.value();
1005 } else {
1006 auto it = maxVersionByDefinitions.insert(&definition, &definition);
1007 for (const auto &referencedDef : definition.referencedDefinitions) {
1008 auto *maxDef = maxKateVersionDefinition(*referencedDef, maxVersionByDefinitions);
1009 if (it.value()->kateVersion < maxDef->kateVersion) {
1010 it.value() = maxDef;
1011 }
1012 }
1013 return it.value();
1014 }
1015 }
1016
1017 // Initialize the referenced rules (Rule::includedRules)
1018 void resolveIncludeRules()
1019 {
1020 QSet<const Context *> usedContexts;
1021 QList<const Context *> contexts;
1022
1024 while (def.hasNext()) {
1025 def.next();
1026 auto &definition = def.value();
1027 QMutableMapIterator<QString, Context> contextIt(definition.contexts);
1028 while (contextIt.hasNext()) {
1029 contextIt.next();
1030 auto &currentContext = contextIt.value();
1031 for (auto &rule : currentContext.rules) {
1032 if (rule.type != Context::Rule::Type::IncludeRules) {
1033 continue;
1034 }
1035
1036 if (rule.context.stay) {
1037 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself";
1038 m_success = false;
1039 continue;
1040 }
1041
1042 if (rule.context.popCount) {
1043 qWarning() << definition.filename << "line" << rule.line << "IncludeRules with #pop prefix";
1044 m_success = false;
1045 }
1046
1047 if (!rule.context.context) {
1048 m_success = false;
1049 continue;
1050 }
1051
1052 // resolve includedRules and includedIncludeRules
1053
1054 usedContexts.clear();
1055 usedContexts.insert(rule.context.context);
1056 contexts.clear();
1057 contexts.append(rule.context.context);
1058
1059 for (int i = 0; i < contexts.size(); ++i) {
1060 currentContext.hasDynamicRule = contexts[i]->hasDynamicRule;
1061 for (const auto &includedRule : contexts[i]->rules) {
1062 if (includedRule.type != Context::Rule::Type::IncludeRules) {
1063 rule.includedRules.append(&includedRule);
1064 } else if (&rule == &includedRule) {
1065 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself by recursivity";
1066 m_success = false;
1067 } else {
1068 rule.includedIncludeRules.insert(&includedRule);
1069
1070 if (includedRule.includedRules.isEmpty()) {
1071 const auto *context = includedRule.context.context;
1072 if (context && !usedContexts.contains(context)) {
1073 contexts.append(context);
1074 usedContexts.insert(context);
1075 }
1076 } else {
1077 rule.includedRules.append(includedRule.includedRules);
1078 }
1079 }
1080 }
1081 }
1082 }
1083 }
1084 }
1085 }
1086
1087 //! Recursively extracts the contexts used from the first context of the definitions.
1088 //! This method detects groups of contexts which are only used among themselves.
1089 QSet<const Context *> extractUsedContexts() const
1090 {
1091 QSet<const Context *> usedContexts;
1092 QList<const Context *> contexts;
1093
1094 QMapIterator<QString, Definition> def(m_definitions);
1095 while (def.hasNext()) {
1096 def.next();
1097 const auto &definition = def.value();
1098
1099 if (definition.firstContext) {
1100 usedContexts.insert(definition.firstContext);
1101 contexts.clear();
1102 contexts.append(definition.firstContext);
1103
1104 for (int i = 0; i < contexts.size(); ++i) {
1105 auto appendContext = [&](const Context *context) {
1106 if (context && !usedContexts.contains(context)) {
1107 contexts.append(context);
1108 usedContexts.insert(context);
1109 }
1110 };
1111
1112 const auto *context = contexts[i];
1113 appendContext(context->lineEndContext.context);
1114 appendContext(context->lineEmptyContext.context);
1115 appendContext(context->fallthroughContext.context);
1116
1117 for (auto &rule : context->rules) {
1118 appendContext(rule.context.context);
1119 }
1120 }
1121 }
1122 }
1123
1124 return usedContexts;
1125 }
1126
1127 struct RuleAndInclude {
1128 const Context::Rule *rule;
1129 const Context::Rule *includeRules;
1130
1131 explicit operator bool() const
1132 {
1133 return rule;
1134 }
1135 };
1136
1137 struct IncludedRuleUnreachableBy {
1138 QList<RuleAndInclude> unreachableBy;
1139 bool alwaysUnreachable = true;
1140 };
1141
1142 //! Check contexts and rules
1143 bool checkContexts(const Definition &definition,
1144 QSet<ItemDatas::Style> &usedAttributeNames,
1145 QSet<ItemDatas::Style> &ignoredAttributeNames,
1146 const QSet<const Context *> &usedContexts,
1147 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1148 {
1149 bool success = true;
1150
1151 QMapIterator<QString, Context> contextIt(definition.contexts);
1152 while (contextIt.hasNext()) {
1153 contextIt.next();
1154
1155 const auto &context = contextIt.value();
1156 const auto &filename = definition.filename;
1157
1158 if (!usedContexts.contains(&context)) {
1159 qWarning() << filename << "line" << context.line << "unused context:" << context.name;
1160 success = false;
1161 continue;
1162 }
1163
1164 if (context.name.startsWith(QStringLiteral("#pop"))) {
1165 qWarning() << filename << "line" << context.line << "the context name must not start with '#pop':" << context.name;
1166 success = false;
1167 }
1168
1169 if (!context.attribute.isEmpty() && (!context.isOnlyIncluded || context.referencedWithIncludeAttrib)) {
1170 usedAttributeNames.insert({context.attribute, context.line});
1171 }
1172
1173 success = checkContextAttribute(definition, context) && success;
1174 success = checkUreachableRules(definition.filename, context, unreachableIncludedRules) && success;
1175 success = suggestRuleMerger(definition.filename, context) && success;
1176
1177 for (const auto &rule : context.rules) {
1178 if (!rule.attribute.isEmpty()) {
1179 if (rule.lookAhead != XmlBool::True) {
1180 usedAttributeNames.insert({rule.attribute, rule.line});
1181 } else {
1182 ignoredAttributeNames.insert({rule.attribute, rule.line});
1183 }
1184 }
1185 success = checkLookAhead(rule) && success;
1186 success = checkStringDetect(rule) && success;
1187 success = checkKeyword(definition, rule) && success;
1188 success = checkRegExpr(filename, rule, context) && success;
1189 success = checkDelimiters(definition, rule) && success;
1190 }
1191 }
1192
1193 return success;
1194 }
1195
1196 //! Check that a regular expression in a RegExpr rule:
1197 //! - isValid()
1198 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1199 //! - dynamic=true but no place holder used?
1200 //! - is not . with lookAhead="1"
1201 //! - is not ^... without column ou firstNonSpace attribute
1202 //! - is not equivalent to DetectSpaces, DetectChar, Detect2Chars, StringDetect, DetectIdentifier, RangeDetect, LineContinue or AnyChar
1203 //! - has no unused captures
1204 //! - has no unnecessary quantifier with lookAhead
1205 bool checkRegExpr(const QString &filename, const Context::Rule &rule, const Context &context) const
1206 {
1207 // ignore empty regex because the error is raised during xml parsing
1208 if (rule.type == Context::Rule::Type::RegExpr && !rule.string.isEmpty()) {
1209 const QRegularExpression regexp(rule.string);
1210 if (!checkRegularExpression(rule.filename, regexp, rule.line)) {
1211 return false;
1212 }
1213
1214 // dynamic == true and no place holder?
1215 if (rule.dynamic == XmlBool::True) {
1216 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1217 if (!rule.string.contains(placeHolder)) {
1218 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1219 return false;
1220 }
1221 }
1222
1223 auto reg = (rule.lookAhead == XmlBool::True) ? rule.sanitizedString : rule.string;
1224 if (rule.lookAhead == XmlBool::True) {
1225 static const QRegularExpression removeAllSuffix(QStringLiteral(
1226 R"(((?<!\\)\\‍(?:[DSWdsw]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4})|(?<!\\)[^])}\\]|(?=\\)\\\\)[*][?+]?$)"));
1227 reg.replace(removeAllSuffix, QString());
1228 }
1229
1230 reg.replace(QStringLiteral("{1}"), QString());
1231
1232 // is DetectSpaces
1233 // optional ^ then \s, [\s], [\t ], [ \t] possibly in (...) or (?:...) followed by *, +
1234 static const QRegularExpression isDetectSpaces(
1235 QStringLiteral(R"(^\^?(?:\‍((?:\?:)?)?\^?(?:\\s|\[(?:\\s| (?:\t|\\t)|(?:\t|\\t) )\])\)?(?:[*+][*+?]?|[*+])?\)?\)?$)"));
1236 if (rule.string.contains(isDetectSpaces)) {
1237 char const *extraMsg = rule.string.contains(QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1238 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectSpaces / DetectChar / AnyChar" << extraMsg << ":"
1239 << rule.string;
1240 return false;
1241 }
1242
1243#define REG_ESCAPE_CHAR R"(\\(?:[^0BDPSWbdpswoux]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4}))"
1244#define REG_CHAR "(?:" REG_ESCAPE_CHAR "|\\[(?:" REG_ESCAPE_CHAR "|.)\\]|[^[.^])"
1245
1246 // is RangeDetect
1247 static const QRegularExpression isRange(QStringLiteral("^\\^?" REG_CHAR "(?:"
1248 "\\.\\*[?+]?" REG_CHAR "|"
1249 "\\[\\^(" REG_ESCAPE_CHAR "|.)\\]\\*[?+]?\\1"
1250 ")$"));
1251 if ((rule.lookAhead == XmlBool::True || rule.minimal == XmlBool::True || rule.string.contains(QStringLiteral(".*?"))
1252 || rule.string.contains(QStringLiteral("[^")))
1253 && reg.contains(isRange)) {
1254 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by RangeDetect:" << rule.string;
1255 return false;
1256 }
1257
1258 // is AnyChar
1259 static const QRegularExpression isAnyChar(QStringLiteral(R"(^(\^|\‍((\?:)?)*\[(?!\^)[-\]]?(\\[^0BDPSWbdpswoux]|[^-\]\\])*\]\)*$)"));
1260 if (rule.string.contains(isAnyChar)) {
1261 auto extra = (reg[0] == QLatin1Char('^') || reg[1] == QLatin1Char('^')) ? "with column=\"0\"" : "";
1262 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by AnyChar:" << rule.string << extra;
1263 return false;
1264 }
1265
1266 // is LineContinue
1267 static const QRegularExpression isLineContinue(QStringLiteral("^\\^?" REG_CHAR "\\$$"));
1268 if (reg.contains(isLineContinue)) {
1269 auto extra = (reg[0] == QLatin1Char('^')) ? "with column=\"0\"" : "";
1270 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by LineContinue:" << rule.string << extra;
1271 return false;
1272 }
1273
1274 // replace \c, \xhhh, \x{hhh...}, \0dd, \o{ddd}, \uhhhh, with _
1275 static const QRegularExpression sanitize1(QStringLiteral(REG_ESCAPE_CHAR));
1276 reg.replace(sanitize1, QStringLiteral("_"));
1277
1278#undef REG_CHAR
1279#undef REG_ESCAPE_CHAR
1280
1281 // use minimal or lazy operator
1282 static const QRegularExpression isMinimal(QStringLiteral("(?![.][*+?][$]?[)]*$)[.][*+?][^?+]"));
1283 static const QRegularExpression hasNotGreedy(QStringLiteral("[*+?][?+]"));
1284
1285 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(isMinimal) && !reg.contains(hasNotGreedy)
1286 && (!rule.context.context || !rule.context.context->hasDynamicRule || regexp.captureCount() == 0)
1287 && (reg.back() != QLatin1Char('$') || reg.contains(QLatin1Char('|')))) {
1288 qWarning() << rule.filename << "line" << rule.line
1289 << "RegExpr should be have minimal=\"1\" or use lazy operator (i.g, '.*' -> '.*?'):" << rule.string;
1290 return false;
1291 }
1292
1293 // replace [:...:] with ___
1294 static const QRegularExpression sanitize2(QStringLiteral(R"(\[:\w+:\])"));
1295 reg.replace(sanitize2, QStringLiteral("___"));
1296
1297 // replace [ccc...], [special] with ...
1298 static const QRegularExpression sanitize3(QStringLiteral(R"(\[(?:\^\]?[^]]*|\]?[^]\\]*?\\.[^]]*|\][^]]{2,}|[^]]{3,})\]|(\[\]?[^]]*\]))"));
1299 reg.replace(sanitize3, QStringLiteral("...\\1"));
1300
1301 // replace [c] with _
1302 static const QRegularExpression sanitize4(QStringLiteral(R"(\[.\])"));
1303 reg.replace(sanitize4, QStringLiteral("_"));
1304
1305 const int len = reg.size();
1306 // replace [cC] with _
1307 static const QRegularExpression toInsensitive(QStringLiteral(R"(\[(?:([^]])\1)\])"));
1308 reg = reg.toUpper();
1309 reg.replace(toInsensitive, QString());
1310
1311 // is StringDetect
1312 // ignore (?:, ) and {n}
1313 static const QRegularExpression isStringDetect(QStringLiteral(R"(^\^?(?:[^|\\?*+$^[{(.]|{(?!\d+,\d*}|,\d+})|\‍(\?:)+$)"));
1314 if (reg.contains(isStringDetect)) {
1315 char const *extraMsg = rule.string.contains(QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1316 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by StringDetect / Detect2Chars / DetectChar" << extraMsg
1317 << ":" << rule.string;
1318 if (len != reg.size()) {
1319 qWarning() << rule.filename << "line" << rule.line << "insensitive=\"1\" missing:" << rule.string;
1320 }
1321 return false;
1322 }
1323
1324 // column="0"
1325 if (rule.column == -1) {
1326 // ^ without |
1327 // (^sas*) -> ok
1328 // (^sa|s*) -> ko
1329 // (^(sa|s*)) -> ok
1330 auto first = std::as_const(reg).begin();
1331 auto last = std::as_const(reg).end();
1332 int depth = 0;
1333
1334 while (QLatin1Char('(') == *first) {
1335 ++depth;
1336 ++first;
1337 if (QLatin1Char('?') == *first || QLatin1Char(':') == first[1]) {
1338 first += 2;
1339 }
1340 }
1341
1342 if (QLatin1Char('^') == *first) {
1343 const int bolDepth = depth;
1344 bool replace = true;
1345
1346 while (++first != last) {
1347 if (QLatin1Char('(') == *first) {
1348 ++depth;
1349 } else if (QLatin1Char(')') == *first) {
1350 --depth;
1351 if (depth < bolDepth) {
1352 // (^a)? === (^a|) -> ko
1353 if (first + 1 != last && QStringLiteral("*?").contains(first[1])) {
1354 replace = false;
1355 break;
1356 }
1357 }
1358 } else if (QLatin1Char('|') == *first) {
1359 // ignore '|' within subgroup
1360 if (depth <= bolDepth) {
1361 replace = false;
1362 break;
1363 }
1364 }
1365 }
1366
1367 if (replace) {
1368 qWarning() << rule.filename << "line" << rule.line << "column=\"0\" missing with RegExpr:" << rule.string;
1369 return false;
1370 }
1371 }
1372 }
1373
1374 // add ^ with column=0
1375 if (rule.column == 0 && !rule.isDotRegex) {
1376 bool hasStartOfLine = false;
1377 auto first = std::as_const(reg).begin();
1378 auto last = std::as_const(reg).end();
1379 for (; first != last; ++first) {
1380 if (*first == QLatin1Char('^')) {
1381 hasStartOfLine = true;
1382 break;
1383 } else if (*first == QLatin1Char('(')) {
1384 if (last - first >= 3 && first[1] == QLatin1Char('?') && first[2] == QLatin1Char(':')) {
1385 first += 2;
1386 }
1387 } else {
1388 break;
1389 }
1390 }
1391
1392 if (!hasStartOfLine) {
1393 qWarning() << rule.filename << "line" << rule.line
1394 << "start of line missing in the pattern with column=\"0\" (i.e. abc -> ^abc):" << rule.string;
1395 return false;
1396 }
1397 }
1398
1399 bool useCapture = false;
1400
1401 // detection of unnecessary capture
1402 if (regexp.captureCount()) {
1403 auto maximalCapture = [](const QString(&referenceNames)[9], const QString &s) {
1404 int maxCapture = 9;
1405 while (maxCapture && !s.contains(referenceNames[maxCapture - 1])) {
1406 --maxCapture;
1407 }
1408 return maxCapture;
1409 };
1410
1411 int maxCaptureUsed = 0;
1412 // maximal dynamic reference
1413 if (rule.context.context && !rule.context.stay) {
1414 for (const auto &nextRule : rule.context.context->rules) {
1415 if (nextRule.dynamic == XmlBool::True) {
1416 static const QString cap[]{
1417 QStringLiteral("%1"),
1418 QStringLiteral("%2"),
1419 QStringLiteral("%3"),
1420 QStringLiteral("%4"),
1421 QStringLiteral("%5"),
1422 QStringLiteral("%6"),
1423 QStringLiteral("%7"),
1424 QStringLiteral("%8"),
1425 QStringLiteral("%9"),
1426 };
1427 int maxDynamicCapture = maximalCapture(cap, nextRule.string);
1428 maxCaptureUsed = std::max(maxCaptureUsed, maxDynamicCapture);
1429 }
1430 }
1431 }
1432
1433 static const QString num1[]{
1434 QStringLiteral("\\1"),
1435 QStringLiteral("\\2"),
1436 QStringLiteral("\\3"),
1437 QStringLiteral("\\4"),
1438 QStringLiteral("\\5"),
1439 QStringLiteral("\\6"),
1440 QStringLiteral("\\7"),
1441 QStringLiteral("\\8"),
1442 QStringLiteral("\\9"),
1443 };
1444 static const QString num2[]{
1445 QStringLiteral("\\g1"),
1446 QStringLiteral("\\g2"),
1447 QStringLiteral("\\g3"),
1448 QStringLiteral("\\g4"),
1449 QStringLiteral("\\g5"),
1450 QStringLiteral("\\g6"),
1451 QStringLiteral("\\g7"),
1452 QStringLiteral("\\g8"),
1453 QStringLiteral("\\g9"),
1454 };
1455 const int maxBackReference = std::max(maximalCapture(num1, rule.string), maximalCapture(num1, rule.string));
1456
1457 const int maxCapture = std::max(maxCaptureUsed, maxBackReference);
1458
1459 if (maxCapture && regexp.captureCount() > maxCapture) {
1460 qWarning() << rule.filename << "line" << rule.line << "RegExpr with" << regexp.captureCount() << "captures but only" << maxCapture
1461 << "are used. Please, replace '(...)' with '(?:...)':" << rule.string;
1462 return false;
1463 }
1464
1465 useCapture = maxCapture;
1466 }
1467
1468 if (!useCapture) {
1469 // is DetectIdentifier
1470 static const QRegularExpression isDetectIdentifier(
1471 QStringLiteral(R"(^(\‍((\?:)?|\^)*\[(\\p\{L\}|_){2}\]([+][?+]?)?\[(\\p\{N\}|\\p\{L\}|_){3}\][*][?+]?\)*$)"));
1472 if (rule.string.contains(isDetectIdentifier)) {
1473 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectIdentifier:" << rule.string;
1474 return false;
1475 }
1476 }
1477
1478 if (rule.isDotRegex) {
1479 // search next rule with same column or firstNonSpace
1480 int i = &rule - context.rules.data() + 1;
1481 const bool hasColumn = (rule.column != -1);
1482 const bool hasFirstNonSpace = (rule.firstNonSpace == XmlBool::True);
1483 const bool isSpecial = (hasColumn || hasFirstNonSpace);
1484 for (; i < context.rules.size(); ++i) {
1485 auto &rule2 = context.rules[i];
1486 if (rule2.type == Context::Rule::Type::IncludeRules && isSpecial) {
1487 i = context.rules.size();
1488 break;
1489 }
1490
1491 const bool hasColumn2 = (rule2.column != -1);
1492 const bool hasFirstNonSpace2 = (rule2.firstNonSpace == XmlBool::True);
1493 if ((!isSpecial && !hasColumn2 && !hasFirstNonSpace2) || (hasColumn && rule.column == rule2.column)
1494 || (hasFirstNonSpace && hasFirstNonSpace2)) {
1495 break;
1496 }
1497 }
1498
1499 auto ruleFilename = (filename == rule.filename) ? QString() : QStringLiteral("in ") + rule.filename;
1500 if (i == context.rules.size()) {
1501 if (rule.lookAhead == XmlBool::True && rule.firstNonSpace != XmlBool::True && rule.column == -1 && rule.beginRegion.isEmpty()
1502 && rule.endRegion.isEmpty() && !useCapture) {
1503 qWarning() << filename << "context line" << context.line << ": RegExpr line" << rule.line << ruleFilename
1504 << "should be replaced by fallthroughContext:" << rule.string;
1505 }
1506 } else {
1507 auto &nextRule = context.rules[i];
1508 auto nextRuleFilename = (filename == nextRule.filename) ? QString() : QStringLiteral("in ") + nextRule.filename;
1509 qWarning() << filename << "context line" << context.line << "contains unreachable element line" << nextRule.line << nextRuleFilename
1510 << "because a dot RegExpr is used line" << rule.line << ruleFilename;
1511 }
1512
1513 // unnecessary quantifier
1514 static const QRegularExpression unnecessaryQuantifier1(QStringLiteral(R"([*+?]([.][*+?]{0,2})?$)"));
1515 static const QRegularExpression unnecessaryQuantifier2(QStringLiteral(R"([*+?]([.][*+?]{0,2})?[)]*$)"));
1516 auto &unnecessaryQuantifier = useCapture ? unnecessaryQuantifier1 : unnecessaryQuantifier2;
1517 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(unnecessaryQuantifier)) {
1518 qWarning() << rule.filename << "line" << rule.line
1519 << "Last quantifier is not necessary (i.g., 'xyz*' -> 'xy', 'xyz+.' -> 'xyz.'):" << rule.string;
1520 return false;
1521 }
1522 }
1523 }
1524
1525 return true;
1526 }
1527
1528 // Parse and check <emptyLine>
1529 bool parseEmptyLine(const QString &filename, QXmlStreamReader &xml)
1530 {
1531 bool success = true;
1532
1533 QString pattern;
1534 XmlBool casesensitive{};
1535
1536 for (auto &attr : xml.attributes()) {
1537 Parser parser{filename, xml, attr, success};
1538
1539 const bool isExtracted =
1540 parser.extractString(pattern, QStringLiteral("regexpr")) || parser.extractXmlBool(casesensitive, QStringLiteral("casesensitive"));
1541
1542 success = parser.checkIfExtracted(isExtracted);
1543 }
1544
1545 if (pattern.isEmpty()) {
1546 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: regexpr";
1547 success = false;
1548 } else {
1549 success = checkRegularExpression(filename, QRegularExpression(pattern), xml.lineNumber());
1550 }
1551
1552 return success;
1553 }
1554
1555 //! Check that a regular expression:
1556 //! - isValid()
1557 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1558 bool checkRegularExpression(const QString &filename, const QRegularExpression &regexp, int line) const
1559 {
1560 const auto pattern = regexp.pattern();
1561
1562 // validate regexp
1563 if (!regexp.isValid()) {
1564 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem:" << regexp.errorString() << "at offset"
1565 << regexp.patternErrorOffset();
1566 return false;
1567 }
1568
1569 // catch possible case typos: [A-z] or [a-Z]
1570 const int azOffset = std::max(pattern.indexOf(QStringLiteral("A-z")), pattern.indexOf(QStringLiteral("a-Z")));
1571 if (azOffset >= 0) {
1572 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem: [a-Z] or [A-z] at offset" << azOffset;
1573 return false;
1574 }
1575
1576 return true;
1577 }
1578
1579 //! Check fallthrough and fallthroughContext.
1580 //! Check kateversion for stopEmptyLineContextSwitchLoop.
1581 bool checkContextAttribute(const Definition &definition, const Context &context) const
1582 {
1583 bool success = true;
1584
1585 if (!context.fallthroughContext.name.isEmpty()) {
1586 const bool mandatoryFallthroughAttribute = definition.kateVersion < Version{5, 62};
1587 if (context.fallthrough == XmlBool::True && !mandatoryFallthroughAttribute) {
1588 qWarning() << definition.filename << "line" << context.line << "fallthrough attribute is unnecessary with kateversion >= 5.62 in context"
1589 << context.name;
1590 success = false;
1591 } else if (context.fallthrough != XmlBool::True && mandatoryFallthroughAttribute) {
1592 qWarning() << definition.filename << "line" << context.line
1593 << "fallthroughContext attribute without fallthrough=\"1\" attribute is only valid with kateversion >= 5.62 in context"
1594 << context.name;
1595 success = false;
1596 }
1597 }
1598
1599 if (context.stopEmptyLineContextSwitchLoop != XmlBool::Unspecified && definition.kateVersion < Version{5, 103}) {
1600 qWarning() << definition.filename << "line" << context.line
1601 << "stopEmptyLineContextSwitchLoop attribute is only valid with kateversion >= 5.103 in context" << context.name;
1602 success = false;
1603 }
1604
1605 return success;
1606 }
1607
1608 //! Search for additionalDeliminator/weakDeliminator which has no effect.
1609 bool checkDelimiters(const Definition &definition, const Context::Rule &rule) const
1610 {
1611 if (rule.additionalDeliminator.isEmpty() && rule.weakDeliminator.isEmpty()) {
1612 return true;
1613 }
1614
1615 bool success = true;
1616
1617 if (definition.kateVersion < Version{5, 79}) {
1618 qWarning() << definition.filename << "line" << rule.line
1619 << "additionalDeliminator and weakDeliminator are only available since version \"5.79\". Please, increase kateversion.";
1620 success = false;
1621 }
1622
1623 for (QChar c : rule.additionalDeliminator) {
1624 if (!definition.wordDelimiters.contains(c)) {
1625 return success;
1626 }
1627 }
1628
1629 for (QChar c : rule.weakDeliminator) {
1630 if (definition.wordDelimiters.contains(c)) {
1631 return success;
1632 }
1633 }
1634
1635 qWarning() << rule.filename << "line" << rule.line << "unnecessary use of additionalDeliminator and/or weakDeliminator" << rule.string;
1636 return false;
1637 }
1638
1639 //! Check that keyword rule reference an existing keyword list.
1640 bool checkKeyword(const Definition &definition, const Context::Rule &rule) const
1641 {
1642 if (rule.type == Context::Rule::Type::keyword) {
1643 auto it = definition.keywordsList.find(rule.string);
1644 if (it == definition.keywordsList.end()) {
1645 qWarning() << rule.filename << "line" << rule.line << "reference of non-existing keyword list:" << rule.string;
1646 return false;
1647 }
1648 }
1649 return true;
1650 }
1651
1652 //! Search for rules with lookAhead="true" and context="#stay".
1653 //! This would cause an infinite loop.
1654 bool checkLookAhead(const Context::Rule &rule) const
1655 {
1656 if (rule.lookAhead == XmlBool::True && rule.context.stay) {
1657 qWarning() << rule.filename << "line" << rule.line << "infinite loop: lookAhead with context #stay";
1658 }
1659 return true;
1660 }
1661
1662 //! Check that StringDetect contains a placeHolder when dynamic="1"
1663 bool checkStringDetect(const Context::Rule &rule) const
1664 {
1665 if (rule.type == Context::Rule::Type::StringDetect) {
1666 // dynamic == true and no place holder?
1667 if (rule.dynamic == XmlBool::True) {
1668 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1669 if (!rule.string.contains(placeHolder)) {
1670 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1671 return false;
1672 }
1673 }
1674 }
1675 return true;
1676 }
1677
1678 //! Check <include> and delimiter in a keyword list
1679 bool checkKeywordsList(const Definition &definition) const
1680 {
1681 bool success = true;
1682
1683 bool includeNotSupport = (definition.kateVersion < Version{5, 53});
1684 QMapIterator<QString, Keywords> keywordsIt(definition.keywordsList);
1685 while (keywordsIt.hasNext()) {
1686 keywordsIt.next();
1687
1688 for (const auto &include : keywordsIt.value().items.includes) {
1689 if (includeNotSupport) {
1690 qWarning() << definition.filename << "line" << include.line
1691 << "<include> is only available since version \"5.53\". Please, increase kateversion.";
1692 success = false;
1693 }
1694 success = checkKeywordInclude(definition, include) && success;
1695 }
1696
1697 // Check that keyword list items do not have deliminator character
1698#if 0
1699 for (const auto& keyword : keywordsIt.value().items.keywords) {
1700 for (QChar c : keyword.content) {
1701 if (definition.wordDelimiters.contains(c)) {
1702 qWarning() << definition.filename << "line" << keyword.line << "keyword with delimiter:" << c << "in" << keyword.content;
1703 success = false;
1704 }
1705 }
1706 }
1707#endif
1708 }
1709
1710 return success;
1711 }
1712
1713 //! Search for non-existing keyword include.
1714 bool checkKeywordInclude(const Definition &definition, const Keywords::Items::Item &include) const
1715 {
1716 bool containsKeywordName = true;
1717 int const idx = include.content.indexOf(QStringLiteral("##"));
1718 if (idx == -1) {
1719 auto it = definition.keywordsList.find(include.content);
1720 containsKeywordName = (it != definition.keywordsList.end());
1721 } else {
1722 auto defName = include.content.mid(idx + 2);
1723 auto listName = include.content.left(idx);
1724 auto it = m_definitions.find(defName);
1725 if (it == m_definitions.end()) {
1726 qWarning() << definition.filename << "line" << include.line << "unknown definition in" << include.content;
1727 return false;
1728 }
1729 containsKeywordName = it->keywordsList.contains(listName);
1730 }
1731
1732 if (!containsKeywordName) {
1733 qWarning() << definition.filename << "line" << include.line << "unknown keyword name in" << include.content;
1734 }
1735
1736 return containsKeywordName;
1737 }
1738
1739 //! Check if a rule is hidden by another
1740 //! - rule hidden by DetectChar or AnyChar
1741 //! - DetectSpaces, AnyChar, Int, Float with all their characters hidden by DetectChar or AnyChar
1742 //! - StringDetect, WordDetect, RegExpr with as prefix Detect2Chars or other strings
1743 //! - duplicate rule (Int, Float, keyword with same String, etc)
1744 //! - Rule hidden by a dot regex
1745 bool checkUreachableRules(const QString &filename,
1746 const Context &context,
1747 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1748 {
1749 if (context.isOnlyIncluded) {
1750 return true;
1751 }
1752
1753 struct Rule4 {
1754 RuleAndInclude setRule(const Context::Rule &rule, const Context::Rule *includeRules = nullptr)
1755 {
1756 auto set = [&](RuleAndInclude &ruleAndInclude) {
1757 auto old = ruleAndInclude;
1758 ruleAndInclude = {&rule, includeRules};
1759 return old;
1760 };
1761
1762 if (rule.firstNonSpace == XmlBool::True) {
1763 return set(firstNonSpace);
1764 } else if (rule.column == 0) {
1765 return set(column0);
1766 } else if (rule.column > 0) {
1767 return set(columnGreaterThan0[rule.column]);
1768 } else {
1769 return set(normal);
1770 }
1771 }
1772
1773 private:
1774 RuleAndInclude normal;
1775 RuleAndInclude column0;
1776 QMap<int, RuleAndInclude> columnGreaterThan0;
1777 RuleAndInclude firstNonSpace;
1778 };
1779
1780 // Associate QChar with RuleAndInclude
1781 struct CharTable {
1782 /// Search RuleAndInclude associated with @p c.
1783 RuleAndInclude find(QChar c) const
1784 {
1785 if (c.unicode() < 128) {
1786 return m_asciiMap[c.unicode()];
1787 }
1788 auto it = m_utf8Map.find(c);
1789 return it == m_utf8Map.end() ? RuleAndInclude{nullptr, nullptr} : it.value();
1790 }
1791
1792 /// Search RuleAndInclude associated with the characters of @p s.
1793 /// \return an empty QList when at least one character is not found.
1795 {
1796 QList<RuleAndInclude> result;
1797
1798 for (QChar c : s) {
1799 if (!find(c)) {
1800 return result;
1801 }
1802 }
1803
1804 for (QChar c : s) {
1805 result.append(find(c));
1806 }
1807
1808 return result;
1809 }
1810
1811 /// Associates @p c with a rule.
1812 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1813 {
1814 if (c.unicode() < 128) {
1815 m_asciiMap[c.unicode()] = {&rule, includeRule};
1816 } else {
1817 m_utf8Map[c] = {&rule, includeRule};
1818 }
1819 }
1820
1821 /// Associates each character of @p s with a rule.
1822 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1823 {
1824 for (QChar c : s) {
1825 append(c, rule, includeRule);
1826 }
1827 }
1828
1829 private:
1830 RuleAndInclude m_asciiMap[127]{};
1832 };
1833
1834 struct Char4Tables {
1835 CharTable chars;
1836 CharTable charsColumn0;
1837 QMap<int, CharTable> charsColumnGreaterThan0;
1838 CharTable charsFirstNonSpace;
1839 };
1840
1841 // View on Char4Tables members
1842 struct CharTableArray {
1843 // Append Char4Tables members that satisfies firstNonSpace and column.
1844 // Char4Tables::char is always added.
1845 CharTableArray(Char4Tables &tables, const Context::Rule &rule)
1846 {
1847 if (rule.firstNonSpace == XmlBool::True) {
1848 appendTable(tables.charsFirstNonSpace);
1849 }
1850
1851 if (rule.column == 0) {
1852 appendTable(tables.charsColumn0);
1853 } else if (rule.column > 0) {
1854 appendTable(tables.charsColumnGreaterThan0[rule.column]);
1855 }
1856
1857 appendTable(tables.chars);
1858 }
1859
1860 // Removes Char4Tables::chars when the rule contains firstNonSpace or column
1861 void removeNonSpecialWhenSpecial()
1862 {
1863 if (m_size > 1) {
1864 --m_size;
1865 }
1866 }
1867
1868 /// Search RuleAndInclude associated with @p c.
1869 RuleAndInclude find(QChar c) const
1870 {
1871 for (int i = 0; i < m_size; ++i) {
1872 if (auto ruleAndInclude = m_charTables[i]->find(c)) {
1873 return ruleAndInclude;
1874 }
1875 }
1876 return RuleAndInclude{nullptr, nullptr};
1877 }
1878
1879 /// Search RuleAndInclude associated with the characters of @p s.
1880 /// \return an empty QList when at least one character is not found.
1882 {
1883 for (int i = 0; i < m_size; ++i) {
1884 auto result = m_charTables[i]->find(s);
1885 if (result.size()) {
1886 while (++i < m_size) {
1887 result.append(m_charTables[i]->find(s));
1888 }
1889 return result;
1890 }
1891 }
1892 return QList<RuleAndInclude>();
1893 }
1894
1895 /// Associates @p c with a rule.
1896 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1897 {
1898 for (int i = 0; i < m_size; ++i) {
1899 m_charTables[i]->append(c, rule, includeRule);
1900 }
1901 }
1902
1903 /// Associates each character of @p s with a rule.
1904 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1905 {
1906 for (int i = 0; i < m_size; ++i) {
1907 m_charTables[i]->append(s, rule, includeRule);
1908 }
1909 }
1910
1911 private:
1912 void appendTable(CharTable &t)
1913 {
1914 m_charTables[m_size] = &t;
1915 ++m_size;
1916 }
1917
1918 CharTable *m_charTables[3];
1919 int m_size = 0;
1920 };
1921
1922 struct ObservableRule {
1923 const Context::Rule *rule;
1924 const Context::Rule *includeRules;
1925
1926 bool hasResolvedIncludeRules() const
1927 {
1928 return rule == includeRules;
1929 }
1930 };
1931
1932 // Iterates over all the rules, including those in includedRules
1933 struct RuleIterator {
1934 RuleIterator(const QList<ObservableRule> &rules, const ObservableRule &endRule)
1935 : m_end(&endRule - rules.data())
1936 , m_rules(rules)
1937 {
1938 }
1939
1940 /// \return next rule or nullptr
1941 const Context::Rule *next()
1942 {
1943 // if in includedRules
1944 if (m_includedRules) {
1945 ++m_i2;
1946 if (m_i2 != m_includedRules->size()) {
1947 return (*m_includedRules)[m_i2];
1948 }
1949 ++m_i;
1950 m_includedRules = nullptr;
1951 }
1952
1953 // if is a includedRules
1954 while (m_i < m_end && m_rules[m_i].rule->type == Context::Rule::Type::IncludeRules) {
1955 if (!m_rules[m_i].includeRules && m_rules[m_i].rule->includedRules.size()) {
1956 m_i2 = 0;
1957 m_includedRules = &m_rules[m_i].rule->includedRules;
1958 return (*m_includedRules)[m_i2];
1959 }
1960 ++m_i;
1961 }
1962
1963 if (m_i < m_end) {
1964 ++m_i;
1965 return m_rules[m_i - 1].rule;
1966 }
1967
1968 return nullptr;
1969 }
1970
1971 /// \return current IncludeRules or nullptr
1972 const Context::Rule *currentIncludeRules() const
1973 {
1974 return m_includedRules ? m_rules[m_i].rule : m_rules[m_i].includeRules;
1975 }
1976
1977 private:
1978 int m_i = 0;
1979 int m_i2 = 0;
1980 const int m_end;
1981 const QList<ObservableRule> &m_rules;
1982 const QList<const Context::Rule *> *m_includedRules = nullptr;
1983 };
1984
1985 // Dot regex container that satisfies firstNonSpace and column.
1986 struct DotRegex {
1987 /// Append a dot regex rule.
1988 void append(const Context::Rule &rule, const Context::Rule *includedRule)
1989 {
1990 auto array = extractDotRegexes(rule);
1991 if (array[0]) {
1992 *array[0] = {&rule, includedRule};
1993 }
1994 if (array[1]) {
1995 *array[1] = {&rule, includedRule};
1996 }
1997 }
1998
1999 /// Search dot regex which hides @p rule
2000 RuleAndInclude find(const Context::Rule &rule)
2001 {
2002 auto array = extractDotRegexes(rule);
2003 if (array[0]) {
2004 return *array[0];
2005 }
2006 if (array[1]) {
2007 return *array[1];
2008 }
2009 return RuleAndInclude{};
2010 }
2011
2012 private:
2013 using Array = std::array<RuleAndInclude *, 2>;
2014
2015 Array extractDotRegexes(const Context::Rule &rule)
2016 {
2017 Array ret{};
2018
2019 if (rule.firstNonSpace != XmlBool::True && rule.column == -1) {
2020 ret[0] = &dotRegex;
2021 } else {
2022 if (rule.firstNonSpace == XmlBool::True) {
2023 ret[0] = &dotRegexFirstNonSpace;
2024 }
2025
2026 if (rule.column == 0) {
2027 ret[1] = &dotRegexColumn0;
2028 } else if (rule.column > 0) {
2029 ret[1] = &dotRegexColumnGreaterThan0[rule.column];
2030 }
2031 }
2032
2033 return ret;
2034 }
2035
2036 RuleAndInclude dotRegex{};
2037 RuleAndInclude dotRegexColumn0{};
2038 QMap<int, RuleAndInclude> dotRegexColumnGreaterThan0{};
2039 RuleAndInclude dotRegexFirstNonSpace{};
2040 };
2041
2042 bool success = true;
2043
2044 // characters of DetectChar/AnyChar
2045 Char4Tables detectChars;
2046 // characters of dynamic DetectChar
2047 Char4Tables dynamicDetectChars;
2048 // characters of LineContinue
2049 Char4Tables lineContinueChars;
2050
2051 Rule4 intRule{};
2052 Rule4 floatRule{};
2053 Rule4 hlCCharRule{};
2054 Rule4 hlCOctRule{};
2055 Rule4 hlCHexRule{};
2056 Rule4 hlCStringCharRule{};
2057 Rule4 detectIdentifierRule{};
2058
2059 // Contains includedRules and included includedRules
2061
2062 DotRegex dotRegex;
2063
2064 QList<ObservableRule> observedRules;
2065 observedRules.reserve(context.rules.size());
2066 for (const Context::Rule &rule : context.rules) {
2067 const Context::Rule *includeRule = nullptr;
2068 if (rule.type == Context::Rule::Type::IncludeRules) {
2069 auto *context = rule.context.context;
2070 if (context && context->isOnlyIncluded) {
2071 includeRule = &rule;
2072 }
2073 }
2074
2075 observedRules.push_back({&rule, includeRule});
2076 if (includeRule) {
2077 for (const Context::Rule *rule2 : rule.includedRules) {
2078 observedRules.push_back({rule2, includeRule});
2079 }
2080 }
2081 }
2082
2083 for (auto &observedRule : observedRules) {
2084 const Context::Rule &rule = *observedRule.rule;
2085 bool isUnreachable = false;
2086 QList<RuleAndInclude> unreachableBy;
2087
2088 // declare rule as unreachable if ruleAndInclude is not empty
2089 auto updateUnreachable1 = [&](RuleAndInclude ruleAndInclude) {
2090 if (ruleAndInclude) {
2091 isUnreachable = true;
2092 unreachableBy.append(ruleAndInclude);
2093 }
2094 };
2095
2096 // declare rule as unreachable if ruleAndIncludes is not empty
2097 auto updateUnreachable2 = [&](const QList<RuleAndInclude> &ruleAndIncludes) {
2098 if (!ruleAndIncludes.isEmpty()) {
2099 isUnreachable = true;
2100 unreachableBy.append(ruleAndIncludes);
2101 }
2102 };
2103
2104 // check if rule2.firstNonSpace/column is compatible with those of rule
2105 auto isCompatible = [&rule](Context::Rule const &rule2) {
2106 return (rule2.firstNonSpace != XmlBool::True && rule2.column == -1) || (rule.column == rule2.column && rule.column != -1)
2107 || (rule.firstNonSpace == rule2.firstNonSpace && rule.firstNonSpace == XmlBool::True);
2108 };
2109
2110 updateUnreachable1(dotRegex.find(rule));
2111
2112 switch (rule.type) {
2113 // checks if hidden by DetectChar/AnyChar
2114 // then add the characters to detectChars
2115 case Context::Rule::Type::AnyChar: {
2116 auto tables = CharTableArray(detectChars, rule);
2117 updateUnreachable2(tables.find(rule.string));
2118 tables.removeNonSpecialWhenSpecial();
2119 tables.append(rule.string, rule);
2120 break;
2121 }
2122
2123 // check if is hidden by DetectChar/AnyChar
2124 // then add the characters to detectChars or dynamicDetectChars
2125 case Context::Rule::Type::DetectChar: {
2126 auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2127 auto tables = CharTableArray(chars4, rule);
2128 updateUnreachable1(tables.find(rule.char0));
2129 tables.removeNonSpecialWhenSpecial();
2130 tables.append(rule.char0, rule);
2131 break;
2132 }
2133
2134 // check if hidden by DetectChar/AnyChar
2135 // then add spaces characters to detectChars
2136 case Context::Rule::Type::DetectSpaces: {
2137 auto tables = CharTableArray(detectChars, rule);
2138 updateUnreachable2(tables.find(QStringLiteral(" \t")));
2139 tables.removeNonSpecialWhenSpecial();
2140 tables.append(QLatin1Char(' '), rule);
2141 tables.append(QLatin1Char('\t'), rule);
2142 break;
2143 }
2144
2145 // check if hidden by DetectChar/AnyChar
2146 case Context::Rule::Type::HlCChar:
2147 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('\'')));
2148 updateUnreachable1(hlCCharRule.setRule(rule));
2149 break;
2150
2151 // check if hidden by DetectChar/AnyChar
2152 case Context::Rule::Type::HlCHex:
2153 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('0')));
2154 updateUnreachable1(hlCHexRule.setRule(rule));
2155 break;
2156
2157 // check if hidden by DetectChar/AnyChar
2158 case Context::Rule::Type::HlCOct:
2159 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('0')));
2160 updateUnreachable1(hlCOctRule.setRule(rule));
2161 break;
2162
2163 // check if hidden by DetectChar/AnyChar
2164 case Context::Rule::Type::HlCStringChar:
2165 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('\\')));
2166 updateUnreachable1(hlCStringCharRule.setRule(rule));
2167 break;
2168
2169 // check if hidden by DetectChar/AnyChar
2170 case Context::Rule::Type::Int:
2171 updateUnreachable2(CharTableArray(detectChars, rule).find(QStringLiteral("0123456789")));
2172 updateUnreachable1(intRule.setRule(rule));
2173 break;
2174
2175 // check if hidden by DetectChar/AnyChar
2176 case Context::Rule::Type::Float:
2177 updateUnreachable2(CharTableArray(detectChars, rule).find(QStringLiteral("0123456789.")));
2178 updateUnreachable1(floatRule.setRule(rule));
2179 // check that Float is before Int
2180 updateUnreachable1(Rule4(intRule).setRule(rule));
2181 break;
2182
2183 // check if hidden by another DetectIdentifier rule
2184 case Context::Rule::Type::DetectIdentifier:
2185 updateUnreachable1(detectIdentifierRule.setRule(rule));
2186 break;
2187
2188 // check if hidden by DetectChar/AnyChar or another LineContinue
2189 case Context::Rule::Type::LineContinue: {
2190 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2191
2192 auto tables = CharTableArray(lineContinueChars, rule);
2193 updateUnreachable1(tables.find(rule.char0));
2194 tables.removeNonSpecialWhenSpecial();
2195 tables.append(rule.char0, rule);
2196 break;
2197 }
2198
2199 // check if hidden by DetectChar/AnyChar or another Detect2Chars/RangeDetect
2200 case Context::Rule::Type::Detect2Chars:
2201 case Context::Rule::Type::RangeDetect:
2202 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2203 if (!isUnreachable) {
2204 RuleIterator ruleIterator(observedRules, observedRule);
2205 while (const auto *rulePtr = ruleIterator.next()) {
2206 if (isUnreachable) {
2207 break;
2208 }
2209 const auto &rule2 = *rulePtr;
2210 if (rule2.type == rule.type && isCompatible(rule2) && rule.char0 == rule2.char0 && rule.char1 == rule2.char1) {
2211 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2212 }
2213 }
2214 }
2215 break;
2216
2217 case Context::Rule::Type::RegExpr: {
2218 if (rule.isDotRegex) {
2219 dotRegex.append(rule, nullptr);
2220 break;
2221 }
2222
2223 // check that `rule` does not have another RegExpr as a prefix
2224 RuleIterator ruleIterator(observedRules, observedRule);
2225 while (const auto *rulePtr = ruleIterator.next()) {
2226 if (isUnreachable) {
2227 break;
2228 }
2229 const auto &rule2 = *rulePtr;
2230 if (rule2.type == Context::Rule::Type::RegExpr && isCompatible(rule2) && rule.insensitive == rule2.insensitive
2231 && rule.dynamic == rule2.dynamic && rule.sanitizedString.startsWith(rule2.sanitizedString)) {
2232 bool add = (rule.sanitizedString.startsWith(rule2.string) || rule.sanitizedString.size() < rule2.sanitizedString.size() + 2);
2233 if (!add) {
2234 // \s.* (sanitized = \s) is considered hiding \s*\S
2235 // we check the quantifiers to see if this is the case
2236 auto c1 = rule.sanitizedString[rule2.sanitizedString.size()].unicode();
2237 auto c2 = rule.sanitizedString[rule2.sanitizedString.size() + 1].unicode();
2238 auto c3 = rule2.sanitizedString.back().unicode();
2239 if (c3 == '*' || c3 == '?' || c3 == '+') {
2240 add = true;
2241 } else if (c1 == '*' || c1 == '?') {
2242 add = !((c2 == '?' || c2 == '+') || (rule.sanitizedString.size() >= rule2.sanitizedString.size() + 3));
2243 } else {
2244 add = true;
2245 }
2246 }
2247 if (add) {
2248 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2249 }
2250 }
2251 }
2252
2253 Q_FALLTHROUGH();
2254 }
2255 // check if a rule does not have another rule as a prefix
2256 case Context::Rule::Type::WordDetect:
2257 case Context::Rule::Type::StringDetect: {
2258 // check that dynamic `rule` does not have another dynamic StringDetect as a prefix
2259 if (rule.type == Context::Rule::Type::StringDetect && rule.dynamic == XmlBool::True) {
2260 RuleIterator ruleIterator(observedRules, observedRule);
2261 while (const auto *rulePtr = ruleIterator.next()) {
2262 if (isUnreachable) {
2263 break;
2264 }
2265
2266 const auto &rule2 = *rulePtr;
2267 if (rule2.type != Context::Rule::Type::StringDetect || rule2.dynamic != XmlBool::True || !isCompatible(rule2)) {
2268 continue;
2269 }
2270
2271 const bool isSensitive = (rule2.insensitive == XmlBool::True);
2272 const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2273 if ((isSensitive || rule.insensitive != XmlBool::True) && rule.string.startsWith(rule2.string, caseSensitivity)) {
2274 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2275 }
2276 }
2277 }
2278
2279 // string used for comparison and truncated from "dynamic" part
2280 QStringView s = rule.string;
2281
2282 // truncate to '%' with dynamic rules
2283 if (rule.dynamic == XmlBool::True) {
2284 static const QRegularExpression dynamicPosition(QStringLiteral(R"(^(?:[^%]*|%(?![1-9]))*)"));
2285 auto result = dynamicPosition.match(rule.string);
2286 s = s.left(result.capturedLength());
2287 }
2288
2289 QString sanitizedRegex;
2290 // truncate to special character with RegExpr.
2291 // If regexp contains '|', `s` becomes empty.
2292 if (rule.type == Context::Rule::Type::RegExpr) {
2293 static const QRegularExpression regularChars(QStringLiteral(R"(^(?:[^.?*+^$[{(\\|]+|\\[-.?*+^$[\]{}()\\|]+|\[[^^\\]\])+)"));
2294 static const QRegularExpression sanitizeChars(QStringLiteral(R"(\\‍([-.?*+^$[\]{}()\\|])|\[([^^\\])\])"));
2295 const qsizetype result = regularChars.match(rule.string).capturedLength();
2296 const qsizetype pos = qMin(result, s.size());
2297 if (rule.string.indexOf(QLatin1Char('|'), pos) < pos) {
2298 sanitizedRegex = rule.string.left(qMin(result, s.size()));
2299 sanitizedRegex.replace(sanitizeChars, QStringLiteral("\\1"));
2300 s = sanitizedRegex;
2301 } else {
2302 s = QStringView();
2303 }
2304 }
2305
2306 // check if hidden by DetectChar/AnyChar
2307 if (s.size() > 0) {
2308 auto t = CharTableArray(detectChars, rule);
2309 if (rule.insensitive != XmlBool::True) {
2310 updateUnreachable1(t.find(s[0]));
2311 } else {
2312 QChar c2[]{s[0].toLower(), s[0].toUpper()};
2313 updateUnreachable2(t.find(QStringView(c2, 2)));
2314 }
2315 }
2316
2317 // check if Detect2Chars, StringDetect, WordDetect is not a prefix of s
2318 if (s.size() > 0 && !isUnreachable) {
2319 // combination of uppercase and lowercase
2320 RuleAndInclude detect2CharsInsensitives[]{{}, {}, {}, {}};
2321
2322 RuleIterator ruleIterator(observedRules, observedRule);
2323 while (const auto *rulePtr = ruleIterator.next()) {
2324 if (isUnreachable) {
2325 break;
2326 }
2327 const auto &rule2 = *rulePtr;
2328 const bool isSensitive = (rule2.insensitive == XmlBool::True);
2329 const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2330
2331 switch (rule2.type) {
2332 // check that it is not a detectChars prefix
2333 case Context::Rule::Type::Detect2Chars:
2334 if (isCompatible(rule2) && s.size() >= 2) {
2335 if (rule.insensitive != XmlBool::True) {
2336 if (rule2.char0 == s[0] && rule2.char1 == s[1]) {
2337 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2338 }
2339 } else {
2340 // when the string is case insensitive,
2341 // all 4 upper/lower case combinations must be found
2342 auto set = [&](RuleAndInclude &x, QChar c1, QChar c2) {
2343 if (!x && rule2.char0 == c1 && rule2.char0 == c2) {
2344 x = {&rule2, ruleIterator.currentIncludeRules()};
2345 }
2346 };
2347 set(detect2CharsInsensitives[0], s[0].toLower(), s[1].toLower());
2348 set(detect2CharsInsensitives[1], s[0].toLower(), s[1].toUpper());
2349 set(detect2CharsInsensitives[2], s[0].toUpper(), s[1].toUpper());
2350 set(detect2CharsInsensitives[3], s[0].toUpper(), s[1].toLower());
2351
2352 if (detect2CharsInsensitives[0] && detect2CharsInsensitives[1] && detect2CharsInsensitives[2]
2353 && detect2CharsInsensitives[3]) {
2354 isUnreachable = true;
2355 unreachableBy.append(detect2CharsInsensitives[0]);
2356 unreachableBy.append(detect2CharsInsensitives[1]);
2357 unreachableBy.append(detect2CharsInsensitives[2]);
2358 unreachableBy.append(detect2CharsInsensitives[3]);
2359 }
2360 }
2361 }
2362 break;
2363
2364 // check that it is not a StringDetect prefix
2365 case Context::Rule::Type::StringDetect:
2366 if (isCompatible(rule2) && rule2.dynamic != XmlBool::True && (isSensitive || rule.insensitive != XmlBool::True)
2367 && s.startsWith(rule2.string, caseSensitivity)) {
2368 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2369 }
2370 break;
2371
2372 // check if a WordDetect is hidden by another WordDetect
2373 case Context::Rule::Type::WordDetect:
2374 if (rule.type == Context::Rule::Type::WordDetect && isCompatible(rule2) && (isSensitive || rule.insensitive != XmlBool::True)
2375 && 0 == rule.string.compare(rule2.string, caseSensitivity)) {
2376 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2377 }
2378 break;
2379
2380 default:;
2381 }
2382 }
2383 }
2384
2385 break;
2386 }
2387
2388 // check if hidden by another keyword rule
2389 case Context::Rule::Type::keyword: {
2390 RuleIterator ruleIterator(observedRules, observedRule);
2391 while (const auto *rulePtr = ruleIterator.next()) {
2392 if (isUnreachable) {
2393 break;
2394 }
2395 const auto &rule2 = *rulePtr;
2396 if (rule2.type == Context::Rule::Type::keyword && isCompatible(rule2) && rule.string == rule2.string) {
2397 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2398 }
2399 }
2400 // TODO check that all keywords are hidden by another rules
2401 break;
2402 }
2403
2404 // add characters in those used but without checking if they are already.
2405 // <DetectChar char="}" />
2406 // <includedRules .../> <- reference an another <DetectChar char="}" /> who will not be checked
2407 // <includedRules .../> <- reference a <DetectChar char="{" /> who will be added
2408 // <DetectChar char="{" /> <- hidden by previous rule
2409 case Context::Rule::Type::IncludeRules:
2410 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2411 break;
2412 }
2413
2414 if (auto &ruleAndInclude = includeContexts[rule.context.context]) {
2415 updateUnreachable1(ruleAndInclude);
2416 } else {
2417 ruleAndInclude.rule = &rule;
2418 }
2419
2420 for (const auto *rulePtr : rule.includedIncludeRules) {
2421 includeContexts.insert(rulePtr->context.context, RuleAndInclude{rulePtr, &rule});
2422 }
2423
2424 if (observedRule.includeRules) {
2425 break;
2426 }
2427
2428 for (const auto *rulePtr : rule.includedRules) {
2429 const auto &rule2 = *rulePtr;
2430 switch (rule2.type) {
2431 case Context::Rule::Type::AnyChar: {
2432 auto tables = CharTableArray(detectChars, rule2);
2433 tables.removeNonSpecialWhenSpecial();
2434 tables.append(rule2.string, rule2, &rule);
2435 break;
2436 }
2437
2438 case Context::Rule::Type::DetectChar: {
2439 auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2440 auto tables = CharTableArray(chars4, rule2);
2441 tables.removeNonSpecialWhenSpecial();
2442 tables.append(rule2.char0, rule2, &rule);
2443 break;
2444 }
2445
2446 case Context::Rule::Type::DetectSpaces: {
2447 auto tables = CharTableArray(detectChars, rule2);
2448 tables.removeNonSpecialWhenSpecial();
2449 tables.append(QLatin1Char(' '), rule2, &rule);
2450 tables.append(QLatin1Char('\t'), rule2, &rule);
2451 break;
2452 }
2453
2454 case Context::Rule::Type::HlCChar:
2455 hlCCharRule.setRule(rule2, &rule);
2456 break;
2457
2458 case Context::Rule::Type::HlCHex:
2459 hlCHexRule.setRule(rule2, &rule);
2460 break;
2461
2462 case Context::Rule::Type::HlCOct:
2463 hlCOctRule.setRule(rule2, &rule);
2464 break;
2465
2466 case Context::Rule::Type::HlCStringChar:
2467 hlCStringCharRule.setRule(rule2, &rule);
2468 break;
2469
2470 case Context::Rule::Type::Int:
2471 intRule.setRule(rule2, &rule);
2472 break;
2473
2474 case Context::Rule::Type::Float:
2475 floatRule.setRule(rule2, &rule);
2476 break;
2477
2478 case Context::Rule::Type::LineContinue: {
2479 auto tables = CharTableArray(lineContinueChars, rule2);
2480 tables.removeNonSpecialWhenSpecial();
2481 tables.append(rule2.char0, rule2, &rule);
2482 break;
2483 }
2484
2485 case Context::Rule::Type::RegExpr:
2486 if (rule2.isDotRegex) {
2487 dotRegex.append(rule2, &rule);
2488 }
2489 break;
2490
2491 case Context::Rule::Type::WordDetect:
2492 case Context::Rule::Type::StringDetect:
2493 case Context::Rule::Type::Detect2Chars:
2494 case Context::Rule::Type::IncludeRules:
2495 case Context::Rule::Type::DetectIdentifier:
2496 case Context::Rule::Type::keyword:
2497 case Context::Rule::Type::Unknown:
2498 case Context::Rule::Type::RangeDetect:
2499 break;
2500 }
2501 }
2502 break;
2503
2504 case Context::Rule::Type::Unknown:
2505 break;
2506 }
2507
2508 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2509 auto &unreachableIncludedRule = unreachableIncludedRules[&rule];
2510 if (isUnreachable && unreachableIncludedRule.alwaysUnreachable) {
2511 unreachableIncludedRule.unreachableBy.append(unreachableBy);
2512 } else {
2513 unreachableIncludedRule.alwaysUnreachable = false;
2514 }
2515 } else if (isUnreachable) {
2516 success = false;
2517 QString message;
2518 message.reserve(128);
2519 for (auto &ruleAndInclude : unreachableBy) {
2520 message += QStringLiteral("line ");
2521 if (ruleAndInclude.includeRules) {
2522 message += QString::number(ruleAndInclude.includeRules->line);
2523 message += QStringLiteral(" [by '");
2524 message += ruleAndInclude.includeRules->context.name;
2525 message += QStringLiteral("' line ");
2526 message += QString::number(ruleAndInclude.rule->line);
2527 if (ruleAndInclude.includeRules->filename != ruleAndInclude.rule->filename) {
2528 message += QStringLiteral(" (");
2529 message += ruleAndInclude.rule->filename;
2530 message += QLatin1Char(')');
2531 }
2532 message += QLatin1Char(']');
2533 } else {
2534 message += QString::number(ruleAndInclude.rule->line);
2535 }
2536 message += QStringLiteral(", ");
2537 }
2538 message.chop(2);
2539 qWarning() << filename << "line" << rule.line << "unreachable rule by" << message;
2540 }
2541 }
2542
2543 return success;
2544 }
2545
2546 //! Proposes to merge certain rule sequences
2547 //! - several DetectChar/AnyChar into AnyChar
2548 //! - several RegExpr into one RegExpr
2549 bool suggestRuleMerger(const QString &filename, const Context &context) const
2550 {
2551 bool success = true;
2552
2553 if (context.rules.isEmpty()) {
2554 return success;
2555 }
2556
2557 auto it = context.rules.begin();
2558 const auto end = context.rules.end() - 1;
2559
2560 for (; it < end; ++it) {
2561 auto &rule1 = *it;
2562 auto &rule2 = it[1];
2563
2564 auto isCommonCompatible = [&] {
2565 if (rule1.lookAhead != rule2.lookAhead) {
2566 return false;
2567 }
2568 // ignore attribute when lookAhead is true
2569 if (rule1.lookAhead != XmlBool::True && rule1.attribute != rule2.attribute) {
2570 return false;
2571 }
2572 // clang-format off
2573 return rule1.beginRegion == rule2.beginRegion
2574 && rule1.endRegion == rule2.endRegion
2575 && rule1.firstNonSpace == rule2.firstNonSpace
2576 && rule1.context.context == rule2.context.context
2577 && rule1.context.popCount == rule2.context.popCount;
2578 // clang-format on
2579 };
2580
2581 switch (rule1.type) {
2582 // request to merge AnyChar/DetectChar
2583 case Context::Rule::Type::AnyChar:
2584 case Context::Rule::Type::DetectChar:
2585 if ((rule2.type == Context::Rule::Type::AnyChar || rule2.type == Context::Rule::Type::DetectChar) && isCommonCompatible()
2586 && rule1.column == rule2.column) {
2587 qWarning() << filename << "line" << rule2.line << "can be merged as AnyChar with the previous rule";
2588 success = false;
2589 }
2590 break;
2591
2592 // request to merge multiple RegExpr
2593 case Context::Rule::Type::RegExpr:
2594 if (rule2.type == Context::Rule::Type::RegExpr && isCommonCompatible() && rule1.dynamic == rule2.dynamic
2595 && (rule1.column == rule2.column || (rule1.column <= 0 && rule2.column <= 0))) {
2596 qWarning() << filename << "line" << rule2.line << "can be merged with the previous rule";
2597 success = false;
2598 }
2599 break;
2600
2601 case Context::Rule::Type::DetectSpaces:
2602 case Context::Rule::Type::HlCChar:
2603 case Context::Rule::Type::HlCHex:
2604 case Context::Rule::Type::HlCOct:
2605 case Context::Rule::Type::HlCStringChar:
2606 case Context::Rule::Type::Int:
2607 case Context::Rule::Type::Float:
2608 case Context::Rule::Type::LineContinue:
2609 case Context::Rule::Type::WordDetect:
2610 case Context::Rule::Type::StringDetect:
2611 case Context::Rule::Type::Detect2Chars:
2612 case Context::Rule::Type::IncludeRules:
2613 case Context::Rule::Type::DetectIdentifier:
2614 case Context::Rule::Type::keyword:
2615 case Context::Rule::Type::Unknown:
2616 case Context::Rule::Type::RangeDetect:
2617 break;
2618 }
2619 }
2620
2621 return success;
2622 }
2623
2624 //! Initialize the referenced context (ContextName::context)
2625 //! Some input / output examples are:
2626 //! - "#stay" -> ""
2627 //! - "#pop" -> ""
2628 //! - "Comment" -> "Comment"
2629 //! - "#pop!Comment" -> "Comment"
2630 //! - "##ISO C++" -> ""
2631 //! - "Comment##ISO C++"-> "Comment" in ISO C++
2632 void resolveContextName(Definition &definition, Context &context, ContextName &contextName, int line)
2633 {
2634 QStringView name = contextName.name;
2635 if (name.isEmpty()) {
2636 contextName.stay = true;
2637 } else if (name.startsWith(QStringLiteral("#stay"))) {
2638 name = name.mid(5);
2639 contextName.stay = true;
2640 contextName.context = &context;
2641 if (!name.isEmpty()) {
2642 qWarning() << definition.filename << "line" << line << "invalid context in" << context.name;
2643 m_success = false;
2644 }
2645 } else {
2646 while (name.startsWith(QStringLiteral("#pop"))) {
2647 name = name.mid(4);
2648 ++contextName.popCount;
2649 }
2650
2651 if (contextName.popCount && !name.isEmpty()) {
2652 if (name.startsWith(QLatin1Char('!')) && name.size() > 1) {
2653 name = name.mid(1);
2654 } else {
2655 qWarning() << definition.filename << "line" << line << "'!' missing between '#pop' and context name" << context.name;
2656 m_success = false;
2657 }
2658 }
2659
2660 if (!name.isEmpty()) {
2661 const int idx = name.indexOf(QStringLiteral("##"));
2662 if (idx == -1) {
2663 auto it = definition.contexts.find(name.toString());
2664 if (it != definition.contexts.end()) {
2665 contextName.context = &*it;
2666 }
2667 } else {
2668 auto defName = name.mid(idx + 2);
2669 auto it = m_definitions.find(defName.toString());
2670 if (it != m_definitions.end()) {
2671 auto listName = name.left(idx).toString();
2672 definition.referencedDefinitions.insert(&*it);
2673 auto ctxIt = it->contexts.find(listName.isEmpty() ? it->firstContextName : listName);
2674 if (ctxIt != it->contexts.end()) {
2675 contextName.context = &*ctxIt;
2676 }
2677 } else {
2678 qWarning() << definition.filename << "line" << line << "unknown definition in" << context.name;
2679 m_success = false;
2680 }
2681 }
2682
2683 if (!contextName.context) {
2684 qWarning() << definition.filename << "line" << line << "unknown context" << name << "in" << context.name;
2685 m_success = false;
2686 }
2687 }
2688 }
2689 }
2690
2691 QMap<QString, Definition> m_definitions;
2692 Definition *m_currentDefinition = nullptr;
2693 Keywords *m_currentKeywords = nullptr;
2694 Context *m_currentContext = nullptr;
2695 bool m_success = true;
2696};
2697
2698namespace
2699{
2700QStringList readListing(const QString &fileName)
2701{
2702 QFile file(fileName);
2703 if (!file.open(QIODevice::ReadOnly)) {
2704 return QStringList();
2705 }
2706
2707 QXmlStreamReader xml(&file);
2708 QStringList listing;
2709 while (!xml.atEnd()) {
2710 xml.readNext();
2711
2712 // add only .xml files, no .json or stuff
2713 if (xml.isCharacters() && xml.text().contains(QLatin1String(".xml"))) {
2714 listing.append(xml.text().toString());
2715 }
2716 }
2717
2718 if (xml.hasError()) {
2719 qWarning() << "XML error while reading" << fileName << " - " << qPrintable(xml.errorString()) << "@ offset" << xml.characterOffset();
2720 listing.clear();
2721 }
2722
2723 return listing;
2724}
2725
2726/**
2727 * check if the "extensions" attribute have valid wildcards
2728 * @param extensions extensions string to check
2729 * @return valid?
2730 */
2731bool checkExtensions(QStringView extensions)
2732{
2733 // get list of extensions
2734 const QList<QStringView> extensionParts = extensions.split(QLatin1Char(';'), Qt::SkipEmptyParts);
2735
2736 // ok if empty
2737 if (extensionParts.isEmpty()) {
2738 return true;
2739 }
2740
2741 // check that only valid wildcard things are inside the parts
2742 for (const auto &extension : extensionParts) {
2743 for (const auto c : extension) {
2744 // eat normal things
2745 if (c.isDigit() || c.isLetter()) {
2746 continue;
2747 }
2748
2749 // allow some special characters
2750 if (c == QLatin1Char('.') || c == QLatin1Char('-') || c == QLatin1Char('_') || c == QLatin1Char('+')) {
2751 continue;
2752 }
2753
2754 // only allowed wildcard things: '?' and '*'
2755 if (c == QLatin1Char('?') || c == QLatin1Char('*')) {
2756 continue;
2757 }
2758
2759 qWarning() << "invalid character" << c << "seen in extensions wildcard";
2760 return false;
2761 }
2762 }
2763
2764 // all checks passed
2765 return true;
2766}
2767
2768}
2769
2770int main(int argc, char *argv[])
2771{
2772 // get app instance
2773 QCoreApplication app(argc, argv);
2774
2775 // ensure enough arguments are passed
2776 if (app.arguments().size() < 3) {
2777 return 1;
2778 }
2779
2780#ifdef HAS_XERCESC
2781 // care for proper init and cleanup
2782 XMLPlatformUtils::Initialize();
2783 auto cleanup = qScopeGuard(XMLPlatformUtils::Terminate);
2784
2785 /*
2786 * parse XSD first time and cache it
2787 */
2788 XMLGrammarPoolImpl xsd(XMLPlatformUtils::fgMemoryManager);
2789
2790 // create parser for the XSD
2791 SAX2XMLReaderImpl parser(XMLPlatformUtils::fgMemoryManager, &xsd);
2792 init_parser(parser);
2793 QString messages;
2794 CustomErrorHandler eh(&messages);
2795 parser.setErrorHandler(&eh);
2796
2797 // load grammar into the pool, on error just abort
2798 const auto xsdFile = app.arguments().at(2);
2799 if (!parser.loadGrammar((const char16_t *)xsdFile.utf16(), Grammar::SchemaGrammarType, true) || eh.failed()) {
2800 qWarning("Failed to parse XSD %s: %s", qPrintable(xsdFile), qPrintable(messages));
2801 return 2;
2802 }
2803
2804 // lock the pool, no later modifications wanted!
2805 xsd.lockPool();
2806#endif
2807
2808 const QString hlFilenamesListing = app.arguments().value(3);
2809 if (hlFilenamesListing.isEmpty()) {
2810 return 1;
2811 }
2812
2813 QStringList hlFilenames = readListing(hlFilenamesListing);
2814 if (hlFilenames.isEmpty()) {
2815 qWarning("Failed to read %s", qPrintable(hlFilenamesListing));
2816 return 3;
2817 }
2818
2819 // text attributes
2820 const QStringList textAttributes = QStringList() << QStringLiteral("name") << QStringLiteral("alternativeNames") << QStringLiteral("section")
2821 << QStringLiteral("mimetype") << QStringLiteral("extensions") << QStringLiteral("style")
2822 << QStringLiteral("author") << QStringLiteral("license") << QStringLiteral("indenter");
2823
2824 // index all given highlightings
2825 HlFilesChecker filesChecker;
2826 QVariantMap hls;
2827 int anyError = 0;
2828 for (const QString &hlFilename : std::as_const(hlFilenames)) {
2829 QFile hlFile(hlFilename);
2830 if (!hlFile.open(QIODevice::ReadOnly)) {
2831 qWarning("Failed to open %s", qPrintable(hlFilename));
2832 anyError = 3;
2833 continue;
2834 }
2835
2836#ifdef HAS_XERCESC
2837 // create parser
2838 SAX2XMLReaderImpl parser(XMLPlatformUtils::fgMemoryManager, &xsd);
2839 init_parser(parser);
2840 QString messages;
2841 CustomErrorHandler eh(&messages);
2842 parser.setErrorHandler(&eh);
2843
2844 // parse the XML file
2845 parser.parse((const char16_t *)hlFile.fileName().utf16());
2846
2847 // report issues
2848 if (eh.failed()) {
2849 qWarning("Failed to validate XML %s: %s", qPrintable(hlFile.fileName()), qPrintable(messages));
2850 anyError = 4;
2851 continue;
2852 }
2853#endif
2854
2855 // read the needed attributes from toplevel language tag
2856 hlFile.reset();
2857 QXmlStreamReader xml(&hlFile);
2858 if (xml.readNextStartElement()) {
2859 if (xml.name() != QLatin1String("language")) {
2860 anyError = 5;
2861 continue;
2862 }
2863 } else {
2864 anyError = 6;
2865 continue;
2866 }
2867
2868 // map to store hl info
2869 QVariantMap hl;
2870
2871 // transfer text attributes
2872 for (const QString &attribute : std::as_const(textAttributes)) {
2873 hl[attribute] = xml.attributes().value(attribute).toString();
2874 }
2875
2876 // check if extensions have the right format
2877 if (!checkExtensions(hl[QStringLiteral("extensions")].toString())) {
2878 qWarning() << hlFilename << "'extensions' wildcards invalid:" << hl[QStringLiteral("extensions")].toString();
2879 anyError = 23;
2880 }
2881
2882 // numerical attributes
2883 hl[QStringLiteral("version")] = xml.attributes().value(QLatin1String("version")).toInt();
2884 hl[QStringLiteral("priority")] = xml.attributes().value(QLatin1String("priority")).toInt();
2885
2886 // add boolean one
2887 hl[QStringLiteral("hidden")] = attrToBool(xml.attributes().value(QLatin1String("hidden")));
2888
2889 // keep some strings as UTF-8 for faster translations
2890 hl[QStringLiteral("nameUtf8")] = hl[QStringLiteral("name")].toString().toUtf8();
2891 hl[QStringLiteral("sectionUtf8")] = hl[QStringLiteral("section")].toString().toUtf8();
2892
2893 // remember hl
2894 hls[QFileInfo(hlFile).fileName()] = hl;
2895
2896 const QString hlName = hl[QStringLiteral("name")].toString();
2897
2898 filesChecker.setDefinition(xml.attributes().value(QStringLiteral("kateversion")), hlFilename, hlName);
2899
2900 // scan for broken regex or keywords with spaces
2901 while (!xml.atEnd()) {
2902 xml.readNext();
2903 filesChecker.processElement(xml);
2904 }
2905
2906 if (xml.hasError()) {
2907 anyError = 33;
2908 qWarning() << hlFilename << "-" << xml.errorString() << "@ offset" << xml.characterOffset();
2909 }
2910 }
2911
2912 filesChecker.resolveContexts();
2913
2914 if (!filesChecker.check()) {
2915 anyError = 7;
2916 }
2917
2918 // bail out if any problem was seen
2919 if (anyError) {
2920 return anyError;
2921 }
2922
2923 // create outfile, after all has worked!
2924 QFile outFile(app.arguments().at(1));
2925 if (!outFile.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
2926 return 9;
2927 }
2928
2929 // write out json
2930 outFile.write(QCborValue::fromVariant(QVariant(hls)).toCbor());
2931
2932 // be done
2933 return 0;
2934}
Type type(const QSqlDatabase &db)
char * toString(const EngineQuery &query)
KDB_EXPORT KDbVersionInfo version()
void error(QWidget *parent, const QString &text, const QString &title, const KGuiItem &buttonOk, Options options=Notify)
KIOCORE_EXPORT void add(const QString &fileClass, const QString &directory)
const QList< QKeySequence > & next()
const QList< QKeySequence > & find()
const QList< QKeySequence > & end()
QString name(StandardShortcut id)
const QList< QKeySequence > & replace()
KTEXTEDITOR_EXPORT size_t qHash(KTextEditor::Cursor cursor, size_t seed=0) noexcept
QCborValue fromVariant(const QVariant &variant)
bool isDigit(char32_t ucs4)
bool isLetter(char32_t ucs4)
char16_t & unicode()
QString fileName() const
void append(QList< T > &&value)
reference back()
void clear()
bool isEmpty() const
void push_back(parameter_type value)
void reserve(qsizetype size)
qsizetype size() const
iterator end()
iterator find(const Key &key)
iterator insert(const Key &key, const T &value)
QString errorString() const
bool isValid() const
QString pattern() const
qsizetype patternErrorOffset() const
void clear()
bool contains(const QSet< T > &other) const
iterator erase(const_iterator pos)
iterator insert(const T &value)
qsizetype size() const
void chop(qsizetype n)
QString fromUtf16(const char16_t *unicode, qsizetype size)
qsizetype indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
bool isEmpty() const
QString left(qsizetype n) const
QString mid(qsizetype position, qsizetype n) const
QString number(double n, char format, int precision)
QString & replace(QChar before, QChar after, Qt::CaseSensitivity cs)
void reserve(qsizetype size)
qsizetype size() const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const
QStringView left(qsizetype length) const
bool contains(QChar c, Qt::CaseSensitivity cs) const
QChar first() const
bool isNull() const
qsizetype size() const
QList< QStringView > split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
bool startsWith(QChar ch) const
int toInt(bool *ok, int base) const
QString toString() const
bool operator==(const QGraphicsApiFilter &reference, const QGraphicsApiFilter &sample)
CaseInsensitive
SkipEmptyParts
QTextStream & endl(QTextStream &stream)
QStringView name() const
QStringView value() const
QStringView value(QAnyStringView namespaceUri, QAnyStringView name) const
bool atEnd() const
QXmlStreamAttributes attributes() const
qint64 characterOffset() const
QString errorString() const
bool hasError() const
bool isCharacters() const
bool isEndElement() const
bool isStartElement() const
qint64 lineNumber() const
QStringView name() const
QString readElementText(ReadElementTextBehaviour behaviour)
TokenType readNext()
bool readNextStartElement()
QStringView text() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:19:29 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.