KSyntaxHighlighting

katehighlightingindexer.cpp
1/*
2 SPDX-FileCopyrightText: 2014 Christoph Cullmann <cullmann@kde.org>
3 SPDX-FileCopyrightText: 2020 Jonathan Poelen <jonathan.poelen@gmail.com>
4
5 SPDX-License-Identifier: MIT
6*/
7
8#include <QCborValue>
9#include <QCoreApplication>
10#include <QDebug>
11#include <QFile>
12#include <QFileInfo>
13#include <QMutableMapIterator>
14#include <QRegularExpression>
15#include <QScopeGuard>
16#include <QString>
17#include <QVariant>
18#include <QXmlStreamReader>
19
20#ifdef HAS_XERCESC
21
22#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
23
24#include <xercesc/parsers/SAX2XMLReaderImpl.hpp>
25
26#include <xercesc/sax/ErrorHandler.hpp>
27#include <xercesc/sax/SAXParseException.hpp>
28
29#include <xercesc/util/PlatformUtils.hpp>
30#include <xercesc/util/XMLString.hpp>
31#include <xercesc/util/XMLUni.hpp>
32
33#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
34#include <xercesc/validators/common/Grammar.hpp>
35
36using namespace xercesc;
37
38/*
39 * Ideas taken from:
40 *
41 * author : Boris Kolpackov <boris@codesynthesis.com>
42 * copyright : not copyrighted - public domain
43 *
44 * This program uses Xerces-C++ SAX2 parser to load a set of schema files
45 * and then to validate a set of XML documents against these schemas. To
46 * build this program you will need Xerces-C++ 3.0.0 or later. For more
47 * information, see:
48 *
49 * http://www.codesynthesis.com/~boris/blog/2010/03/15/validating-external-schemas-xerces-cxx/
50 */
51
52/**
53 * Error handler object used during xml schema validation.
54 */
55class CustomErrorHandler : public ErrorHandler
56{
57public:
58 /**
59 * Constructor
60 * @param messages Pointer to the error message string to fill.
61 */
62 CustomErrorHandler(QString *messages)
63 : m_messages(messages)
64 {
65 }
66
67 /**
68 * Check global success/fail state.
69 * @return True if there was a failure, false otherwise.
70 */
71 bool failed() const
72 {
73 return m_failed;
74 }
75
76private:
77 /**
78 * Severity classes for error messages.
79 */
80 enum severity { s_warning, s_error, s_fatal };
81
82 /**
83 * Wrapper for warning exceptions.
84 * @param e Exception to handle.
85 */
86 void warning(const SAXParseException &e) override
87 {
88 m_failed = true; // be strict, warnings are evil, too!
89 handle(e, s_warning);
90 }
91
92 /**
93 * Wrapper for error exceptions.
94 * @param e Exception to handle.
95 */
96 void error(const SAXParseException &e) override
97 {
98 m_failed = true;
99 handle(e, s_error);
100 }
101
102 /**
103 * Wrapper for fatal error exceptions.
104 * @param e Exception to handle.
105 */
106 void fatalError(const SAXParseException &e) override
107 {
108 m_failed = true;
109 handle(e, s_fatal);
110 }
111
112 /**
113 * Reset the error status to "no error".
114 */
115 void resetErrors() override
116 {
117 m_failed = false;
118 }
119
120 /**
121 * Generic handler for error/warning/fatal error message exceptions.
122 * @param e Exception to handle.
123 * @param s Enum value encoding the message severtity.
124 */
125 void handle(const SAXParseException &e, severity s)
126 {
127 // get id to print
128 const XMLCh *xid(e.getPublicId());
129 if (!xid)
130 xid = e.getSystemId();
131
132 m_messages << QString::fromUtf16(xid) << ":" << e.getLineNumber() << ":" << e.getColumnNumber() << " " << (s == s_warning ? "warning: " : "error: ")
133 << QString::fromUtf16(e.getMessage()) << Qt::endl;
134 }
135
136private:
137 /**
138 * Storage for created error messages in this handler.
139 */
140 QTextStream m_messages;
141
142 /**
143 * Global error state. True if there was an error, false otherwise.
144 */
145 bool m_failed = false;
146};
147
148void init_parser(SAX2XMLReaderImpl &parser)
149{
150 // Commonly useful configuration.
151 //
152 parser.setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
153 parser.setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);
154 parser.setFeature(XMLUni::fgSAX2CoreValidation, true);
155
156 // Enable validation.
157 //
158 parser.setFeature(XMLUni::fgXercesSchema, true);
159 parser.setFeature(XMLUni::fgXercesSchemaFullChecking, true);
160 parser.setFeature(XMLUni::fgXercesValidationErrorAsFatal, true);
161
162 // Use the loaded grammar during parsing.
163 //
164 parser.setFeature(XMLUni::fgXercesUseCachedGrammarInParse, true);
165
166 // Don't load schemas from any other source (e.g., from XML document's
167 // xsi:schemaLocation attributes).
168 //
169 parser.setFeature(XMLUni::fgXercesLoadSchema, false);
170
171 // Xerces-C++ 3.1.0 is the first version with working multi import
172 // support.
173 //
174 parser.setFeature(XMLUni::fgXercesHandleMultipleImports, true);
175}
176
177#endif
178
179#include "../lib/worddelimiters_p.h"
180#include "../lib/xml_p.h"
181
182#include <array>
183
184using KSyntaxHighlighting::WordDelimiters;
185using KSyntaxHighlighting::Xml::attrToBool;
186using Qt::operator""_s;
187
188class HlFilesChecker
189{
190public:
191 void setDefinition(QStringView verStr, const QString &filename, const QString &name, const QStringList &alternativeNames)
192 {
193 m_currentDefinition = &*m_definitions.insert(name, Definition{});
194 m_currentDefinition->languageName = name;
195 m_currentDefinition->filename = filename;
196 m_currentDefinition->kateVersionStr = verStr.toString();
197 m_currentKeywords = nullptr;
198 m_currentContext = nullptr;
199
200 const auto idx = verStr.indexOf(QLatin1Char('.'));
201 if (idx <= 0) {
202 qWarning() << filename << "invalid kateversion" << verStr;
203 m_success = false;
204 } else {
205 m_currentDefinition->kateVersion = {verStr.sliced(0, idx).toInt(), verStr.sliced(idx + 1).toInt()};
206 }
207
208 auto checkName = [this, &filename](char const *nameType, const QString &name) {
209 auto it = m_names.find(name);
210 if (it != m_names.end()) {
211 qWarning() << filename << "duplicate" << nameType << "with" << it.value();
212 m_success = false;
213 } else {
214 m_names.insert(name, filename);
215 }
216 };
217 checkName("name", name);
218 for (auto alternativeName : alternativeNames) {
219 checkName("alternative name", alternativeName);
220 }
221 }
222
223 void processElement(QXmlStreamReader &xml)
224 {
225 if (xml.isStartElement()) {
226 if (m_currentContext) {
227 m_currentContext->rules.push_back(Context::Rule{});
228 auto &rule = m_currentContext->rules.back();
229 m_success = rule.parseElement(m_currentDefinition->filename, xml) && m_success;
230 m_currentContext->hasDynamicRule = m_currentContext->hasDynamicRule || rule.dynamic == XmlBool::True;
231 } else if (m_currentKeywords) {
232 m_success = m_currentKeywords->items.parseElement(m_currentDefinition->filename, xml) && m_success;
233 } else if (xml.name() == QStringLiteral("context")) {
234 processContextElement(xml);
235 } else if (xml.name() == QStringLiteral("list")) {
236 processListElement(xml);
237 } else if (xml.name() == QStringLiteral("keywords")) {
238 m_success = m_currentDefinition->parseKeywords(xml) && m_success;
239 } else if (xml.name() == QStringLiteral("emptyLine")) {
240 m_success = parseEmptyLine(m_currentDefinition->filename, xml) && m_success;
241 } else if (xml.name() == QStringLiteral("itemData")) {
242 m_success = m_currentDefinition->itemDatas.parseElement(m_currentDefinition->filename, xml) && m_success;
243 }
244 } else if (xml.isEndElement()) {
245 if (m_currentContext && xml.name() == QStringLiteral("context")) {
246 m_currentContext = nullptr;
247 } else if (m_currentKeywords && xml.name() == QStringLiteral("list")) {
248 m_currentKeywords = nullptr;
249 }
250 }
251 }
252
253 //! Resolve context attribute and include tag
254 void resolveContexts()
255 {
257 while (def.hasNext()) {
258 def.next();
259 auto &definition = def.value();
260 auto &contexts = definition.contexts;
261
262 if (contexts.isEmpty()) {
263 qWarning() << definition.filename << "has no context";
264 m_success = false;
265 continue;
266 }
267
268 auto markAsUsedContext = [](ContextName &contextName) {
269 if (!contextName.stay && contextName.context) {
270 contextName.context->isOnlyIncluded = false;
271 }
272 };
273
274 QMutableMapIterator<QString, Context> contextIt(contexts);
275 while (contextIt.hasNext()) {
276 contextIt.next();
277 auto &context = contextIt.value();
278 resolveContextName(definition, context, context.lineEndContext, context.line);
279 resolveContextName(definition, context, context.lineEmptyContext, context.line);
280 resolveContextName(definition, context, context.fallthroughContext, context.line);
281 markAsUsedContext(context.lineEndContext);
282 markAsUsedContext(context.lineEmptyContext);
283 markAsUsedContext(context.fallthroughContext);
284 for (auto &rule : context.rules) {
285 rule.parentContext = &context;
286 resolveContextName(definition, context, rule.context, rule.line);
287 if (rule.type != Context::Rule::Type::IncludeRules) {
288 markAsUsedContext(rule.context);
289 } else if (rule.includeAttrib == XmlBool::True && rule.context.context) {
290 rule.context.context->referencedWithIncludeAttrib = true;
291 }
292 }
293 }
294
295 auto *firstContext = &*definition.contexts.find(definition.firstContextName);
296 firstContext->isOnlyIncluded = false;
297 definition.firstContext = firstContext;
298 }
299
300 resolveIncludeRules();
301 }
302
303 bool check() const
304 {
305 bool success = m_success;
306
307 const auto usedContexts = extractUsedContexts();
308
309 QMap<const Definition *, const Definition *> maxVersionByDefinitions;
311
312 QMapIterator<QString, Definition> def(m_definitions);
313 while (def.hasNext()) {
314 def.next();
315 const auto &definition = def.value();
316 const auto &filename = definition.filename;
317
318 auto *maxDef = maxKateVersionDefinition(definition, maxVersionByDefinitions);
319 if (maxDef != &definition) {
320 qWarning() << definition.filename << "depends on a language" << maxDef->languageName << "in version" << maxDef->kateVersionStr
321 << ". Please, increase kateversion.";
322 success = false;
323 }
324
325 QSet<ItemDatas::Style> usedAttributeNames;
326 QSet<ItemDatas::Style> ignoredAttributeNames;
327 success = checkKeywordsList(definition) && success;
328 success = checkContexts(definition, usedAttributeNames, ignoredAttributeNames, usedContexts, unreachableIncludedRules) && success;
329
330 // search for non-existing itemDatas.
331 const auto invalidNames = usedAttributeNames - definition.itemDatas.styleNames;
332 for (const auto &styleName : invalidNames) {
333 qWarning() << filename << "line" << styleName.line << "reference of non-existing itemData attributes:" << styleName.name;
334 success = false;
335 }
336
337 // search for existing itemDatas, but unusable.
338 const auto ignoredNames = ignoredAttributeNames - usedAttributeNames;
339 for (const auto &styleName : ignoredNames) {
340 qWarning() << filename << "line" << styleName.line << "attribute" << styleName.name
341 << "is never used. All uses are with lookAhead=true or <IncludeRules/>";
342 success = false;
343 }
344
345 // search for unused itemDatas.
346 auto unusedNames = definition.itemDatas.styleNames - usedAttributeNames;
347 unusedNames -= ignoredNames;
348 for (const auto &styleName : std::as_const(unusedNames)) {
349 qWarning() << filename << "line" << styleName.line << "unused itemData:" << styleName.name;
350 success = false;
351 }
352 }
353
354 QMutableMapIterator<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRuleIt(unreachableIncludedRules);
355 while (unreachableIncludedRuleIt.hasNext()) {
356 unreachableIncludedRuleIt.next();
357 IncludedRuleUnreachableBy &unreachableRulesBy = unreachableIncludedRuleIt.value();
358 if (unreachableRulesBy.alwaysUnreachable) {
359 auto *rule = unreachableIncludedRuleIt.key();
360
361 if (!rule->parentContext->isOnlyIncluded) {
362 continue;
363 }
364
365 // remove duplicates rules
367 auto &unreachableBy = unreachableRulesBy.unreachableBy;
368 unreachableBy.erase(std::remove_if(unreachableBy.begin(),
369 unreachableBy.end(),
370 [&](const RuleAndInclude &ruleAndInclude) {
371 if (rules.contains(ruleAndInclude.rule)) {
372 return true;
373 }
374 rules.insert(ruleAndInclude.rule);
375 return false;
376 }),
377 unreachableBy.end());
378
379 QString message;
380 message.reserve(128);
381 for (auto &ruleAndInclude : std::as_const(unreachableBy)) {
382 message += QStringLiteral("line ");
383 message += QString::number(ruleAndInclude.rule->line);
384 message += QStringLiteral(" [");
385 message += ruleAndInclude.rule->parentContext->name;
386 if (rule->filename != ruleAndInclude.rule->filename) {
387 message += QStringLiteral(" (");
388 message += ruleAndInclude.rule->filename;
389 message += QLatin1Char(')');
390 }
391 if (ruleAndInclude.includeRules) {
392 message += QStringLiteral(" via line ");
393 message += QString::number(ruleAndInclude.includeRules->line);
394 }
395 message += QStringLiteral("], ");
396 }
397 message.chop(2);
398
399 qWarning() << rule->filename << "line" << rule->line << "no IncludeRule can reach this rule, hidden by" << message;
400 success = false;
401 }
402 }
403
404 return success;
405 }
406
407private:
//! Tri-state value of an optional boolean XML attribute.
enum class XmlBool {
    //! Attribute not present. Must stay the first enumerator: members
    //! declared as `XmlBool x{}` are value-initialized to it.
    Unspecified,
    False,
    True,
};
413
414 struct Context;
415
416 struct ContextName {
418 int popCount = 0;
419 bool stay = false;
420
421 Context *context = nullptr;
422 };
423
424 struct Parser {
425 const QString &filename;
426 QXmlStreamReader &xml;
428 bool success;
429
430 //! Read a string type attribute, \c success = \c false when \p str is not empty
431 //! \return \c true when attr.name() == attrName, otherwise false
432 bool extractString(QString &str, const QString &attrName)
433 {
434 if (attr.name() != attrName) {
435 return false;
436 }
437
438 str = attr.value().toString();
439 if (str.isEmpty()) {
440 qWarning() << filename << "line" << xml.lineNumber() << attrName << "attribute is empty";
441 success = false;
442 }
443
444 return true;
445 }
446
447 //! Read a bool type attribute, \c success = \c false when \p xmlBool is not \c XmlBool::Unspecified.
448 //! \return \c true when attr.name() == attrName, otherwise false
449 bool extractXmlBool(XmlBool &xmlBool, const QString &attrName)
450 {
451 if (attr.name() != attrName) {
452 return false;
453 }
454
455 xmlBool = attr.value().isNull() ? XmlBool::Unspecified : attrToBool(attr.value()) ? XmlBool::True : XmlBool::False;
456
457 return true;
458 }
459
460 //! Read a positive integer type attribute, \c success = \c false when \p positive is already greater than or equal to 0
461 //! \return \c true when attr.name() == attrName, otherwise false
462 bool extractPositive(int &positive, const QString &attrName)
463 {
464 if (attr.name() != attrName) {
465 return false;
466 }
467
468 bool ok = true;
469 positive = attr.value().toInt(&ok);
470
471 if (!ok || positive < 0) {
472 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a positive integer:" << attr.value();
473 success = false;
474 }
475
476 return true;
477 }
478
479 //! Read a color, \c success = \c false when \p color is already greater than or equal to 0
480 //! \return \c true when attr.name() == attrName, otherwise false
481 bool checkColor(const QString &attrName)
482 {
483 if (attr.name() != attrName) {
484 return false;
485 }
486
487 const auto value = attr.value();
488 if (value.isEmpty() /*|| QColor(value).isValid()*/) {
489 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a color:" << value;
490 success = false;
491 }
492
493 return true;
494 }
495
496 //! Read a QChar, \c success = \c false when \p c is not \c '\0' or does not have one char
497 //! \return \c true when attr.name() == attrName, otherwise false
498 bool extractChar(QChar &c, const QString &attrName)
499 {
500 if (attr.name() != attrName) {
501 return false;
502 }
503
504 if (attr.value().size() == 1) {
505 c = attr.value()[0];
506 } else {
507 c = QLatin1Char('_');
508 qWarning() << filename << "line" << xml.lineNumber() << attrName << "must contain exactly one char:" << attr.value();
509 success = false;
510 }
511
512 return true;
513 }
514
515 //! \return parsing status when \p isExtracted is \c true, otherwise \c false
516 bool checkIfExtracted(bool isExtracted)
517 {
518 if (isExtracted) {
519 return success;
520 }
521
522 qWarning() << filename << "line" << xml.lineNumber() << "unknown attribute:" << attr.name();
523 return false;
524 }
525 };
526
527 struct Keywords {
528 struct Items {
529 struct Item {
530 QString content;
531 int line;
532
533 friend size_t qHash(const Item &item, size_t seed = 0)
534 {
535 return qHash(item.content, seed);
536 }
537
538 friend bool operator==(const Item &item0, const Item &item1)
539 {
540 return item0.content == item1.content;
541 }
542 };
543
544 QList<Item> keywords;
545 QSet<Item> includes;
546
547 bool parseElement(const QString &filename, QXmlStreamReader &xml)
548 {
549 bool success = true;
550
551 const int line = xml.lineNumber();
552 QString content = xml.readElementText();
553
554 if (content.isEmpty()) {
555 qWarning() << filename << "line" << line << "is empty:" << xml.name();
556 success = false;
557 }
558
559 if (xml.name() == QStringLiteral("include")) {
560 includes.insert({content, line});
561 } else if (xml.name() == QStringLiteral("item")) {
562 keywords.append({content, line});
563 } else {
564 qWarning() << filename << "line" << line << "invalid element:" << xml.name();
565 success = false;
566 }
567
568 return success;
569 }
570 };
571
573 Items items;
574 int line;
575
576 bool parseElement(const QString &filename, QXmlStreamReader &xml)
577 {
578 line = xml.lineNumber();
579
580 bool success = true;
581 for (auto &attr : xml.attributes()) {
582 Parser parser{filename, xml, attr, success};
583
584 const bool isExtracted = parser.extractString(name, QStringLiteral("name"));
585
586 success = parser.checkIfExtracted(isExtracted);
587 }
588 return success;
589 }
590 };
591
592 struct Context {
593 struct Rule {
594 enum class Type {
595 Unknown,
596 AnyChar,
597 Detect2Chars,
598 DetectChar,
599 DetectIdentifier,
600 DetectSpaces,
601 Float,
602 HlCChar,
603 HlCHex,
604 HlCOct,
605 HlCStringChar,
606 IncludeRules,
607 Int,
608 LineContinue,
609 RangeDetect,
610 RegExpr,
611 StringDetect,
612 WordDetect,
613 keyword,
614 };
615
616 Type type{};
617
618 bool isDotRegex = false;
619 int line = -1;
620
621 // commonAttributes
622 QString attribute;
623 ContextName context;
624 QString beginRegion;
625 QString endRegion;
626 int column = -1;
627 XmlBool lookAhead{};
628 XmlBool firstNonSpace{};
629
630 // StringDetect, WordDetect, keyword
631 XmlBool insensitive{};
632
633 // DetectChar, StringDetect, RegExpr, keyword
634 XmlBool dynamic{};
635
636 // Regex
637 XmlBool minimal{};
638
639 // IncludeRule
640 XmlBool includeAttrib{};
641
642 // DetectChar, Detect2Chars, LineContinue, RangeDetect
643 QChar char0;
644 // Detect2Chars, RangeDetect
645 QChar char1;
646
647 // AnyChar, DetectChar, StringDetect, RegExpr, WordDetect, keyword
648 QString string;
649 // RegExpr without .* as suffix
650 QString sanitizedString;
651
652 // Float, HlCHex, HlCOct, Int, WordDetect, keyword
653 QString additionalDeliminator;
654 QString weakDeliminator;
655
656 // rules included by IncludeRules (without IncludeRule)
657 QList<const Rule *> includedRules;
658
659 // IncludeRules included by IncludeRules
660 QSet<const Rule *> includedIncludeRules;
661
662 Context const *parentContext = nullptr;
663
664 QString filename;
665
666 bool parseElement(const QString &filename, QXmlStreamReader &xml)
667 {
668 this->filename = filename;
669 line = xml.lineNumber();
670
671 using Pair = QPair<QString, Type>;
672 static const auto pairs = {
673 Pair{QStringLiteral("AnyChar"), Type::AnyChar},
674 Pair{QStringLiteral("Detect2Chars"), Type::Detect2Chars},
675 Pair{QStringLiteral("DetectChar"), Type::DetectChar},
676 Pair{QStringLiteral("DetectIdentifier"), Type::DetectIdentifier},
677 Pair{QStringLiteral("DetectSpaces"), Type::DetectSpaces},
678 Pair{QStringLiteral("Float"), Type::Float},
679 Pair{QStringLiteral("HlCChar"), Type::HlCChar},
680 Pair{QStringLiteral("HlCHex"), Type::HlCHex},
681 Pair{QStringLiteral("HlCOct"), Type::HlCOct},
682 Pair{QStringLiteral("HlCStringChar"), Type::HlCStringChar},
683 Pair{QStringLiteral("IncludeRules"), Type::IncludeRules},
684 Pair{QStringLiteral("Int"), Type::Int},
685 Pair{QStringLiteral("LineContinue"), Type::LineContinue},
686 Pair{QStringLiteral("RangeDetect"), Type::RangeDetect},
687 Pair{QStringLiteral("RegExpr"), Type::RegExpr},
688 Pair{QStringLiteral("StringDetect"), Type::StringDetect},
689 Pair{QStringLiteral("WordDetect"), Type::WordDetect},
690 Pair{QStringLiteral("keyword"), Type::keyword},
691 };
692
693 for (auto pair : pairs) {
694 if (xml.name() == pair.first) {
695 type = pair.second;
696 bool success = parseAttributes(filename, xml);
697 success = checkMandoryAttributes(filename, xml) && success;
698 if (success && type == Type::RegExpr) {
699 // ., (.) followed by *, +, {1} or nothing
700 static const QRegularExpression isDot(QStringLiteral(R"(^\‍(?\.(?:[*+][*+?]?|[*+]|\{1\})?\$?$)"));
701 // remove "(?:" and ")"
702 static const QRegularExpression removeParentheses(QStringLiteral(R"(\‍((?:\?:)?|\))"));
703 // remove parentheses on a copy of string
704 auto reg = QString(string).replace(removeParentheses, QString());
705 isDotRegex = reg.contains(isDot);
706
707 // Remove .* and .*$ suffix.
708 static const QRegularExpression allSuffix(QStringLiteral("(?<!\\\\)[.][*][?+]?[$]?$"));
709 sanitizedString = string;
710 sanitizedString.replace(allSuffix, QString());
711 // string is a catch-all, do not sanitize
712 if (sanitizedString.isEmpty() || sanitizedString == QStringLiteral("^")) {
713 sanitizedString = string;
714 }
715 }
716 return success;
717 }
718 }
719
720 qWarning() << filename << "line" << xml.lineNumber() << "unknown element:" << xml.name();
721 return false;
722 }
723
724 private:
725 bool parseAttributes(const QString &filename, QXmlStreamReader &xml)
726 {
727 bool success = true;
728
729 for (auto &attr : xml.attributes()) {
730 Parser parser{filename, xml, attr, success};
731
732 // clang-format off
733 const bool isExtracted
734 = parser.extractString(attribute, QStringLiteral("attribute"))
735 || parser.extractString(context.name, QStringLiteral("context"))
736 || parser.extractXmlBool(lookAhead, QStringLiteral("lookAhead"))
737 || parser.extractXmlBool(firstNonSpace, QStringLiteral("firstNonSpace"))
738 || parser.extractString(beginRegion, QStringLiteral("beginRegion"))
739 || parser.extractString(endRegion, QStringLiteral("endRegion"))
740 || parser.extractPositive(column, QStringLiteral("column"))
741 || ((type == Type::RegExpr
742 || type == Type::StringDetect
743 || type == Type::WordDetect
744 || type == Type::keyword
745 ) && parser.extractXmlBool(insensitive, QStringLiteral("insensitive")))
746 || ((type == Type::DetectChar
747 || type == Type::RegExpr
748 || type == Type::StringDetect
749 || type == Type::keyword
750 ) && parser.extractXmlBool(dynamic, QStringLiteral("dynamic")))
751 || ((type == Type::RegExpr)
752 && parser.extractXmlBool(minimal, QStringLiteral("minimal")))
753 || ((type == Type::DetectChar
754 || type == Type::Detect2Chars
755 || type == Type::LineContinue
756 || type == Type::RangeDetect
757 ) && parser.extractChar(char0, QStringLiteral("char")))
758 || ((type == Type::Detect2Chars
759 || type == Type::RangeDetect
760 ) && parser.extractChar(char1, QStringLiteral("char1")))
761 || ((type == Type::AnyChar
762 || type == Type::RegExpr
763 || type == Type::StringDetect
764 || type == Type::WordDetect
765 || type == Type::keyword
766 ) && parser.extractString(string, QStringLiteral("String")))
767 || ((type == Type::IncludeRules)
768 && parser.extractXmlBool(includeAttrib, QStringLiteral("includeAttrib")))
769 || ((type == Type::Float
770 || type == Type::HlCHex
771 || type == Type::HlCOct
772 || type == Type::Int
773 || type == Type::keyword
774 || type == Type::WordDetect
775 ) && (parser.extractString(additionalDeliminator, QStringLiteral("additionalDeliminator"))
776 || parser.extractString(weakDeliminator, QStringLiteral("weakDeliminator"))))
777 ;
778 // clang-format on
779
780 success = parser.checkIfExtracted(isExtracted);
781
782 if (type == Type::LineContinue && char0 == QLatin1Char('\0')) {
783 char0 = QLatin1Char('\\');
784 }
785 }
786
787 return success;
788 }
789
790 bool checkMandoryAttributes(const QString &filename, QXmlStreamReader &xml)
791 {
792 QString missingAttr;
793
794 switch (type) {
795 case Type::Unknown:
796 return false;
797
798 case Type::AnyChar:
799 case Type::RegExpr:
800 case Type::StringDetect:
801 case Type::WordDetect:
802 case Type::keyword:
803 missingAttr = string.isEmpty() ? QStringLiteral("String") : QString();
804 break;
805
806 case Type::DetectChar:
807 missingAttr = !char0.unicode() ? QStringLiteral("char") : QString();
808 break;
809
810 case Type::Detect2Chars:
811 case Type::RangeDetect:
812 missingAttr = !char0.unicode() && !char1.unicode() ? QStringLiteral("char and char1")
813 : !char0.unicode() ? QStringLiteral("char")
814 : !char1.unicode() ? QStringLiteral("char1")
815 : QString();
816 break;
817
818 case Type::IncludeRules:
819 missingAttr = context.name.isEmpty() ? QStringLiteral("context") : QString();
820 break;
821
822 case Type::DetectIdentifier:
823 case Type::DetectSpaces:
824 case Type::Float:
825 case Type::HlCChar:
826 case Type::HlCHex:
827 case Type::HlCOct:
828 case Type::HlCStringChar:
829 case Type::Int:
830 case Type::LineContinue:
831 break;
832 }
833
834 if (!missingAttr.isEmpty()) {
835 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute:" << missingAttr;
836 return false;
837 }
838
839 return true;
840 }
841 };
842
843 int line;
844 // becomes false when a context (except includeRule) refers to it
845 bool isOnlyIncluded = true;
846 // becomes true when an includedRule refers to it with includeAttrib=true
847 bool referencedWithIncludeAttrib = false;
848 bool hasDynamicRule = false;
850 QString attribute;
851 ContextName lineEndContext;
852 ContextName lineEmptyContext;
853 ContextName fallthroughContext;
854 QList<Rule> rules;
855 XmlBool dynamic{};
856 XmlBool fallthrough{};
857 XmlBool stopEmptyLineContextSwitchLoop{};
858
859 bool parseElement(const QString &filename, QXmlStreamReader &xml)
860 {
861 line = xml.lineNumber();
862
863 bool success = true;
864
865 for (auto &attr : xml.attributes()) {
866 Parser parser{filename, xml, attr, success};
867 XmlBool noIndentationBasedFolding{};
868
869 // clang-format off
870 const bool isExtracted = parser.extractString(name, QStringLiteral("name"))
871 || parser.extractString(attribute, QStringLiteral("attribute"))
872 || parser.extractString(lineEndContext.name, QStringLiteral("lineEndContext"))
873 || parser.extractString(lineEmptyContext.name, QStringLiteral("lineEmptyContext"))
874 || parser.extractString(fallthroughContext.name, QStringLiteral("fallthroughContext"))
875 || parser.extractXmlBool(dynamic, QStringLiteral("dynamic"))
876 || parser.extractXmlBool(fallthrough, QStringLiteral("fallthrough"))
877 || parser.extractXmlBool(stopEmptyLineContextSwitchLoop, QStringLiteral("stopEmptyLineContextSwitchLoop"))
878 || parser.extractXmlBool(noIndentationBasedFolding, QStringLiteral("noIndentationBasedFolding"));
879 // clang-format on
880
881 success = parser.checkIfExtracted(isExtracted);
882 }
883
884 if (name.isEmpty()) {
885 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: name";
886 success = false;
887 }
888
889 if (attribute.isEmpty()) {
890 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: attribute";
891 success = false;
892 }
893
894 return success;
895 }
896 };
897
//! A "major.minor" version number with strict ordering.
struct Version {
    int majorRevision;
    int minorRevision;

    //! Both parts default to 0.
    Version(int majorRevision = 0, int minorRevision = 0)
        : majorRevision(majorRevision)
        , minorRevision(minorRevision)
    {
    }

    //! Order by major revision first, then by minor revision.
    bool operator<(const Version &version) const
    {
        if (majorRevision != version.majorRevision) {
            return majorRevision < version.majorRevision;
        }
        return minorRevision < version.minorRevision;
    }
};
913
914 struct ItemDatas {
915 struct Style {
917 int line;
918
919 friend size_t qHash(const Style &style, size_t seed = 0)
920 {
921 return qHash(style.name, seed);
922 }
923
924 friend bool operator==(const Style &style0, const Style &style1)
925 {
926 return style0.name == style1.name;
927 }
928 };
929
930 QSet<Style> styleNames;
931
932 bool parseElement(const QString &filename, QXmlStreamReader &xml)
933 {
934 bool success = true;
935
937 QString defStyleNum;
938 XmlBool boolean;
939
940 for (auto &attr : xml.attributes()) {
941 Parser parser{filename, xml, attr, success};
942
943 const bool isExtracted = parser.extractString(name, QStringLiteral("name")) || parser.extractString(defStyleNum, QStringLiteral("defStyleNum"))
944 || parser.extractXmlBool(boolean, QStringLiteral("bold")) || parser.extractXmlBool(boolean, QStringLiteral("italic"))
945 || parser.extractXmlBool(boolean, QStringLiteral("underline")) || parser.extractXmlBool(boolean, QStringLiteral("strikeOut"))
946 || parser.extractXmlBool(boolean, QStringLiteral("spellChecking")) || parser.checkColor(QStringLiteral("color"))
947 || parser.checkColor(QStringLiteral("selColor")) || parser.checkColor(QStringLiteral("backgroundColor"))
948 || parser.checkColor(QStringLiteral("selBackgroundColor"));
949
950 success = parser.checkIfExtracted(isExtracted);
951 }
952
953 if (!name.isEmpty()) {
954 const auto len = styleNames.size();
955 styleNames.insert({name, int(xml.lineNumber())});
956 if (len == styleNames.size()) {
957 qWarning() << filename << "line" << xml.lineNumber() << "itemData duplicate:" << name;
958 success = false;
959 }
960 }
961
962 return success;
963 }
964 };
965
966 struct Definition {
967 QMap<QString, Keywords> keywordsList;
968 QMap<QString, Context> contexts;
969 ItemDatas itemDatas;
970 QString firstContextName;
971 const Context *firstContext = nullptr;
972 QString filename;
973 WordDelimiters wordDelimiters;
974 Version kateVersion{};
975 QString kateVersionStr;
976 QString languageName;
977 QSet<const Definition *> referencedDefinitions;
978
979 // Parse <keywords ...>
980 bool parseKeywords(QXmlStreamReader &xml)
981 {
982 wordDelimiters.append(xml.attributes().value(QStringLiteral("additionalDeliminator")));
983 wordDelimiters.remove(xml.attributes().value(QStringLiteral("weakDeliminator")));
984 return true;
985 }
986 };
987
988 // Parse <context>
989 void processContextElement(QXmlStreamReader &xml)
990 {
991 Context context;
992 m_success = context.parseElement(m_currentDefinition->filename, xml) && m_success;
993 if (m_currentDefinition->firstContextName.isEmpty()) {
994 m_currentDefinition->firstContextName = context.name;
995 }
996 if (m_currentDefinition->contexts.contains(context.name)) {
997 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate context:" << context.name;
998 m_success = false;
999 }
1000 m_currentContext = &*m_currentDefinition->contexts.insert(context.name, context);
1001 }
1002
1003 // Parse <list name="...">
1004 void processListElement(QXmlStreamReader &xml)
1005 {
1006 Keywords keywords;
1007 m_success = keywords.parseElement(m_currentDefinition->filename, xml) && m_success;
1008 if (m_currentDefinition->keywordsList.contains(keywords.name)) {
1009 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate list:" << keywords.name;
1010 m_success = false;
1011 }
1012 m_currentKeywords = &*m_currentDefinition->keywordsList.insert(keywords.name, keywords);
1013 }
1014
1015 const Definition *maxKateVersionDefinition(const Definition &definition, QMap<const Definition *, const Definition *> &maxVersionByDefinitions) const
1016 {
1017 auto it = maxVersionByDefinitions.find(&definition);
1018 if (it != maxVersionByDefinitions.end()) {
1019 return it.value();
1020 } else {
1021 auto it = maxVersionByDefinitions.insert(&definition, &definition);
1022 for (const auto &referencedDef : definition.referencedDefinitions) {
1023 auto *maxDef = maxKateVersionDefinition(*referencedDef, maxVersionByDefinitions);
1024 if (it.value()->kateVersion < maxDef->kateVersion) {
1025 it.value() = maxDef;
1026 }
1027 }
1028 return it.value();
1029 }
1030 }
1031
1032 // Initialize the referenced rules (Rule::includedRules)
1033 void resolveIncludeRules()
1034 {
1035 QSet<const Context *> usedContexts;
1036 QList<const Context *> contexts;
1037
1039 while (def.hasNext()) {
1040 def.next();
1041 auto &definition = def.value();
1042 QMutableMapIterator<QString, Context> contextIt(definition.contexts);
1043 while (contextIt.hasNext()) {
1044 contextIt.next();
1045 auto &currentContext = contextIt.value();
1046 for (auto &rule : currentContext.rules) {
1047 if (rule.type != Context::Rule::Type::IncludeRules) {
1048 continue;
1049 }
1050
1051 if (rule.context.stay) {
1052 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself";
1053 m_success = false;
1054 continue;
1055 }
1056
1057 if (rule.context.popCount) {
1058 qWarning() << definition.filename << "line" << rule.line << "IncludeRules with #pop prefix";
1059 m_success = false;
1060 }
1061
1062 if (!rule.context.context) {
1063 m_success = false;
1064 continue;
1065 }
1066
1067 // resolve includedRules and includedIncludeRules
1068
1069 usedContexts.clear();
1070 usedContexts.insert(rule.context.context);
1071 contexts.clear();
1072 contexts.append(rule.context.context);
1073
1074 for (int i = 0; i < contexts.size(); ++i) {
1075 currentContext.hasDynamicRule = contexts[i]->hasDynamicRule;
1076 for (const auto &includedRule : contexts[i]->rules) {
1077 if (includedRule.type != Context::Rule::Type::IncludeRules) {
1078 rule.includedRules.append(&includedRule);
1079 } else if (&rule == &includedRule) {
1080 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself by recursivity";
1081 m_success = false;
1082 } else {
1083 rule.includedIncludeRules.insert(&includedRule);
1084
1085 if (includedRule.includedRules.isEmpty()) {
1086 const auto *context = includedRule.context.context;
1087 if (context && !usedContexts.contains(context)) {
1088 contexts.append(context);
1089 usedContexts.insert(context);
1090 }
1091 } else {
1092 rule.includedRules.append(includedRule.includedRules);
1093 }
1094 }
1095 }
1096 }
1097 }
1098 }
1099 }
1100 }
1101
1102 //! Recursively extracts the contexts used from the first context of the definitions.
1103 //! This method detects groups of contexts which are only used among themselves.
1104 QSet<const Context *> extractUsedContexts() const
1105 {
1106 QSet<const Context *> usedContexts;
1107 QList<const Context *> contexts;
1108
1109 QMapIterator<QString, Definition> def(m_definitions);
1110 while (def.hasNext()) {
1111 def.next();
1112 const auto &definition = def.value();
1113
1114 if (definition.firstContext) {
1115 usedContexts.insert(definition.firstContext);
1116 contexts.clear();
1117 contexts.append(definition.firstContext);
1118
1119 for (int i = 0; i < contexts.size(); ++i) {
1120 auto appendContext = [&](const Context *context) {
1121 if (context && !usedContexts.contains(context)) {
1122 contexts.append(context);
1123 usedContexts.insert(context);
1124 }
1125 };
1126
1127 const auto *context = contexts[i];
1128 appendContext(context->lineEndContext.context);
1129 appendContext(context->lineEmptyContext.context);
1130 appendContext(context->fallthroughContext.context);
1131
1132 for (auto &rule : context->rules) {
1133 appendContext(rule.context.context);
1134 }
1135 }
1136 }
1137 }
1138
1139 return usedContexts;
1140 }
1141
1142 struct RuleAndInclude {
1143 const Context::Rule *rule;
1144 const Context::Rule *includeRules;
1145
1146 explicit operator bool() const
1147 {
1148 return rule;
1149 }
1150 };
1151
1152 struct IncludedRuleUnreachableBy {
1153 QList<RuleAndInclude> unreachableBy;
1154 bool alwaysUnreachable = true;
1155 };
1156
1157 //! Check contexts and rules
1158 bool checkContexts(const Definition &definition,
1159 QSet<ItemDatas::Style> &usedAttributeNames,
1160 QSet<ItemDatas::Style> &ignoredAttributeNames,
1161 const QSet<const Context *> &usedContexts,
1162 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1163 {
1164 bool success = true;
1165
1166 QMapIterator<QString, Context> contextIt(definition.contexts);
1167 while (contextIt.hasNext()) {
1168 contextIt.next();
1169
1170 const auto &context = contextIt.value();
1171 const auto &filename = definition.filename;
1172
1173 if (!usedContexts.contains(&context)) {
1174 qWarning() << filename << "line" << context.line << "unused context:" << context.name;
1175 success = false;
1176 continue;
1177 }
1178
1179 if (context.name.startsWith(QStringLiteral("#pop"))) {
1180 qWarning() << filename << "line" << context.line << "the context name must not start with '#pop':" << context.name;
1181 success = false;
1182 }
1183
1184 if (!context.attribute.isEmpty() && (!context.isOnlyIncluded || context.referencedWithIncludeAttrib)) {
1185 usedAttributeNames.insert({context.attribute, context.line});
1186 }
1187
1188 success = checkContextAttribute(definition, context) && success;
1189 success = checkUreachableRules(definition.filename, context, unreachableIncludedRules) && success;
1190 success = suggestRuleMerger(definition.filename, context) && success;
1191
1192 for (const auto &rule : context.rules) {
1193 if (!rule.attribute.isEmpty()) {
1194 if (rule.lookAhead != XmlBool::True) {
1195 usedAttributeNames.insert({rule.attribute, rule.line});
1196 } else {
1197 ignoredAttributeNames.insert({rule.attribute, rule.line});
1198 }
1199 }
1200 success = checkLookAhead(rule) && success;
1201 success = checkStringDetect(rule) && success;
1202 success = checkKeyword(definition, rule) && success;
1203 success = checkRegExpr(filename, rule, context) && success;
1204 success = checkDelimiters(definition, rule) && success;
1205 }
1206 }
1207
1208 return success;
1209 }
1210
1211 //! Check that a regular expression in a RegExpr rule:
1212 //! - isValid()
1213 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1214 //! - dynamic=true but no place holder used?
1215 //! - is not . with lookAhead="1"
1216 //! - is not ^... without column ou firstNonSpace attribute
1217 //! - is not equivalent to DetectSpaces, DetectChar, Detect2Chars, StringDetect, DetectIdentifier, RangeDetect, LineContinue or AnyChar
1218 //! - has no unused captures
1219 //! - has no unnecessary quantifier with lookAhead
1220 bool checkRegExpr(const QString &filename, const Context::Rule &rule, const Context &context) const
1221 {
1222 // ignore empty regex because the error is raised during xml parsing
1223 if (rule.type == Context::Rule::Type::RegExpr && !rule.string.isEmpty()) {
1224 const QRegularExpression regexp(rule.string);
1225 if (!checkRegularExpression(rule.filename, regexp, rule.line)) {
1226 return false;
1227 }
1228
1229 // dynamic == true and no place holder?
1230 if (rule.dynamic == XmlBool::True) {
1231 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1232 if (!rule.string.contains(placeHolder)) {
1233 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1234 return false;
1235 }
1236 }
1237
1238 if (rule.lookAhead == XmlBool::True && (rule.string.endsWith(QStringLiteral(".*$")) || rule.string.endsWith(QStringLiteral(".*")))
1239 && -1 == rule.string.indexOf(u'|')) {
1240 qWarning() << rule.filename << "line" << rule.line << "RegExpr with lookAhead=1 doesn't need to end with '.*' or '.*$':" << rule.string;
1241 return false;
1242 }
1243
1244 auto reg = (rule.lookAhead == XmlBool::True) ? rule.sanitizedString : rule.string;
1245 if (rule.lookAhead == XmlBool::True) {
1246 static const QRegularExpression removeAllSuffix(QStringLiteral(
1247 R"(((?<!\\)\\‍(?:[DSWdsw]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4})|(?<!\\)[^])}\\]|(?=\\)\\\\)[*][?+]?$)"));
1248 reg.replace(removeAllSuffix, QString());
1249 }
1250
1251 reg.replace(QStringLiteral("{1}"), QString());
1252 reg.replace(QStringLiteral("{1,1}"), QString());
1253
1254 // is DetectSpaces
1255 // optional ^ then \s, [\s], [\t ], [ \t] possibly in (...) or (?:...) followed by *, +
1256 static const QRegularExpression isDetectSpaces(
1257 QStringLiteral(R"(^\^?(?:\‍((?:\?:)?)?\^?(?:\\s|\[(?:\\s| (?:\t|\\t)|(?:\t|\\t) )\])\)?(?:[*+][*+?]?|[*+])?\)?\)?$)"));
1258 if (rule.string.contains(isDetectSpaces)) {
1259 char const *extraMsg = rule.string.contains(QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1260 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectSpaces / DetectChar / AnyChar" << extraMsg << ":"
1261 << rule.string;
1262 return false;
1263 }
1264
1265#define REG_ESCAPE_CHAR R"(\\(?:[^0BDPSWbdpswoux]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4}))"
1266#define REG_CHAR "(?:" REG_ESCAPE_CHAR "|\\[(?:" REG_ESCAPE_CHAR "|.)\\]|[^[.^])"
1267
1268 // is RangeDetect
1269 static const QRegularExpression isRange(QStringLiteral("^\\^?" REG_CHAR "(?:"
1270 "\\.\\*[?+]?" REG_CHAR "|"
1271 "\\[\\^(" REG_ESCAPE_CHAR "|.)\\]\\*[?+]?\\1"
1272 ")$"));
1273 if ((rule.lookAhead == XmlBool::True || rule.minimal == XmlBool::True || rule.string.contains(QStringLiteral(".*?"))
1274 || rule.string.contains(QStringLiteral("[^")))
1275 && reg.contains(isRange)) {
1276 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by RangeDetect:" << rule.string;
1277 return false;
1278 }
1279
1280 // is AnyChar
1281 static const QRegularExpression isAnyChar(QStringLiteral(R"(^(\^|\‍((\?:)?)*\[(?!\^)[-\]]?(\\[^0BDPSWbdpswoux]|[^-\]\\])*\]\)*$)"));
1282 if (rule.string.contains(isAnyChar)) {
1283 auto extra = (reg[0] == QLatin1Char('^') || reg[1] == QLatin1Char('^')) ? "with column=\"0\"" : "";
1284 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by AnyChar:" << rule.string << extra;
1285 return false;
1286 }
1287
1288 // is LineContinue
1289 static const QRegularExpression isLineContinue(QStringLiteral("^\\^?" REG_CHAR "\\$$"));
1290 if (reg.contains(isLineContinue)) {
1291 auto extra = (reg[0] == QLatin1Char('^')) ? "with column=\"0\"" : "";
1292 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by LineContinue:" << rule.string << extra;
1293 return false;
1294 }
1295
1296#define REG_DIGIT uR"((\[(0-9|\\d)\]|\\d))"
1297#define REG_DIGITS REG_DIGIT u"([+]|" REG_DIGIT u"[*])"
1298#define REG_DOT uR"((\\[.]|\[.\]))"
1299 // is Int, check \b[0-9]+
1300 static const QRegularExpression isInt(uR"(^(\‍((\?:)?)*\\b(\‍((\?:)?)*)" REG_DIGITS uR"(\)*$)"_s);
1301 if (reg.contains(isInt)) {
1302 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by Int:" << rule.string;
1303 return false;
1304 }
1305
1306 // is Float, check (\b[0-9]+\.[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?
1307 static const QRegularExpression isFloat(
1308 uR"(^(\\b|\‍((\?:)?)*)" REG_DIGITS REG_DOT
1309 REG_DIGIT u"[*][|]" REG_DOT REG_DIGITS uR"(\)+\‍((\?:)?\[[eE]+\]\[(\\?-\\?\+|\\?\+\\?-)\]\?)" REG_DIGITS uR"(\)\?\)*$)"_s);
1310 if (reg.contains(isFloat)) {
1311 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by Float:" << rule.string;
1312 return false;
1313 }
1314#undef REG_DOT
1315#undef REG_DIGIT
1316#undef REG_DIGITS
1317
1318 // replace \c, \xhhh, \x{hhh...}, \0dd, \o{ddd}, \uhhhh, with _
1319 static const QRegularExpression sanitize1(QStringLiteral(REG_ESCAPE_CHAR));
1320 reg.replace(sanitize1, QStringLiteral("_"));
1321
1322#undef REG_CHAR
1323#undef REG_ESCAPE_CHAR
1324
1325 // use minimal or lazy operator
1326 static const QRegularExpression isMinimal(QStringLiteral("(?![.][*+?][$]?[)]*$)[.][*+?][^?+]"));
1327 static const QRegularExpression hasNotGreedy(QStringLiteral("[*+?][?+]"));
1328
1329 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(isMinimal) && !reg.contains(hasNotGreedy)
1330 && (!rule.context.context || !rule.context.context->hasDynamicRule || regexp.captureCount() == 0)
1331 && (reg.back() != QLatin1Char('$') || reg.contains(QLatin1Char('|')))) {
1332 qWarning() << rule.filename << "line" << rule.line
1333 << "RegExpr should be have minimal=\"1\" or use lazy operator (i.g, '.*' -> '.*?'):" << rule.string;
1334 return false;
1335 }
1336
1337 // replace [:...:] with ___
1338 static const QRegularExpression sanitize2(QStringLiteral(R"(\[:\w+:\])"));
1339 reg.replace(sanitize2, QStringLiteral("___"));
1340
1341 // replace [ccc...], [special] with ...
1342 static const QRegularExpression sanitize3(QStringLiteral(R"(\[(?:\^\]?[^]]*|\]?[^]\\]*?\\.[^]]*|\][^]]{2,}|[^]]{3,})\]|(\[\]?[^]]*\]))"));
1343 reg.replace(sanitize3, QStringLiteral("...\\1"));
1344
1345 // replace [c] with _
1346 static const QRegularExpression sanitize4(QStringLiteral(R"(\[.\])"));
1347 reg.replace(sanitize4, QStringLiteral("_"));
1348
1349 const int len = reg.size();
1350 // replace [cC] with _
1351 static const QRegularExpression toInsensitive(QStringLiteral(R"(\[(?:([^]])\1)\])"));
1352 reg = reg.toUpper();
1353 reg.replace(toInsensitive, QString());
1354
1355 // is StringDetect
1356 // ignore (?:, ) and {n}
1357 static const QRegularExpression isStringDetect(QStringLiteral(R"(^\^?(?:[^|\\?*+$^[{(.]|{(?!\d+,\d*}|,\d+})|\‍(\?:)+$)"));
1358 if (reg.contains(isStringDetect)) {
1359 char const *extraMsg = rule.string.contains(QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1360 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by StringDetect / Detect2Chars / DetectChar" << extraMsg
1361 << ":" << rule.string;
1362 if (len != reg.size()) {
1363 qWarning() << rule.filename << "line" << rule.line << "insensitive=\"1\" missing:" << rule.string;
1364 }
1365 return false;
1366 }
1367
1368 // column="0"
1369 if (rule.column == -1) {
1370 // ^ without |
1371 // (^sas*) -> ok
1372 // (^sa|s*) -> ko
1373 // (^(sa|s*)) -> ok
1374 auto first = std::as_const(reg).begin();
1375 auto last = std::as_const(reg).end();
1376 int depth = 0;
1377
1378 while (QLatin1Char('(') == *first) {
1379 ++depth;
1380 ++first;
1381 if (QLatin1Char('?') == *first || QLatin1Char(':') == first[1]) {
1382 first += 2;
1383 }
1384 }
1385
1386 if (QLatin1Char('^') == *first) {
1387 const int bolDepth = depth;
1388 bool replace = true;
1389
1390 while (++first != last) {
1391 if (QLatin1Char('(') == *first) {
1392 ++depth;
1393 } else if (QLatin1Char(')') == *first) {
1394 --depth;
1395 if (depth < bolDepth) {
1396 // (^a)? === (^a|) -> ko
1397 if (first + 1 != last && QStringLiteral("*?").contains(first[1])) {
1398 replace = false;
1399 break;
1400 }
1401 }
1402 } else if (QLatin1Char('|') == *first) {
1403 // ignore '|' within subgroup
1404 if (depth <= bolDepth) {
1405 replace = false;
1406 break;
1407 }
1408 }
1409 }
1410
1411 if (replace) {
1412 qWarning() << rule.filename << "line" << rule.line << "column=\"0\" missing with RegExpr:" << rule.string;
1413 return false;
1414 }
1415 }
1416 }
1417
1418 // add ^ with column=0
1419 if (rule.column == 0 && !rule.isDotRegex) {
1420 bool hasStartOfLine = false;
1421 auto first = std::as_const(reg).begin();
1422 auto last = std::as_const(reg).end();
1423 for (; first != last; ++first) {
1424 if (*first == QLatin1Char('^')) {
1425 hasStartOfLine = true;
1426 break;
1427 } else if (*first == QLatin1Char('(')) {
1428 if (last - first >= 3 && first[1] == QLatin1Char('?') && first[2] == QLatin1Char(':')) {
1429 first += 2;
1430 }
1431 } else {
1432 break;
1433 }
1434 }
1435
1436 if (!hasStartOfLine) {
1437 qWarning() << rule.filename << "line" << rule.line
1438 << "start of line missing in the pattern with column=\"0\" (i.e. abc -> ^abc):" << rule.string;
1439 return false;
1440 }
1441 }
1442
1443 bool useCapture = false;
1444
1445 // detection of unnecessary capture
1446 if (regexp.captureCount()) {
1447 auto maximalCapture = [](const QString(&referenceNames)[9], const QString &s) {
1448 int maxCapture = 9;
1449 while (maxCapture && !s.contains(referenceNames[maxCapture - 1])) {
1450 --maxCapture;
1451 }
1452 return maxCapture;
1453 };
1454
1455 int maxCaptureUsed = 0;
1456 // maximal dynamic reference
1457 if (rule.context.context && !rule.context.stay) {
1458 for (const auto &nextRule : rule.context.context->rules) {
1459 if (nextRule.dynamic == XmlBool::True) {
1460 static const QString cap[]{
1461 QStringLiteral("%1"),
1462 QStringLiteral("%2"),
1463 QStringLiteral("%3"),
1464 QStringLiteral("%4"),
1465 QStringLiteral("%5"),
1466 QStringLiteral("%6"),
1467 QStringLiteral("%7"),
1468 QStringLiteral("%8"),
1469 QStringLiteral("%9"),
1470 };
1471 int maxDynamicCapture = maximalCapture(cap, nextRule.string);
1472 maxCaptureUsed = std::max(maxCaptureUsed, maxDynamicCapture);
1473 }
1474 }
1475 }
1476
1477 static const QString num1[]{
1478 QStringLiteral("\\1"),
1479 QStringLiteral("\\2"),
1480 QStringLiteral("\\3"),
1481 QStringLiteral("\\4"),
1482 QStringLiteral("\\5"),
1483 QStringLiteral("\\6"),
1484 QStringLiteral("\\7"),
1485 QStringLiteral("\\8"),
1486 QStringLiteral("\\9"),
1487 };
1488 static const QString num2[]{
1489 QStringLiteral("\\g1"),
1490 QStringLiteral("\\g2"),
1491 QStringLiteral("\\g3"),
1492 QStringLiteral("\\g4"),
1493 QStringLiteral("\\g5"),
1494 QStringLiteral("\\g6"),
1495 QStringLiteral("\\g7"),
1496 QStringLiteral("\\g8"),
1497 QStringLiteral("\\g9"),
1498 };
1499 const int maxBackReference = std::max(maximalCapture(num1, rule.string), maximalCapture(num1, rule.string));
1500
1501 const int maxCapture = std::max(maxCaptureUsed, maxBackReference);
1502
1503 if (maxCapture && regexp.captureCount() > maxCapture) {
1504 qWarning() << rule.filename << "line" << rule.line << "RegExpr with" << regexp.captureCount() << "captures but only" << maxCapture
1505 << "are used. Please, replace '(...)' with '(?:...)':" << rule.string;
1506 return false;
1507 }
1508
1509 useCapture = maxCapture;
1510 }
1511
1512 if (!useCapture) {
1513 // is DetectIdentifier
1514 static const QRegularExpression isDetectIdentifier(
1515 QStringLiteral(R"(^(\‍((\?:)?|\^)*\[(\\p\{L\}|_){2}\]([+][?+]?)?\[(\\p\{N\}|\\p\{L\}|_){3}\][*][?+]?\)*$)"));
1516 if (rule.string.contains(isDetectIdentifier)) {
1517 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectIdentifier:" << rule.string;
1518 return false;
1519 }
1520 }
1521
1522 if (rule.isDotRegex) {
1523 // search next rule with same column or firstNonSpace
1524 int i = &rule - context.rules.data() + 1;
1525 const bool hasColumn = (rule.column != -1);
1526 const bool hasFirstNonSpace = (rule.firstNonSpace == XmlBool::True);
1527 const bool isSpecial = (hasColumn || hasFirstNonSpace);
1528 for (; i < context.rules.size(); ++i) {
1529 auto &rule2 = context.rules[i];
1530 if (rule2.type == Context::Rule::Type::IncludeRules && isSpecial) {
1531 i = context.rules.size();
1532 break;
1533 }
1534
1535 const bool hasColumn2 = (rule2.column != -1);
1536 const bool hasFirstNonSpace2 = (rule2.firstNonSpace == XmlBool::True);
1537 if ((!isSpecial && !hasColumn2 && !hasFirstNonSpace2) || (hasColumn && rule.column == rule2.column)
1538 || (hasFirstNonSpace && hasFirstNonSpace2)) {
1539 break;
1540 }
1541 }
1542
1543 auto ruleFilename = (filename == rule.filename) ? QString() : QStringLiteral("in ") + rule.filename;
1544 if (i == context.rules.size()) {
1545 if (rule.lookAhead == XmlBool::True && rule.firstNonSpace != XmlBool::True && rule.column == -1 && rule.beginRegion.isEmpty()
1546 && rule.endRegion.isEmpty() && !useCapture) {
1547 qWarning() << filename << "context line" << context.line << ": RegExpr line" << rule.line << ruleFilename
1548 << "should be replaced by fallthroughContext:" << rule.string;
1549 }
1550 } else {
1551 auto &nextRule = context.rules[i];
1552 auto nextRuleFilename = (filename == nextRule.filename) ? QString() : QStringLiteral("in ") + nextRule.filename;
1553 qWarning() << filename << "context line" << context.line << "contains unreachable element line" << nextRule.line << nextRuleFilename
1554 << "because a dot RegExpr is used line" << rule.line << ruleFilename;
1555 }
1556
1557 // unnecessary quantifier
1558 static const QRegularExpression unnecessaryQuantifier1(QStringLiteral(R"([*+?]([.][*+?]{0,2})?$)"));
1559 static const QRegularExpression unnecessaryQuantifier2(QStringLiteral(R"([*+?]([.][*+?]{0,2})?[)]*$)"));
1560 auto &unnecessaryQuantifier = useCapture ? unnecessaryQuantifier1 : unnecessaryQuantifier2;
1561 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(unnecessaryQuantifier)) {
1562 qWarning() << rule.filename << "line" << rule.line
1563 << "Last quantifier is not necessary (i.g., 'xyz*' -> 'xy', 'xyz+.' -> 'xyz.'):" << rule.string;
1564 return false;
1565 }
1566 }
1567 }
1568
1569 return true;
1570 }
1571
1572 // Parse and check <emptyLine>
1573 bool parseEmptyLine(const QString &filename, QXmlStreamReader &xml)
1574 {
1575 bool success = true;
1576
1577 QString pattern;
1578 XmlBool casesensitive{};
1579
1580 for (auto &attr : xml.attributes()) {
1581 Parser parser{filename, xml, attr, success};
1582
1583 const bool isExtracted =
1584 parser.extractString(pattern, QStringLiteral("regexpr")) || parser.extractXmlBool(casesensitive, QStringLiteral("casesensitive"));
1585
1586 success = parser.checkIfExtracted(isExtracted);
1587 }
1588
1589 if (pattern.isEmpty()) {
1590 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: regexpr";
1591 success = false;
1592 } else {
1593 success = checkRegularExpression(filename, QRegularExpression(pattern), xml.lineNumber());
1594 }
1595
1596 return success;
1597 }
1598
1599 //! Check that a regular expression:
1600 //! - isValid()
1601 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1602 bool checkRegularExpression(const QString &filename, const QRegularExpression &regexp, int line) const
1603 {
1604 const auto pattern = regexp.pattern();
1605
1606 // validate regexp
1607 if (!regexp.isValid()) {
1608 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem:" << regexp.errorString() << "at offset"
1609 << regexp.patternErrorOffset();
1610 return false;
1611 }
1612
1613 // catch possible case typos: [A-z] or [a-Z]
1614 const int azOffset = std::max(pattern.indexOf(QStringLiteral("A-z")), pattern.indexOf(QStringLiteral("a-Z")));
1615 if (azOffset >= 0) {
1616 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem: [a-Z] or [A-z] at offset" << azOffset;
1617 return false;
1618 }
1619
1620 return true;
1621 }
1622
1623 //! Check fallthrough and fallthroughContext.
1624 //! Check kateversion for stopEmptyLineContextSwitchLoop.
1625 bool checkContextAttribute(const Definition &definition, const Context &context) const
1626 {
1627 bool success = true;
1628
1629 if (!context.fallthroughContext.name.isEmpty()) {
1630 const bool mandatoryFallthroughAttribute = definition.kateVersion < Version{5, 62};
1631 if (context.fallthrough == XmlBool::True && !mandatoryFallthroughAttribute) {
1632 qWarning() << definition.filename << "line" << context.line << "fallthrough attribute is unnecessary with kateversion >= 5.62 in context"
1633 << context.name;
1634 success = false;
1635 } else if (context.fallthrough != XmlBool::True && mandatoryFallthroughAttribute) {
1636 qWarning() << definition.filename << "line" << context.line
1637 << "fallthroughContext attribute without fallthrough=\"1\" attribute is only valid with kateversion >= 5.62 in context"
1638 << context.name;
1639 success = false;
1640 }
1641 }
1642
1643 if (context.stopEmptyLineContextSwitchLoop != XmlBool::Unspecified && definition.kateVersion < Version{5, 103}) {
1644 qWarning() << definition.filename << "line" << context.line
1645 << "stopEmptyLineContextSwitchLoop attribute is only valid with kateversion >= 5.103 in context" << context.name;
1646 success = false;
1647 }
1648
1649 return success;
1650 }
1651
1652 //! Search for additionalDeliminator/weakDeliminator which has no effect.
1653 bool checkDelimiters(const Definition &definition, const Context::Rule &rule) const
1654 {
1655 if (rule.additionalDeliminator.isEmpty() && rule.weakDeliminator.isEmpty()) {
1656 return true;
1657 }
1658
1659 bool success = true;
1660
1661 if (definition.kateVersion < Version{5, 79}) {
1662 qWarning() << definition.filename << "line" << rule.line
1663 << "additionalDeliminator and weakDeliminator are only available since version \"5.79\". Please, increase kateversion.";
1664 success = false;
1665 }
1666
1667 for (QChar c : rule.additionalDeliminator) {
1668 if (!definition.wordDelimiters.contains(c)) {
1669 return success;
1670 }
1671 }
1672
1673 for (QChar c : rule.weakDeliminator) {
1674 if (definition.wordDelimiters.contains(c)) {
1675 return success;
1676 }
1677 }
1678
1679 qWarning() << rule.filename << "line" << rule.line << "unnecessary use of additionalDeliminator and/or weakDeliminator" << rule.string;
1680 return false;
1681 }
1682
1683 //! Check that keyword rule reference an existing keyword list.
1684 bool checkKeyword(const Definition &definition, const Context::Rule &rule) const
1685 {
1686 if (rule.type == Context::Rule::Type::keyword) {
1687 auto it = definition.keywordsList.find(rule.string);
1688 if (it == definition.keywordsList.end()) {
1689 qWarning() << rule.filename << "line" << rule.line << "reference of non-existing keyword list:" << rule.string;
1690 return false;
1691 }
1692 }
1693 return true;
1694 }
1695
1696 //! Search for rules with lookAhead="true" and context="#stay".
1697 //! This would cause an infinite loop.
1698 bool checkLookAhead(const Context::Rule &rule) const
1699 {
1700 if (rule.lookAhead == XmlBool::True && rule.context.stay) {
1701 qWarning() << rule.filename << "line" << rule.line << "infinite loop: lookAhead with context #stay";
1702 }
1703 return true;
1704 }
1705
1706 //! Check that StringDetect contains a placeHolder when dynamic="1"
1707 bool checkStringDetect(const Context::Rule &rule) const
1708 {
1709 if (rule.type == Context::Rule::Type::StringDetect) {
1710 // dynamic == true and no place holder?
1711 if (rule.dynamic == XmlBool::True) {
1712 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1713 if (!rule.string.contains(placeHolder)) {
1714 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1715 return false;
1716 }
1717 }
1718 }
1719 return true;
1720 }
1721
1722 //! Check <include> and delimiter in a keyword list
1723 bool checkKeywordsList(const Definition &definition) const
1724 {
1725 bool success = true;
1726
1727 bool includeNotSupport = (definition.kateVersion < Version{5, 53});
1728 QMapIterator<QString, Keywords> keywordsIt(definition.keywordsList);
1729 while (keywordsIt.hasNext()) {
1730 keywordsIt.next();
1731
1732 for (const auto &include : keywordsIt.value().items.includes) {
1733 if (includeNotSupport) {
1734 qWarning() << definition.filename << "line" << include.line
1735 << "<include> is only available since version \"5.53\". Please, increase kateversion.";
1736 success = false;
1737 }
1738 success = checkKeywordInclude(definition, include) && success;
1739 }
1740
1741 // Check that keyword list items do not have deliminator character
1742#if 0
1743 for (const auto& keyword : keywordsIt.value().items.keywords) {
1744 for (QChar c : keyword.content) {
1745 if (definition.wordDelimiters.contains(c)) {
1746 qWarning() << definition.filename << "line" << keyword.line << "keyword with delimiter:" << c << "in" << keyword.content;
1747 success = false;
1748 }
1749 }
1750 }
1751#endif
1752 }
1753
1754 return success;
1755 }
1756
1757 //! Search for non-existing keyword include.
1758 bool checkKeywordInclude(const Definition &definition, const Keywords::Items::Item &include) const
1759 {
1760 bool containsKeywordName = true;
1761 int const idx = include.content.indexOf(QStringLiteral("##"));
1762 if (idx == -1) {
1763 auto it = definition.keywordsList.find(include.content);
1764 containsKeywordName = (it != definition.keywordsList.end());
1765 } else {
1766 auto defName = include.content.sliced(idx + 2);
1767 auto listName = include.content.sliced(0, idx);
1768 auto it = m_definitions.find(defName);
1769 if (it == m_definitions.end()) {
1770 qWarning() << definition.filename << "line" << include.line << "unknown definition in" << include.content;
1771 return false;
1772 }
1773 containsKeywordName = it->keywordsList.contains(listName);
1774 }
1775
1776 if (!containsKeywordName) {
1777 qWarning() << definition.filename << "line" << include.line << "unknown keyword name in" << include.content;
1778 }
1779
1780 return containsKeywordName;
1781 }
1782
1783 //! Check if a rule is hidden by another
1784 //! - rule hidden by DetectChar or AnyChar
1785 //! - DetectSpaces, AnyChar, Int, Float with all their characters hidden by DetectChar or AnyChar
1786 //! - StringDetect, WordDetect, RegExpr with as prefix Detect2Chars or other strings
1787 //! - duplicate rule (Int, Float, keyword with same String, etc)
1788 //! - Rule hidden by a dot regex
1789 bool checkUreachableRules(const QString &filename,
1790 const Context &context,
1791 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1792 {
1793 if (context.isOnlyIncluded) {
1794 return true;
1795 }
1796
1797 struct Rule4 {
1798 RuleAndInclude setRule(const Context::Rule &rule, const Context::Rule *includeRules = nullptr)
1799 {
1800 auto set = [&](RuleAndInclude &ruleAndInclude) {
1801 auto old = ruleAndInclude;
1802 ruleAndInclude = {&rule, includeRules};
1803 return old;
1804 };
1805
1806 if (rule.firstNonSpace == XmlBool::True) {
1807 return set(firstNonSpace);
1808 } else if (rule.column == 0) {
1809 return set(column0);
1810 } else if (rule.column > 0) {
1811 return set(columnGreaterThan0[rule.column]);
1812 } else {
1813 return set(normal);
1814 }
1815 }
1816
1817 private:
1818 RuleAndInclude normal;
1819 RuleAndInclude column0;
1820 QMap<int, RuleAndInclude> columnGreaterThan0;
1821 RuleAndInclude firstNonSpace;
1822 };
1823
1824 // Associate QChar with RuleAndInclude
1825 struct CharTable {
1826 /// Search RuleAndInclude associated with @p c.
1827 RuleAndInclude find(QChar c) const
1828 {
1829 if (c.unicode() < 128) {
1830 return m_asciiMap[c.unicode()];
1831 }
1832 auto it = m_utf8Map.find(c);
1833 return it == m_utf8Map.end() ? RuleAndInclude{nullptr, nullptr} : it.value();
1834 }
1835
1836 /// Search RuleAndInclude associated with the characters of @p s.
1837 /// \return an empty QList when at least one character is not found.
1839 {
1840 QList<RuleAndInclude> result;
1841
1842 for (QChar c : s) {
1843 if (!find(c)) {
1844 return result;
1845 }
1846 }
1847
1848 for (QChar c : s) {
1849 result.append(find(c));
1850 }
1851
1852 return result;
1853 }
1854
1855 /// Associates @p c with a rule.
1856 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1857 {
1858 if (c.unicode() < 128) {
1859 m_asciiMap[c.unicode()] = {&rule, includeRule};
1860 } else {
1861 m_utf8Map[c] = {&rule, includeRule};
1862 }
1863 }
1864
1865 /// Associates each character of @p s with a rule.
1866 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1867 {
1868 for (QChar c : s) {
1869 append(c, rule, includeRule);
1870 }
1871 }
1872
1873 private:
1874 RuleAndInclude m_asciiMap[127]{};
1876 };
1877
1878 struct Char4Tables {
1879 CharTable chars;
1880 CharTable charsColumn0;
1881 QMap<int, CharTable> charsColumnGreaterThan0;
1882 CharTable charsFirstNonSpace;
1883 };
1884
1885 // View on Char4Tables members
1886 struct CharTableArray {
1887 // Append Char4Tables members that satisfies firstNonSpace and column.
1888 // Char4Tables::char is always added.
1889 CharTableArray(Char4Tables &tables, const Context::Rule &rule)
1890 {
1891 if (rule.firstNonSpace == XmlBool::True) {
1892 appendTable(tables.charsFirstNonSpace);
1893 }
1894
1895 if (rule.column == 0) {
1896 appendTable(tables.charsColumn0);
1897 } else if (rule.column > 0) {
1898 appendTable(tables.charsColumnGreaterThan0[rule.column]);
1899 }
1900
1901 appendTable(tables.chars);
1902 }
1903
1904 // Removes Char4Tables::chars when the rule contains firstNonSpace or column
1905 void removeNonSpecialWhenSpecial()
1906 {
1907 if (m_size > 1) {
1908 --m_size;
1909 }
1910 }
1911
1912 /// Search RuleAndInclude associated with @p c.
1913 RuleAndInclude find(QChar c) const
1914 {
1915 for (int i = 0; i < m_size; ++i) {
1916 if (auto ruleAndInclude = m_charTables[i]->find(c)) {
1917 return ruleAndInclude;
1918 }
1919 }
1920 return RuleAndInclude{nullptr, nullptr};
1921 }
1922
1923 /// Search RuleAndInclude associated with the characters of @p s.
1924 /// \return an empty QList when at least one character is not found.
1926 {
1927 for (int i = 0; i < m_size; ++i) {
1928 auto result = m_charTables[i]->find(s);
1929 if (result.size()) {
1930 while (++i < m_size) {
1931 result.append(m_charTables[i]->find(s));
1932 }
1933 return result;
1934 }
1935 }
1936 return QList<RuleAndInclude>();
1937 }
1938
1939 /// Associates @p c with a rule.
1940 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1941 {
1942 for (int i = 0; i < m_size; ++i) {
1943 m_charTables[i]->append(c, rule, includeRule);
1944 }
1945 }
1946
1947 /// Associates each character of @p s with a rule.
1948 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1949 {
1950 for (int i = 0; i < m_size; ++i) {
1951 m_charTables[i]->append(s, rule, includeRule);
1952 }
1953 }
1954
1955 private:
1956 void appendTable(CharTable &t)
1957 {
1958 m_charTables[m_size] = &t;
1959 ++m_size;
1960 }
1961
1962 CharTable *m_charTables[3];
1963 int m_size = 0;
1964 };
1965
1966 struct ObservableRule {
1967 const Context::Rule *rule;
1968 const Context::Rule *includeRules;
1969
1970 bool hasResolvedIncludeRules() const
1971 {
1972 return rule == includeRules;
1973 }
1974 };
1975
1976 // Iterates over all the rules, including those in includedRules
1977 struct RuleIterator {
1978 RuleIterator(const QList<ObservableRule> &rules, const ObservableRule &endRule)
1979 : m_end(&endRule - rules.data())
1980 , m_rules(rules)
1981 {
1982 }
1983
1984 /// \return next rule or nullptr
1985 const Context::Rule *next()
1986 {
1987 // if in includedRules
1988 if (m_includedRules) {
1989 ++m_i2;
1990 if (m_i2 != m_includedRules->size()) {
1991 return (*m_includedRules)[m_i2];
1992 }
1993 ++m_i;
1994 m_includedRules = nullptr;
1995 }
1996
1997 // if is a includedRules
1998 while (m_i < m_end && m_rules[m_i].rule->type == Context::Rule::Type::IncludeRules) {
1999 if (!m_rules[m_i].includeRules && m_rules[m_i].rule->includedRules.size()) {
2000 m_i2 = 0;
2001 m_includedRules = &m_rules[m_i].rule->includedRules;
2002 return (*m_includedRules)[m_i2];
2003 }
2004 ++m_i;
2005 }
2006
2007 if (m_i < m_end) {
2008 ++m_i;
2009 return m_rules[m_i - 1].rule;
2010 }
2011
2012 return nullptr;
2013 }
2014
2015 /// \return current IncludeRules or nullptr
2016 const Context::Rule *currentIncludeRules() const
2017 {
2018 return m_includedRules ? m_rules[m_i].rule : m_rules[m_i].includeRules;
2019 }
2020
2021 private:
2022 int m_i = 0;
2023 int m_i2 = 0;
2024 const int m_end;
2025 const QList<ObservableRule> &m_rules;
2026 const QList<const Context::Rule *> *m_includedRules = nullptr;
2027 };
2028
2029 // Dot regex container that satisfies firstNonSpace and column.
2030 struct DotRegex {
2031 /// Append a dot regex rule.
2032 void append(const Context::Rule &rule, const Context::Rule *includedRule)
2033 {
2034 auto array = extractDotRegexes(rule);
2035 if (array[0]) {
2036 *array[0] = {&rule, includedRule};
2037 }
2038 if (array[1]) {
2039 *array[1] = {&rule, includedRule};
2040 }
2041 }
2042
2043 /// Search dot regex which hides @p rule
2044 RuleAndInclude find(const Context::Rule &rule)
2045 {
2046 auto array = extractDotRegexes(rule);
2047 if (array[0]) {
2048 return *array[0];
2049 }
2050 if (array[1]) {
2051 return *array[1];
2052 }
2053 return RuleAndInclude{};
2054 }
2055
2056 private:
2057 using Array = std::array<RuleAndInclude *, 2>;
2058
2059 Array extractDotRegexes(const Context::Rule &rule)
2060 {
2061 Array ret{};
2062
2063 if (rule.firstNonSpace != XmlBool::True && rule.column == -1) {
2064 ret[0] = &dotRegex;
2065 } else {
2066 if (rule.firstNonSpace == XmlBool::True) {
2067 ret[0] = &dotRegexFirstNonSpace;
2068 }
2069
2070 if (rule.column == 0) {
2071 ret[1] = &dotRegexColumn0;
2072 } else if (rule.column > 0) {
2073 ret[1] = &dotRegexColumnGreaterThan0[rule.column];
2074 }
2075 }
2076
2077 return ret;
2078 }
2079
2080 RuleAndInclude dotRegex{};
2081 RuleAndInclude dotRegexColumn0{};
2082 QMap<int, RuleAndInclude> dotRegexColumnGreaterThan0{};
2083 RuleAndInclude dotRegexFirstNonSpace{};
2084 };
2085
2086 bool success = true;
2087
2088 // characters of DetectChar/AnyChar
2089 Char4Tables detectChars;
2090 // characters of dynamic DetectChar
2091 Char4Tables dynamicDetectChars;
2092 // characters of LineContinue
2093 Char4Tables lineContinueChars;
2094
2095 Rule4 intRule{};
2096 Rule4 floatRule{};
2097 Rule4 hlCCharRule{};
2098 Rule4 hlCOctRule{};
2099 Rule4 hlCHexRule{};
2100 Rule4 hlCStringCharRule{};
2101 Rule4 detectIdentifierRule{};
2102
2103 // Contains includedRules and included includedRules
2105
2106 DotRegex dotRegex;
2107
2108 QList<ObservableRule> observedRules;
2109 observedRules.reserve(context.rules.size());
2110 for (const Context::Rule &rule : context.rules) {
2111 const Context::Rule *includeRule = nullptr;
2112 if (rule.type == Context::Rule::Type::IncludeRules) {
2113 auto *context = rule.context.context;
2114 if (context && context->isOnlyIncluded) {
2115 includeRule = &rule;
2116 }
2117 }
2118
2119 observedRules.push_back({&rule, includeRule});
2120 if (includeRule) {
2121 for (const Context::Rule *rule2 : rule.includedRules) {
2122 observedRules.push_back({rule2, includeRule});
2123 }
2124 }
2125 }
2126
2127 for (auto &observedRule : observedRules) {
2128 const Context::Rule &rule = *observedRule.rule;
2129 bool isUnreachable = false;
2130 QList<RuleAndInclude> unreachableBy;
2131
2132 // declare rule as unreachable if ruleAndInclude is not empty
2133 auto updateUnreachable1 = [&](RuleAndInclude ruleAndInclude) {
2134 if (ruleAndInclude) {
2135 isUnreachable = true;
2136 unreachableBy.append(ruleAndInclude);
2137 }
2138 };
2139
2140 // declare rule as unreachable if ruleAndIncludes is not empty
2141 auto updateUnreachable2 = [&](const QList<RuleAndInclude> &ruleAndIncludes) {
2142 if (!ruleAndIncludes.isEmpty()) {
2143 isUnreachable = true;
2144 unreachableBy.append(ruleAndIncludes);
2145 }
2146 };
2147
2148 // check if rule2.firstNonSpace/column is compatible with those of rule
2149 auto isCompatible = [&rule](Context::Rule const &rule2) {
2150 return (rule2.firstNonSpace != XmlBool::True && rule2.column == -1) || (rule.column == rule2.column && rule.column != -1)
2151 || (rule.firstNonSpace == rule2.firstNonSpace && rule.firstNonSpace == XmlBool::True);
2152 };
2153
2154 updateUnreachable1(dotRegex.find(rule));
2155
2156 switch (rule.type) {
2157 // checks if hidden by DetectChar/AnyChar
2158 // then add the characters to detectChars
2159 case Context::Rule::Type::AnyChar: {
2160 auto tables = CharTableArray(detectChars, rule);
2161 updateUnreachable2(tables.find(rule.string));
2162 tables.removeNonSpecialWhenSpecial();
2163 tables.append(rule.string, rule);
2164 break;
2165 }
2166
2167 // check if is hidden by DetectChar/AnyChar
2168 // then add the characters to detectChars or dynamicDetectChars
2169 case Context::Rule::Type::DetectChar: {
2170 auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2171 auto tables = CharTableArray(chars4, rule);
2172 updateUnreachable1(tables.find(rule.char0));
2173 tables.removeNonSpecialWhenSpecial();
2174 tables.append(rule.char0, rule);
2175 break;
2176 }
2177
2178 // check if hidden by DetectChar/AnyChar
2179 // then add spaces characters to detectChars
2180 case Context::Rule::Type::DetectSpaces: {
2181 auto tables = CharTableArray(detectChars, rule);
2182 updateUnreachable2(tables.find(QStringLiteral(" \t")));
2183 tables.removeNonSpecialWhenSpecial();
2184 tables.append(QLatin1Char(' '), rule);
2185 tables.append(QLatin1Char('\t'), rule);
2186 break;
2187 }
2188
2189 // check if hidden by DetectChar/AnyChar
2190 case Context::Rule::Type::HlCChar:
2191 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('\'')));
2192 updateUnreachable1(hlCCharRule.setRule(rule));
2193 break;
2194
2195 // check if hidden by DetectChar/AnyChar
2196 case Context::Rule::Type::HlCHex:
2197 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('0')));
2198 updateUnreachable1(hlCHexRule.setRule(rule));
2199 break;
2200
2201 // check if hidden by DetectChar/AnyChar
2202 case Context::Rule::Type::HlCOct:
2203 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('0')));
2204 updateUnreachable1(hlCOctRule.setRule(rule));
2205 break;
2206
2207 // check if hidden by DetectChar/AnyChar
2208 case Context::Rule::Type::HlCStringChar:
2209 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('\\')));
2210 updateUnreachable1(hlCStringCharRule.setRule(rule));
2211 break;
2212
2213 // check if hidden by DetectChar/AnyChar
2214 case Context::Rule::Type::Int:
2215 updateUnreachable2(CharTableArray(detectChars, rule).find(QStringLiteral("0123456789")));
2216 updateUnreachable1(intRule.setRule(rule));
2217 break;
2218
2219 // check if hidden by DetectChar/AnyChar
2220 case Context::Rule::Type::Float:
2221 updateUnreachable2(CharTableArray(detectChars, rule).find(QStringLiteral("0123456789.")));
2222 updateUnreachable1(floatRule.setRule(rule));
2223 // check that Float is before Int
2224 updateUnreachable1(Rule4(intRule).setRule(rule));
2225 break;
2226
2227 // check if hidden by another DetectIdentifier rule
2228 case Context::Rule::Type::DetectIdentifier:
2229 updateUnreachable1(detectIdentifierRule.setRule(rule));
2230 break;
2231
2232 // check if hidden by DetectChar/AnyChar or another LineContinue
2233 case Context::Rule::Type::LineContinue: {
2234 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2235
2236 auto tables = CharTableArray(lineContinueChars, rule);
2237 updateUnreachable1(tables.find(rule.char0));
2238 tables.removeNonSpecialWhenSpecial();
2239 tables.append(rule.char0, rule);
2240 break;
2241 }
2242
2243 // check if hidden by DetectChar/AnyChar or another Detect2Chars/RangeDetect
2244 case Context::Rule::Type::Detect2Chars:
2245 case Context::Rule::Type::RangeDetect:
2246 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2247 if (!isUnreachable) {
2248 RuleIterator ruleIterator(observedRules, observedRule);
2249 while (const auto *rulePtr = ruleIterator.next()) {
2250 if (isUnreachable) {
2251 break;
2252 }
2253 const auto &rule2 = *rulePtr;
2254 if (rule2.type == rule.type && isCompatible(rule2) && rule.char0 == rule2.char0 && rule.char1 == rule2.char1) {
2255 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2256 }
2257 }
2258 }
2259 break;
2260
2261 case Context::Rule::Type::RegExpr: {
2262 if (rule.isDotRegex) {
2263 dotRegex.append(rule, nullptr);
2264 break;
2265 }
2266
2267 // check that `rule` does not have another RegExpr as a prefix
2268 RuleIterator ruleIterator(observedRules, observedRule);
2269 while (const auto *rulePtr = ruleIterator.next()) {
2270 if (isUnreachable) {
2271 break;
2272 }
2273 const auto &rule2 = *rulePtr;
2274 if (rule2.type == Context::Rule::Type::RegExpr && isCompatible(rule2) && rule.insensitive == rule2.insensitive
2275 && rule.dynamic == rule2.dynamic && rule.sanitizedString.startsWith(rule2.sanitizedString)) {
2276 bool add = (rule.sanitizedString.startsWith(rule2.string) || rule.sanitizedString.size() < rule2.sanitizedString.size() + 2);
2277 if (!add) {
2278 // \s.* (sanitized = \s) is considered hiding \s*\S
2279 // we check the quantifiers to see if this is the case
2280 auto c1 = rule.sanitizedString[rule2.sanitizedString.size()].unicode();
2281 auto c2 = rule.sanitizedString[rule2.sanitizedString.size() + 1].unicode();
2282 auto c3 = rule2.sanitizedString.back().unicode();
2283 if (c3 == '*' || c3 == '?' || c3 == '+') {
2284 add = true;
2285 } else if (c1 == '*' || c1 == '?') {
2286 add = !((c2 == '?' || c2 == '+') || (rule.sanitizedString.size() >= rule2.sanitizedString.size() + 3));
2287 } else {
2288 add = true;
2289 }
2290 }
2291 if (add) {
2292 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2293 }
2294 }
2295 }
2296
2297 Q_FALLTHROUGH();
2298 }
2299 // check if a rule does not have another rule as a prefix
2300 case Context::Rule::Type::WordDetect:
2301 case Context::Rule::Type::StringDetect: {
2302 // check that dynamic `rule` does not have another dynamic StringDetect as a prefix
2303 if (rule.type == Context::Rule::Type::StringDetect && rule.dynamic == XmlBool::True) {
2304 RuleIterator ruleIterator(observedRules, observedRule);
2305 while (const auto *rulePtr = ruleIterator.next()) {
2306 if (isUnreachable) {
2307 break;
2308 }
2309
2310 const auto &rule2 = *rulePtr;
2311 if (rule2.type != Context::Rule::Type::StringDetect || rule2.dynamic != XmlBool::True || !isCompatible(rule2)) {
2312 continue;
2313 }
2314
2315 const bool isSensitive = (rule2.insensitive == XmlBool::True);
2316 const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2317 if ((isSensitive || rule.insensitive != XmlBool::True) && rule.string.startsWith(rule2.string, caseSensitivity)) {
2318 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2319 }
2320 }
2321 }
2322
2323 // string used for comparison and truncated from "dynamic" part
2324 QStringView s = rule.string;
2325
2326 // truncate to '%' with dynamic rules
2327 if (rule.dynamic == XmlBool::True) {
2328 static const QRegularExpression dynamicPosition(QStringLiteral(R"(^(?:[^%]*|%(?![1-9]))*)"));
2329 auto result = dynamicPosition.match(rule.string);
2330 s = s.sliced(0, result.capturedLength());
2331 }
2332
2333 QString sanitizedRegex;
2334 // truncate to special character with RegExpr.
2335 // If regexp contains '|', `s` becomes empty.
2336 if (rule.type == Context::Rule::Type::RegExpr) {
2337 static const QRegularExpression regularChars(QStringLiteral(R"(^(?:[^.?*+^$[{(\\|]+|\\[-.?*+^$[\]{}()\\|]+|\[[^^\\]\])+)"));
2338 static const QRegularExpression sanitizeChars(QStringLiteral(R"(\\‍([-.?*+^$[\]{}()\\|])|\[([^^\\])\])"));
2339 const qsizetype result = regularChars.match(rule.string).capturedLength();
2340 const qsizetype pos = qMin(result, s.size());
2341 if (rule.string.indexOf(QLatin1Char('|'), pos) < pos) {
2342 sanitizedRegex = rule.string.sliced(0, qMin(result, s.size()));
2343 sanitizedRegex.replace(sanitizeChars, QStringLiteral("\\1"));
2344 s = sanitizedRegex;
2345 } else {
2346 s = QStringView();
2347 }
2348 }
2349
2350 // check if hidden by DetectChar/AnyChar
2351 if (s.size() > 0) {
2352 auto t = CharTableArray(detectChars, rule);
2353 if (rule.insensitive != XmlBool::True) {
2354 updateUnreachable1(t.find(s[0]));
2355 } else {
2356 QChar c2[]{s[0].toLower(), s[0].toUpper()};
2357 updateUnreachable2(t.find(QStringView(c2, 2)));
2358 }
2359 }
2360
2361 // check if Detect2Chars, StringDetect, WordDetect is not a prefix of s
2362 if (s.size() > 0 && !isUnreachable) {
2363 // combination of uppercase and lowercase
2364 RuleAndInclude detect2CharsInsensitives[]{{}, {}, {}, {}};
2365
2366 RuleIterator ruleIterator(observedRules, observedRule);
2367 while (const auto *rulePtr = ruleIterator.next()) {
2368 if (isUnreachable) {
2369 break;
2370 }
2371 const auto &rule2 = *rulePtr;
2372 const bool isSensitive = (rule2.insensitive == XmlBool::True);
2373 const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2374
2375 switch (rule2.type) {
2376 // check that it is not a detectChars prefix
2377 case Context::Rule::Type::Detect2Chars:
2378 if (isCompatible(rule2) && s.size() >= 2) {
2379 if (rule.insensitive != XmlBool::True) {
2380 if (rule2.char0 == s[0] && rule2.char1 == s[1]) {
2381 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2382 }
2383 } else {
2384 // when the string is case insensitive,
2385 // all 4 upper/lower case combinations must be found
2386 auto set = [&](RuleAndInclude &x, QChar c1, QChar c2) {
2387 if (!x && rule2.char0 == c1 && rule2.char0 == c2) {
2388 x = {&rule2, ruleIterator.currentIncludeRules()};
2389 }
2390 };
2391 set(detect2CharsInsensitives[0], s[0].toLower(), s[1].toLower());
2392 set(detect2CharsInsensitives[1], s[0].toLower(), s[1].toUpper());
2393 set(detect2CharsInsensitives[2], s[0].toUpper(), s[1].toUpper());
2394 set(detect2CharsInsensitives[3], s[0].toUpper(), s[1].toLower());
2395
2396 if (detect2CharsInsensitives[0] && detect2CharsInsensitives[1] && detect2CharsInsensitives[2]
2397 && detect2CharsInsensitives[3]) {
2398 isUnreachable = true;
2399 unreachableBy.append(detect2CharsInsensitives[0]);
2400 unreachableBy.append(detect2CharsInsensitives[1]);
2401 unreachableBy.append(detect2CharsInsensitives[2]);
2402 unreachableBy.append(detect2CharsInsensitives[3]);
2403 }
2404 }
2405 }
2406 break;
2407
2408 // check that it is not a StringDetect prefix
2409 case Context::Rule::Type::StringDetect:
2410 if (isCompatible(rule2) && rule2.dynamic != XmlBool::True && (isSensitive || rule.insensitive != XmlBool::True)
2411 && s.startsWith(rule2.string, caseSensitivity)) {
2412 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2413 }
2414 break;
2415
2416 // check if a WordDetect is hidden by another WordDetect
2417 case Context::Rule::Type::WordDetect:
2418 if (rule.type == Context::Rule::Type::WordDetect && isCompatible(rule2) && (isSensitive || rule.insensitive != XmlBool::True)
2419 && 0 == rule.string.compare(rule2.string, caseSensitivity)) {
2420 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2421 }
2422 break;
2423
2424 default:;
2425 }
2426 }
2427 }
2428
2429 break;
2430 }
2431
2432 // check if hidden by another keyword rule
2433 case Context::Rule::Type::keyword: {
2434 RuleIterator ruleIterator(observedRules, observedRule);
2435 while (const auto *rulePtr = ruleIterator.next()) {
2436 if (isUnreachable) {
2437 break;
2438 }
2439 const auto &rule2 = *rulePtr;
2440 if (rule2.type == Context::Rule::Type::keyword && isCompatible(rule2) && rule.string == rule2.string) {
2441 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2442 }
2443 }
2444 // TODO check that all keywords are hidden by another rules
2445 break;
2446 }
2447
2448 // add characters in those used but without checking if they are already.
2449 // <DetectChar char="}" />
2450 // <includedRules .../> <- reference an another <DetectChar char="}" /> who will not be checked
2451 // <includedRules .../> <- reference a <DetectChar char="{" /> who will be added
2452 // <DetectChar char="{" /> <- hidden by previous rule
2453 case Context::Rule::Type::IncludeRules:
2454 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2455 break;
2456 }
2457
2458 if (auto &ruleAndInclude = includeContexts[rule.context.context]) {
2459 updateUnreachable1(ruleAndInclude);
2460 } else {
2461 ruleAndInclude.rule = &rule;
2462 }
2463
2464 for (const auto *rulePtr : rule.includedIncludeRules) {
2465 includeContexts.insert(rulePtr->context.context, RuleAndInclude{rulePtr, &rule});
2466 }
2467
2468 if (observedRule.includeRules) {
2469 break;
2470 }
2471
2472 for (const auto *rulePtr : rule.includedRules) {
2473 const auto &rule2 = *rulePtr;
2474 switch (rule2.type) {
2475 case Context::Rule::Type::AnyChar: {
2476 auto tables = CharTableArray(detectChars, rule2);
2477 tables.removeNonSpecialWhenSpecial();
2478 tables.append(rule2.string, rule2, &rule);
2479 break;
2480 }
2481
2482 case Context::Rule::Type::DetectChar: {
2483 auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2484 auto tables = CharTableArray(chars4, rule2);
2485 tables.removeNonSpecialWhenSpecial();
2486 tables.append(rule2.char0, rule2, &rule);
2487 break;
2488 }
2489
2490 case Context::Rule::Type::DetectSpaces: {
2491 auto tables = CharTableArray(detectChars, rule2);
2492 tables.removeNonSpecialWhenSpecial();
2493 tables.append(QLatin1Char(' '), rule2, &rule);
2494 tables.append(QLatin1Char('\t'), rule2, &rule);
2495 break;
2496 }
2497
2498 case Context::Rule::Type::HlCChar:
2499 hlCCharRule.setRule(rule2, &rule);
2500 break;
2501
2502 case Context::Rule::Type::HlCHex:
2503 hlCHexRule.setRule(rule2, &rule);
2504 break;
2505
2506 case Context::Rule::Type::HlCOct:
2507 hlCOctRule.setRule(rule2, &rule);
2508 break;
2509
2510 case Context::Rule::Type::HlCStringChar:
2511 hlCStringCharRule.setRule(rule2, &rule);
2512 break;
2513
2514 case Context::Rule::Type::Int:
2515 intRule.setRule(rule2, &rule);
2516 break;
2517
2518 case Context::Rule::Type::Float:
2519 floatRule.setRule(rule2, &rule);
2520 break;
2521
2522 case Context::Rule::Type::LineContinue: {
2523 auto tables = CharTableArray(lineContinueChars, rule2);
2524 tables.removeNonSpecialWhenSpecial();
2525 tables.append(rule2.char0, rule2, &rule);
2526 break;
2527 }
2528
2529 case Context::Rule::Type::RegExpr:
2530 if (rule2.isDotRegex) {
2531 dotRegex.append(rule2, &rule);
2532 }
2533 break;
2534
2535 case Context::Rule::Type::WordDetect:
2536 case Context::Rule::Type::StringDetect:
2537 case Context::Rule::Type::Detect2Chars:
2538 case Context::Rule::Type::IncludeRules:
2539 case Context::Rule::Type::DetectIdentifier:
2540 case Context::Rule::Type::keyword:
2541 case Context::Rule::Type::Unknown:
2542 case Context::Rule::Type::RangeDetect:
2543 break;
2544 }
2545 }
2546 break;
2547
2548 case Context::Rule::Type::Unknown:
2549 break;
2550 }
2551
2552 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2553 auto &unreachableIncludedRule = unreachableIncludedRules[&rule];
2554 if (isUnreachable && unreachableIncludedRule.alwaysUnreachable) {
2555 unreachableIncludedRule.unreachableBy.append(unreachableBy);
2556 } else {
2557 unreachableIncludedRule.alwaysUnreachable = false;
2558 }
2559 } else if (isUnreachable) {
2560 success = false;
2561 QString message;
2562 message.reserve(128);
2563 for (auto &ruleAndInclude : unreachableBy) {
2564 message += QStringLiteral("line ");
2565 if (ruleAndInclude.includeRules) {
2566 message += QString::number(ruleAndInclude.includeRules->line);
2567 message += QStringLiteral(" [by '");
2568 message += ruleAndInclude.includeRules->context.name;
2569 message += QStringLiteral("' line ");
2570 message += QString::number(ruleAndInclude.rule->line);
2571 if (ruleAndInclude.includeRules->filename != ruleAndInclude.rule->filename) {
2572 message += QStringLiteral(" (");
2573 message += ruleAndInclude.rule->filename;
2574 message += QLatin1Char(')');
2575 }
2576 message += QLatin1Char(']');
2577 } else {
2578 message += QString::number(ruleAndInclude.rule->line);
2579 }
2580 message += QStringLiteral(", ");
2581 }
2582 message.chop(2);
2583 qWarning() << filename << "line" << rule.line << "unreachable rule by" << message;
2584 }
2585 }
2586
2587 return success;
2588 }
2589
2590 //! Proposes to merge certain rule sequences
2591 //! - several DetectChar/AnyChar into AnyChar
2592 //! - several RegExpr into one RegExpr
2593 bool suggestRuleMerger(const QString &filename, const Context &context) const
2594 {
2595 bool success = true;
2596
2597 if (context.rules.isEmpty()) {
2598 return success;
2599 }
2600
2601 auto it = context.rules.begin();
2602 const auto end = context.rules.end() - 1;
2603
2604 for (; it < end; ++it) {
2605 auto &rule1 = *it;
2606 auto &rule2 = it[1];
2607
2608 auto isCommonCompatible = [&] {
2609 if (rule1.lookAhead != rule2.lookAhead) {
2610 return false;
2611 }
2612 // ignore attribute when lookAhead is true
2613 if (rule1.lookAhead != XmlBool::True && rule1.attribute != rule2.attribute) {
2614 return false;
2615 }
2616 // clang-format off
2617 return rule1.beginRegion == rule2.beginRegion
2618 && rule1.endRegion == rule2.endRegion
2619 && rule1.firstNonSpace == rule2.firstNonSpace
2620 && rule1.context.context == rule2.context.context
2621 && rule1.context.popCount == rule2.context.popCount;
2622 // clang-format on
2623 };
2624
2625 switch (rule1.type) {
2626 // request to merge AnyChar/DetectChar
2627 case Context::Rule::Type::AnyChar:
2628 case Context::Rule::Type::DetectChar:
2629 if ((rule2.type == Context::Rule::Type::AnyChar || rule2.type == Context::Rule::Type::DetectChar) && isCommonCompatible()
2630 && rule1.column == rule2.column) {
2631 qWarning() << filename << "line" << rule2.line << "can be merged as AnyChar with the previous rule";
2632 success = false;
2633 }
2634 break;
2635
2636 // request to merge multiple RegExpr
2637 case Context::Rule::Type::RegExpr:
2638 if (rule2.type == Context::Rule::Type::RegExpr && isCommonCompatible() && rule1.dynamic == rule2.dynamic
2639 && (rule1.column == rule2.column || (rule1.column <= 0 && rule2.column <= 0))) {
2640 qWarning() << filename << "line" << rule2.line << "can be merged with the previous rule";
2641 success = false;
2642 }
2643 break;
2644
2645 case Context::Rule::Type::DetectSpaces:
2646 case Context::Rule::Type::HlCChar:
2647 case Context::Rule::Type::HlCHex:
2648 case Context::Rule::Type::HlCOct:
2649 case Context::Rule::Type::HlCStringChar:
2650 case Context::Rule::Type::Int:
2651 case Context::Rule::Type::Float:
2652 case Context::Rule::Type::LineContinue:
2653 case Context::Rule::Type::WordDetect:
2654 case Context::Rule::Type::StringDetect:
2655 case Context::Rule::Type::Detect2Chars:
2656 case Context::Rule::Type::IncludeRules:
2657 case Context::Rule::Type::DetectIdentifier:
2658 case Context::Rule::Type::keyword:
2659 case Context::Rule::Type::Unknown:
2660 case Context::Rule::Type::RangeDetect:
2661 break;
2662 }
2663 }
2664
2665 return success;
2666 }
2667
2668 //! Initialize the referenced context (ContextName::context)
2669 //! Some input / output examples are:
2670 //! - "#stay" -> ""
2671 //! - "#pop" -> ""
2672 //! - "Comment" -> "Comment"
2673 //! - "#pop!Comment" -> "Comment"
2674 //! - "##ISO C++" -> ""
2675 //! - "Comment##ISO C++"-> "Comment" in ISO C++
2676 void resolveContextName(Definition &definition, Context &context, ContextName &contextName, int line)
2677 {
2678 QStringView name = contextName.name;
2679 if (name.isEmpty()) {
2680 contextName.stay = true;
2681 } else if (name.startsWith(QStringLiteral("#stay"))) {
2682 name = name.sliced(5);
2683 contextName.stay = true;
2684 contextName.context = &context;
2685 if (!name.isEmpty()) {
2686 qWarning() << definition.filename << "line" << line << "invalid context in" << context.name;
2687 m_success = false;
2688 }
2689 } else {
2690 while (name.startsWith(QStringLiteral("#pop"))) {
2691 name = name.sliced(4);
2692 ++contextName.popCount;
2693 }
2694
2695 if (contextName.popCount && !name.isEmpty()) {
2696 if (name.startsWith(QLatin1Char('!')) && name.size() > 1) {
2697 name = name.sliced(1);
2698 } else {
2699 qWarning() << definition.filename << "line" << line << "'!' missing between '#pop' and context name" << context.name;
2700 m_success = false;
2701 }
2702 }
2703
2704 if (!name.isEmpty()) {
2705 const int idx = name.indexOf(QStringLiteral("##"));
2706 if (idx == -1) {
2707 auto it = definition.contexts.find(name.toString());
2708 if (it != definition.contexts.end()) {
2709 contextName.context = &*it;
2710 }
2711 } else {
2712 auto defName = name.sliced(idx + 2);
2713 auto it = m_definitions.find(defName.toString());
2714 if (it != m_definitions.end()) {
2715 auto listName = name.sliced(0, idx).toString();
2716 definition.referencedDefinitions.insert(&*it);
2717 auto ctxIt = it->contexts.find(listName.isEmpty() ? it->firstContextName : listName);
2718 if (ctxIt != it->contexts.end()) {
2719 contextName.context = &*ctxIt;
2720 }
2721 } else {
2722 qWarning() << definition.filename << "line" << line << "unknown definition in" << context.name;
2723 m_success = false;
2724 }
2725 }
2726
2727 if (!contextName.context) {
2728 qWarning() << definition.filename << "line" << line << "unknown context" << name << "in" << context.name;
2729 m_success = false;
2730 }
2731 }
2732 }
2733 }
2734
2735 QMap<QString, Definition> m_definitions;
2737 Definition *m_currentDefinition = nullptr;
2738 Keywords *m_currentKeywords = nullptr;
2739 Context *m_currentContext = nullptr;
2740 bool m_success = true;
2741};
2742
2743namespace
2744{
2745QStringList readListing(const QString &fileName)
2746{
2747 QFile file(fileName);
2748 if (!file.open(QIODevice::ReadOnly)) {
2749 return QStringList();
2750 }
2751
2752 QXmlStreamReader xml(&file);
2753 QStringList listing;
2754 while (!xml.atEnd()) {
2755 xml.readNext();
2756
2757 // add only .xml files, no .json or stuff
2758 if (xml.isCharacters() && xml.text().contains(QLatin1String(".xml"))) {
2759 listing.append(xml.text().toString());
2760 }
2761 }
2762
2763 if (xml.hasError()) {
2764 qWarning() << "XML error while reading" << fileName << " - " << qPrintable(xml.errorString()) << "@ offset" << xml.characterOffset();
2765 listing.clear();
2766 }
2767
2768 return listing;
2769}
2770
2771/**
2772 * check if the "extensions" attribute have valid wildcards
2773 * @param extensions extensions string to check
2774 * @return valid?
2775 */
2776bool checkExtensions(QStringView extensions)
2777{
2778 // get list of extensions
2779 const QList<QStringView> extensionParts = extensions.split(QLatin1Char(';'), Qt::SkipEmptyParts);
2780
2781 // ok if empty
2782 if (extensionParts.isEmpty()) {
2783 return true;
2784 }
2785
2786 // check that only valid wildcard things are inside the parts
2787 for (const auto &extension : extensionParts) {
2788 for (const auto c : extension) {
2789 // eat normal things
2790 if (c.isDigit() || c.isLetter()) {
2791 continue;
2792 }
2793
2794 // allow some special characters
2795 if (c == QLatin1Char('.') || c == QLatin1Char('-') || c == QLatin1Char('_') || c == QLatin1Char('+')) {
2796 continue;
2797 }
2798
2799 // only allowed wildcard things: '?' and '*'
2800 if (c == QLatin1Char('?') || c == QLatin1Char('*')) {
2801 continue;
2802 }
2803
2804 qWarning() << "invalid character" << c << "seen in extensions wildcard";
2805 return false;
2806 }
2807 }
2808
2809 // all checks passed
2810 return true;
2811}
2812
2813}
2814
2815int main(int argc, char *argv[])
2816{
2817 // get app instance
2818 QCoreApplication app(argc, argv);
2819
2820 // ensure enough arguments are passed
2821 if (app.arguments().size() < 3) {
2822 return 1;
2823 }
2824
2825#ifdef HAS_XERCESC
2826 // care for proper init and cleanup
2827 XMLPlatformUtils::Initialize();
2828 auto cleanup = qScopeGuard(XMLPlatformUtils::Terminate);
2829
2830 /*
2831 * parse XSD first time and cache it
2832 */
2833 XMLGrammarPoolImpl xsd(XMLPlatformUtils::fgMemoryManager);
2834
2835 // create parser for the XSD
2836 SAX2XMLReaderImpl parser(XMLPlatformUtils::fgMemoryManager, &xsd);
2837 init_parser(parser);
2838 QString messages;
2839 CustomErrorHandler eh(&messages);
2840 parser.setErrorHandler(&eh);
2841
2842 // load grammar into the pool, on error just abort
2843 const auto xsdFile = app.arguments().at(2);
2844 if (!parser.loadGrammar((const char16_t *)xsdFile.utf16(), Grammar::SchemaGrammarType, true) || eh.failed()) {
2845 qWarning("Failed to parse XSD %s: %s", qPrintable(xsdFile), qPrintable(messages));
2846 return 2;
2847 }
2848
2849 // lock the pool, no later modifications wanted!
2850 xsd.lockPool();
2851#endif
2852
2853 const QString hlFilenamesListing = app.arguments().value(3);
2854 if (hlFilenamesListing.isEmpty()) {
2855 return 1;
2856 }
2857
2858 QStringList hlFilenames = readListing(hlFilenamesListing);
2859 if (hlFilenames.isEmpty()) {
2860 qWarning("Failed to read %s", qPrintable(hlFilenamesListing));
2861 return 3;
2862 }
2863
2864 // text attributes
2865 const QStringList textAttributes = QStringList() << QStringLiteral("name") << QStringLiteral("alternativeNames") << QStringLiteral("section")
2866 << QStringLiteral("mimetype") << QStringLiteral("extensions") << QStringLiteral("style")
2867 << QStringLiteral("author") << QStringLiteral("license") << QStringLiteral("indenter");
2868
2869 // index all given highlightings
2870 HlFilesChecker filesChecker;
2871 QVariantMap hls;
2872 int anyError = 0;
2873 for (const QString &hlFilename : std::as_const(hlFilenames)) {
2874 QFile hlFile(hlFilename);
2875 if (!hlFile.open(QIODevice::ReadOnly)) {
2876 qWarning("Failed to open %s", qPrintable(hlFilename));
2877 anyError = 3;
2878 continue;
2879 }
2880
2881#ifdef HAS_XERCESC
2882 // create parser
2883 SAX2XMLReaderImpl parser(XMLPlatformUtils::fgMemoryManager, &xsd);
2884 init_parser(parser);
2885 QString messages;
2886 CustomErrorHandler eh(&messages);
2887 parser.setErrorHandler(&eh);
2888
2889 // parse the XML file
2890 parser.parse((const char16_t *)hlFile.fileName().utf16());
2891
2892 // report issues
2893 if (eh.failed()) {
2894 qWarning("Failed to validate XML %s: %s", qPrintable(hlFile.fileName()), qPrintable(messages));
2895 anyError = 4;
2896 continue;
2897 }
2898#endif
2899
2900 // read the needed attributes from toplevel language tag
2901 hlFile.reset();
2902 QXmlStreamReader xml(&hlFile);
2903 if (xml.readNextStartElement()) {
2904 if (xml.name() != QLatin1String("language")) {
2905 anyError = 5;
2906 continue;
2907 }
2908 } else {
2909 anyError = 6;
2910 continue;
2911 }
2912
2913 // map to store hl info
2914 QVariantMap hl;
2915
2916 // transfer text attributes
2917 for (const QString &attribute : std::as_const(textAttributes)) {
2918 hl[attribute] = xml.attributes().value(attribute).toString();
2919 }
2920
2921 // check if extensions have the right format
2922 if (!checkExtensions(hl[QStringLiteral("extensions")].toString())) {
2923 qWarning() << hlFilename << "'extensions' wildcards invalid:" << hl[QStringLiteral("extensions")].toString();
2924 anyError = 23;
2925 }
2926
2927 // numerical attributes
2928 hl[QStringLiteral("version")] = xml.attributes().value(QLatin1String("version")).toInt();
2929 hl[QStringLiteral("priority")] = xml.attributes().value(QLatin1String("priority")).toInt();
2930
2931 // add boolean one
2932 hl[QStringLiteral("hidden")] = attrToBool(xml.attributes().value(QLatin1String("hidden")));
2933
2934 // keep some strings as UTF-8 for faster translations
2935 hl[QStringLiteral("nameUtf8")] = hl[QStringLiteral("name")].toString().toUtf8();
2936 hl[QStringLiteral("sectionUtf8")] = hl[QStringLiteral("section")].toString().toUtf8();
2937
2938 // remember hl
2939 hls[QFileInfo(hlFile).fileName()] = hl;
2940
2941 const QString hlName = hl[QStringLiteral("name")].toString();
2942 const QString hlAlternativeNames = hl[QStringLiteral("alternativeNames")].toString();
2943
2944 filesChecker.setDefinition(xml.attributes().value(QStringLiteral("kateversion")),
2945 hlFilename,
2946 hlName,
2947 hlAlternativeNames.split(u';', Qt::SkipEmptyParts));
2948
2949 // scan for broken regex or keywords with spaces
2950 while (!xml.atEnd()) {
2951 xml.readNext();
2952 filesChecker.processElement(xml);
2953 }
2954
2955 if (xml.hasError()) {
2956 anyError = 33;
2957 qWarning() << hlFilename << "-" << xml.errorString() << "@ offset" << xml.characterOffset();
2958 }
2959 }
2960
2961 filesChecker.resolveContexts();
2962
2963 if (!filesChecker.check()) {
2964 anyError = 7;
2965 }
2966
2967 // bail out if any problem was seen
2968 if (anyError) {
2969 return anyError;
2970 }
2971
2972 // create outfile, after all has worked!
2973 QFile outFile(app.arguments().at(1));
2974 if (!outFile.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
2975 return 9;
2976 }
2977
2978 // write out json
2979 outFile.write(QCborValue::fromVariant(QVariant(hls)).toCbor());
2980
2981 // be done
2982 return 0;
2983}
Type type(const QSqlDatabase &db)
char * toString(const EngineQuery &query)
KDB_EXPORT KDbVersionInfo version()
void error(QWidget *parent, const QString &text, const QString &title, const KGuiItem &buttonOk, Options options=Notify)
KIOCORE_EXPORT void add(const QString &fileClass, const QString &directory)
QAction * replace(const QObject *recvr, const char *slot, QObject *parent)
QString name(StandardAction id)
QAction * next(const QObject *recvr, const char *slot, QObject *parent)
QAction * find(const QObject *recvr, const char *slot, QObject *parent)
const QList< QKeySequence > & end()
KTEXTEDITOR_EXPORT size_t qHash(KTextEditor::Cursor cursor, size_t seed=0) noexcept
QCborValue fromVariant(const QVariant &variant)
bool isDigit(char32_t ucs4)
bool isLetter(char32_t ucs4)
char16_t & unicode()
QString fileName() const const
void append(QList< T > &&value)
void clear()
bool isEmpty() const const
void push_back(parameter_type value)
void reserve(qsizetype size)
qsizetype size() const const
iterator end()
iterator find(const Key &key)
iterator insert(const Key &key, const T &value)
QString errorString() const const
bool isValid() const const
QString pattern() const const
qsizetype patternErrorOffset() const const
void clear()
bool contains(const QSet< T > &other) const const
iterator erase(const_iterator pos)
iterator insert(const T &value)
qsizetype size() const const
void chop(qsizetype n)
QString fromUtf16(const char16_t *unicode, qsizetype size)
qsizetype indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const const
bool isEmpty() const const
QString number(double n, char format, int precision)
QString & replace(QChar before, QChar after, Qt::CaseSensitivity cs)
void reserve(qsizetype size)
qsizetype size() const const
QString sliced(qsizetype pos) const const
QStringList split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const const
bool contains(QChar c, Qt::CaseSensitivity cs) const const
QChar first() const const
qsizetype indexOf(QChar c, qsizetype from, Qt::CaseSensitivity cs) const const
bool isNull() const const
qsizetype size() const const
QStringView sliced(qsizetype pos) const const
QList< QStringView > split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
bool startsWith(QChar ch) const const
int toInt(bool *ok, int base) const const
QString toString() const const
bool operator==(const QGraphicsApiFilter &reference, const QGraphicsApiFilter &sample)
CaseInsensitive
SkipEmptyParts
QTextStream & endl(QTextStream &stream)
QStringView name() const const
QStringView value() const const
QStringView value(QAnyStringView namespaceUri, QAnyStringView name) const const
bool atEnd() const const
QXmlStreamAttributes attributes() const const
qint64 characterOffset() const const
QString errorString() const const
bool hasError() const const
bool isCharacters() const const
bool isEndElement() const const
bool isStartElement() const const
qint64 lineNumber() const const
QStringView name() const const
QString readElementText(ReadElementTextBehaviour behaviour)
TokenType readNext()
bool readNextStartElement()
QStringView text() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Fri Sep 6 2024 11:58:52 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.