KSyntaxHighlighting

katehighlightingindexer.cpp
1/*
2 SPDX-FileCopyrightText: 2014 Christoph Cullmann <cullmann@kde.org>
3 SPDX-FileCopyrightText: 2020 Jonathan Poelen <jonathan.poelen@gmail.com>
4
5 SPDX-License-Identifier: MIT
6*/
7
8#include <QBuffer>
9#include <QCborValue>
10#include <QCoreApplication>
11#include <QDebug>
12#include <QFile>
13#include <QFileInfo>
14#include <QMutableMapIterator>
15#include <QRegularExpression>
16#include <QScopeGuard>
17#include <QString>
18#include <QVariant>
19#include <QXmlStreamReader>
20
21#ifdef HAS_XERCESC
22
23#include <xercesc/framework/MemBufInputSource.hpp>
24#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
25
26#include <xercesc/parsers/SAX2XMLReaderImpl.hpp>
27
28#include <xercesc/sax/ErrorHandler.hpp>
29#include <xercesc/sax/SAXParseException.hpp>
30
31#include <xercesc/util/PlatformUtils.hpp>
32#include <xercesc/util/XMLString.hpp>
33#include <xercesc/util/XMLUni.hpp>
34
35#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
36#include <xercesc/validators/common/Grammar.hpp>
37
38using namespace xercesc;
39
40/*
41 * Ideas taken from:
42 *
43 * author : Boris Kolpackov <boris@codesynthesis.com>
44 * copyright : not copyrighted - public domain
45 *
46 * This program uses Xerces-C++ SAX2 parser to load a set of schema files
47 * and then to validate a set of XML documents against these schemas. To
48 * build this program you will need Xerces-C++ 3.0.0 or later. For more
49 * information, see:
50 *
51 * http://www.codesynthesis.com/~boris/blog/2010/03/15/validating-external-schemas-xerces-cxx/
52 */
53
54/**
55 * Error handler object used during xml schema validation.
56 */
57class CustomErrorHandler : public ErrorHandler
58{
59public:
60 /**
61 * Constructor
62 * @param messages Pointer to the error message string to fill.
63 */
64 CustomErrorHandler(QString *messages)
65 : m_messages(messages)
66 {
67 }
68
69 /**
70 * Check global success/fail state.
71 * @return True if there was a failure, false otherwise.
72 */
73 bool failed() const
74 {
75 return m_failed;
76 }
77
78private:
79 /**
80 * Severity classes for error messages.
81 */
82 enum severity { s_warning, s_error, s_fatal };
83
84 /**
85 * Wrapper for warning exceptions.
86 * @param e Exception to handle.
87 */
88 void warning(const SAXParseException &e) override
89 {
90 m_failed = true; // be strict, warnings are evil, too!
91 handle(e, s_warning);
92 }
93
94 /**
95 * Wrapper for error exceptions.
96 * @param e Exception to handle.
97 */
98 void error(const SAXParseException &e) override
99 {
100 m_failed = true;
101 handle(e, s_error);
102 }
103
104 /**
105 * Wrapper for fatal error exceptions.
106 * @param e Exception to handle.
107 */
108 void fatalError(const SAXParseException &e) override
109 {
110 m_failed = true;
111 handle(e, s_fatal);
112 }
113
114 /**
115 * Reset the error status to "no error".
116 */
117 void resetErrors() override
118 {
119 m_failed = false;
120 }
121
122 /**
123 * Generic handler for error/warning/fatal error message exceptions.
124 * @param e Exception to handle.
125 * @param s Enum value encoding the message severtity.
126 */
127 void handle(const SAXParseException &e, severity s)
128 {
129 // get id to print
130 const XMLCh *xid(e.getPublicId());
131 if (!xid)
132 xid = e.getSystemId();
133
134 m_messages << QString::fromUtf16(xid) << ":" << e.getLineNumber() << ":" << e.getColumnNumber() << " " << (s == s_warning ? "warning: " : "error: ")
135 << QString::fromUtf16(e.getMessage()) << Qt::endl;
136 }
137
138private:
139 /**
140 * Storage for created error messages in this handler.
141 */
142 QTextStream m_messages;
143
144 /**
145 * Global error state. True if there was an error, false otherwise.
146 */
147 bool m_failed = false;
148};
149
150class CustomXMLValidator : public SAX2XMLReaderImpl
151{
152public:
153 QString messages;
154 CustomErrorHandler eh{&messages};
155
156 CustomXMLValidator(XMLGrammarPool *xsd)
157 : SAX2XMLReaderImpl(XMLPlatformUtils::fgMemoryManager, xsd)
158 {
159 // Commonly useful configuration.
160 //
161 setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
162 setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);
163 setFeature(XMLUni::fgSAX2CoreValidation, true);
164
165 // Enable validation.
166 //
167 setFeature(XMLUni::fgXercesSchema, true);
168 setFeature(XMLUni::fgXercesSchemaFullChecking, true);
169 setFeature(XMLUni::fgXercesValidationErrorAsFatal, true);
170
171 // Use the loaded grammar during parsing.
172 //
173 setFeature(XMLUni::fgXercesUseCachedGrammarInParse, true);
174
175 // Don't load schemas from any other source (e.g., from XML document's
176 // xsi:schemaLocation attributes).
177 //
178 setFeature(XMLUni::fgXercesLoadSchema, false);
179
180 // Xerces-C++ 3.1.0 is the first version with working multi import
181 // support.
182 //
183 setFeature(XMLUni::fgXercesHandleMultipleImports, true);
184
185 setErrorHandler(&eh);
186 }
187};
188
189#endif
190
191#include "../lib/worddelimiters_p.h"
192#include "../lib/xml_p.h"
193
194#include <array>
195
196using KSyntaxHighlighting::WordDelimiters;
197using KSyntaxHighlighting::Xml::attrToBool;
198
199using namespace Qt::Literals::StringLiterals;
200
201static constexpr QStringView operator""_sv(const char16_t *s, std::size_t n)
202{
203 return QStringView(s, s + n);
204}
205
206namespace
207{
208
//! A "major.minor" version number, ordered first by major then by minor revision.
struct KateVersion {
    int majorRevision;
    int minorRevision;

    //! Both parts default to 0.
    KateVersion(int majorRevision = 0, int minorRevision = 0)
        : majorRevision(majorRevision)
        , minorRevision(minorRevision)
    {
    }

    //! \return \c true when this version is strictly older than \p version
    bool operator<(const KateVersion &version) const
    {
        if (majorRevision != version.majorRevision) {
            return majorRevision < version.majorRevision;
        }
        return minorRevision < version.minorRevision;
    }
};
224
225class HlFilesChecker
226{
227public:
228 void setDefinition(QStringView verStr, const QString &filename, const QString &name, const QStringList &alternativeNames)
229 {
230 m_currentDefinition = &*m_definitions.insert(name, Definition{});
231 m_currentDefinition->languageName = name;
232 m_currentDefinition->filename = filename;
233 m_currentDefinition->kateVersionStr = verStr.toString();
234 m_currentKeywords = nullptr;
235 m_currentContext = nullptr;
236
237 const auto idx = verStr.indexOf(u'.');
238 if (idx <= 0) {
239 qWarning() << filename << "invalid kateversion" << verStr;
240 m_success = false;
241 } else {
242 m_currentDefinition->kateVersion = {verStr.sliced(0, idx).toInt(), verStr.sliced(idx + 1).toInt()};
243 }
244
245 auto checkName = [this, &filename](char const *nameType, const QString &name) {
246 auto it = m_names.find(name);
247 if (it != m_names.end()) {
248 qWarning() << filename << "duplicate" << nameType << "with" << it.value();
249 m_success = false;
250 } else {
251 m_names.insert(name, filename);
252 }
253 };
254 checkName("name", name);
255 for (const auto &alternativeName : alternativeNames) {
256 checkName("alternative name", alternativeName);
257 }
258 }
259
260 KateVersion currentVersion() const
261 {
262 return m_currentDefinition->kateVersion;
263 }
264
265 void processElement(const QXmlStreamReader &xml)
266 {
267 switch (xml.tokenType()) {
269 if (m_currentContext) {
270 m_currentContext->rules.push_back(Context::Rule{});
271 auto &rule = m_currentContext->rules.back();
272 m_success = rule.parseElement(m_currentDefinition->filename, xml) && m_success;
273 m_currentContext->hasDynamicRule = m_currentContext->hasDynamicRule || rule.dynamic == XmlBool::True;
274 } else if (m_currentKeywords) {
275 m_inKeywordItem = true;
276 } else if (xml.name() == u"context"_sv) {
277 processContextElement(xml);
278 } else if (xml.name() == u"list"_sv) {
279 processListElement(xml);
280 } else if (xml.name() == u"keywords"_sv) {
281 m_success = m_currentDefinition->parseKeywords(xml) && m_success;
282 } else if (xml.name() == u"emptyLine"_sv) {
283 m_success = parseEmptyLine(m_currentDefinition->filename, xml) && m_success;
284 } else if (xml.name() == u"itemData"_sv) {
285 m_success = m_currentDefinition->itemDatas.parseElement(m_currentDefinition->filename, xml) && m_success;
286 }
287 break;
288
290 if (m_currentContext && xml.name() == u"context"_sv) {
291 m_currentContext = nullptr;
292 } else if (m_currentKeywords && xml.name() == u"list"_sv) {
293 m_currentKeywords = nullptr;
294 } else if (m_currentKeywords) {
295 m_success = m_currentKeywords->items.parseElement(m_currentDefinition->filename, xml, m_textContent) && m_success;
296 m_textContent.clear();
297 m_inKeywordItem = false;
298 }
299 break;
300
303 if (m_inKeywordItem) {
304 m_textContent += xml.text();
305 }
306 break;
307
308 default:;
309 }
310 }
311
312 //! Resolve context attribute and include tag
313 void resolveContexts()
314 {
316 while (def.hasNext()) {
317 def.next();
318 auto &definition = def.value();
319 auto &contexts = definition.contexts;
320
321 if (contexts.isEmpty()) {
322 qWarning() << definition.filename << "has no context";
323 m_success = false;
324 continue;
325 }
326
327 auto markAsUsedContext = [](ContextName &contextName) {
328 if (!contextName.stay && contextName.context) {
329 contextName.context->isOnlyIncluded = false;
330 }
331 };
332
333 QMutableMapIterator<QString, Context> contextIt(contexts);
334 while (contextIt.hasNext()) {
335 contextIt.next();
336 auto &context = contextIt.value();
337 resolveContextName(definition, context, context.lineEndContext, context.line);
338 resolveContextName(definition, context, context.lineEmptyContext, context.line);
339 resolveContextName(definition, context, context.fallthroughContext, context.line);
340 markAsUsedContext(context.lineEndContext);
341 markAsUsedContext(context.lineEmptyContext);
342 markAsUsedContext(context.fallthroughContext);
343 for (auto &rule : context.rules) {
344 rule.parentContext = &context;
345 resolveContextName(definition, context, rule.context, rule.line);
346 if (rule.type != Context::Rule::Type::IncludeRules) {
347 markAsUsedContext(rule.context);
348 } else if (rule.includeAttrib == XmlBool::True && rule.context.context) {
349 rule.context.context->referencedWithIncludeAttrib = true;
350 }
351 }
352 }
353
354 auto *firstContext = &*definition.contexts.find(definition.firstContextName);
355 firstContext->isOnlyIncluded = false;
356 definition.firstContext = firstContext;
357 }
358
359 resolveIncludeRules();
360 }
361
362 bool check() const
363 {
364 bool success = m_success;
365
366 const auto usedContexts = extractUsedContexts();
367
368 QMap<const Definition *, const Definition *> maxVersionByDefinitions;
370
371 QMapIterator<QString, Definition> def(m_definitions);
372 while (def.hasNext()) {
373 def.next();
374 const auto &definition = def.value();
375 const auto &filename = definition.filename;
376
377 auto *maxDef = maxKateVersionDefinition(definition, maxVersionByDefinitions);
378 if (maxDef != &definition) {
379 qWarning() << definition.filename << "depends on a language" << maxDef->languageName << "in version" << maxDef->kateVersionStr
380 << ". Please, increase kateversion.";
381 success = false;
382 }
383
384 QSet<ItemDatas::Style> usedAttributeNames;
385 QSet<ItemDatas::Style> ignoredAttributeNames;
386 success = checkKeywordsList(definition) && success;
387 success = checkContexts(definition, usedAttributeNames, ignoredAttributeNames, usedContexts, unreachableIncludedRules) && success;
388
389 // search for non-existing itemDatas.
390 const auto invalidNames = usedAttributeNames - definition.itemDatas.styleNames;
391 for (const auto &styleName : invalidNames) {
392 qWarning() << filename << "line" << styleName.line << "reference of non-existing itemData attributes:" << styleName.name;
393 success = false;
394 }
395
396 // search for existing itemDatas, but unusable.
397 const auto ignoredNames = ignoredAttributeNames - usedAttributeNames;
398 for (const auto &styleName : ignoredNames) {
399 qWarning() << filename << "line" << styleName.line << "attribute" << styleName.name
400 << "is never used. All uses are with lookAhead=true or <IncludeRules/>";
401 success = false;
402 }
403
404 // search for unused itemDatas.
405 auto unusedNames = definition.itemDatas.styleNames - usedAttributeNames;
406 unusedNames -= ignoredNames;
407 for (const auto &styleName : std::as_const(unusedNames)) {
408 qWarning() << filename << "line" << styleName.line << "unused itemData:" << styleName.name;
409 success = false;
410 }
411 }
412
413 QMutableMapIterator<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRuleIt(unreachableIncludedRules);
414 while (unreachableIncludedRuleIt.hasNext()) {
415 unreachableIncludedRuleIt.next();
416 IncludedRuleUnreachableBy &unreachableRulesBy = unreachableIncludedRuleIt.value();
417 if (unreachableRulesBy.alwaysUnreachable) {
418 auto *rule = unreachableIncludedRuleIt.key();
419
420 if (!rule->parentContext->isOnlyIncluded) {
421 continue;
422 }
423
424 // remove duplicates rules
426 auto &unreachableBy = unreachableRulesBy.unreachableBy;
427 unreachableBy.erase(std::remove_if(unreachableBy.begin(),
428 unreachableBy.end(),
429 [&](const RuleAndInclude &ruleAndInclude) {
430 if (rules.contains(ruleAndInclude.rule)) {
431 return true;
432 }
433 rules.insert(ruleAndInclude.rule);
434 return false;
435 }),
436 unreachableBy.end());
437
438 QString message;
439 message.reserve(128);
440 for (auto &ruleAndInclude : std::as_const(unreachableBy)) {
441 message += u"line "_sv;
442 message += QString::number(ruleAndInclude.rule->line);
443 message += u" ["_sv;
444 message += ruleAndInclude.rule->parentContext->name;
445 if (rule->filename != ruleAndInclude.rule->filename) {
446 message += u" ("_sv;
447 message += ruleAndInclude.rule->filename;
448 message += u')';
449 }
450 if (ruleAndInclude.includeRules) {
451 message += u" via line "_sv;
452 message += QString::number(ruleAndInclude.includeRules->line);
453 }
454 message += u"], "_sv;
455 }
456 message.chop(2);
457
458 qWarning() << rule->filename << "line" << rule->line << "no IncludeRule can reach this rule, hidden by" << message;
459 success = false;
460 }
461 }
462
463 return success;
464 }
465
466private:
//! State of a boolean XML attribute.
//! \c Unspecified comes first so that a value-initialized XmlBool (\c XmlBool{}) means "attribute not given".
enum class XmlBool {
    Unspecified,
    False,
    True,
};
472
473 struct Context;
474
475 struct ContextName {
477 int popCount = 0;
478 bool stay = false;
479
480 Context *context = nullptr;
481 };
482
483 struct Parser {
484 const QString &filename;
485 const QXmlStreamReader &xml;
486 const QXmlStreamAttribute &attr;
487 bool success;
488
489 //! Read a string type attribute, \c success = \c false when \p str is not empty
490 //! \return \c true when attr.name() == attrName, otherwise false
491 bool extractString(QString &str, QStringView attrName)
492 {
493 if (attr.name() != attrName) {
494 return false;
495 }
496
497 str = attr.value().toString();
498 if (str.isEmpty()) {
499 qWarning() << filename << "line" << xml.lineNumber() << attrName << "attribute is empty";
500 success = false;
501 }
502
503 return true;
504 }
505
506 //! Read a bool type attribute, \c success = \c false when \p xmlBool is not \c XmlBool::Unspecified.
507 //! \return \c true when attr.name() == attrName, otherwise false
508 bool extractXmlBool(XmlBool &xmlBool, QStringView attrName)
509 {
510 if (attr.name() != attrName) {
511 return false;
512 }
513
514 xmlBool = attr.value().isNull() ? XmlBool::Unspecified : attrToBool(attr.value()) ? XmlBool::True : XmlBool::False;
515
516 return true;
517 }
518
519 //! Read a positive integer type attribute, \c success = \c false when \p positive is already greater than or equal to 0
520 //! \return \c true when attr.name() == attrName, otherwise false
521 bool extractPositive(int &positive, QStringView attrName)
522 {
523 if (attr.name() != attrName) {
524 return false;
525 }
526
527 bool ok = true;
528 positive = attr.value().toInt(&ok);
529
530 if (!ok || positive < 0) {
531 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a positive integer:" << attr.value();
532 success = false;
533 }
534
535 return true;
536 }
537
538 //! Read a color, \c success = \c false when \p color is already greater than or equal to 0
539 //! \return \c true when attr.name() == attrName, otherwise false
540 bool checkColor(QStringView attrName)
541 {
542 if (attr.name() != attrName) {
543 return false;
544 }
545
546 const auto value = attr.value();
547 if (value.isEmpty() /*|| QColor(value).isValid()*/) {
548 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a color:" << value;
549 success = false;
550 }
551
552 return true;
553 }
554
555 //! Read a QChar, \c success = \c false when \p c is not \c '\0' or does not have one char
556 //! \return \c true when attr.name() == attrName, otherwise false
557 bool extractChar(QChar &c, QStringView attrName)
558 {
559 if (attr.name() != attrName) {
560 return false;
561 }
562
563 if (attr.value().size() == 1) {
564 c = attr.value()[0];
565 } else {
566 c = u'_';
567 qWarning() << filename << "line" << xml.lineNumber() << attrName << "must contain exactly one char:" << attr.value();
568 success = false;
569 }
570
571 return true;
572 }
573
574 //! \return parsing status when \p isExtracted is \c true, otherwise \c false
575 bool checkIfExtracted(bool isExtracted)
576 {
577 if (isExtracted) {
578 return success;
579 }
580
581 qWarning() << filename << "line" << xml.lineNumber() << "unknown attribute:" << attr.name();
582 return false;
583 }
584 };
585
586 struct Keywords {
587 struct Items {
588 struct Item {
589 QString content;
590 int line;
591
592 friend size_t qHash(const Item &item, size_t seed = 0)
593 {
594 return qHash(item.content, seed);
595 }
596
597 friend bool operator==(const Item &item0, const Item &item1)
598 {
599 return item0.content == item1.content;
600 }
601 };
602
603 QList<Item> keywords;
604 QSet<Item> includes;
605
606 bool parseElement(const QString &filename, const QXmlStreamReader &xml, const QString &content)
607 {
608 bool success = true;
609
610 const int line = xml.lineNumber();
611
612 if (content.isEmpty()) {
613 qWarning() << filename << "line" << line << "is empty:" << xml.name();
614 success = false;
615 }
616
617 if (xml.name() == u"include"_sv) {
618 includes.insert({content, line});
619 } else if (xml.name() == u"item"_sv) {
620 keywords.append({content, line});
621 } else {
622 qWarning() << filename << "line" << line << "invalid element:" << xml.name();
623 success = false;
624 }
625
626 return success;
627 }
628 };
629
631 Items items;
632 int line;
633
634 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
635 {
636 line = xml.lineNumber();
637
638 bool success = true;
639 const auto attrs = xml.attributes();
640 for (const auto &attr : attrs) {
641 Parser parser{filename, xml, attr, success};
642
643 const bool isExtracted = parser.extractString(name, u"name"_sv);
644
645 success = parser.checkIfExtracted(isExtracted);
646 }
647 return success;
648 }
649 };
650
651 struct Context {
652 struct Rule {
653 enum class Type {
654 Unknown,
655 AnyChar,
656 Detect2Chars,
657 DetectChar,
658 DetectIdentifier,
659 DetectSpaces,
660 Float,
661 HlCChar,
662 HlCHex,
663 HlCOct,
664 HlCStringChar,
665 IncludeRules,
666 Int,
667 LineContinue,
668 RangeDetect,
669 RegExpr,
670 StringDetect,
671 WordDetect,
672 keyword,
673 };
674
675 Type type{};
676
677 bool isDotRegex = false;
678 int line = -1;
679
680 // commonAttributes
681 QString attribute;
682 ContextName context;
683 QString beginRegion;
684 QString endRegion;
685 int column = -1;
686 XmlBool lookAhead{};
687 XmlBool firstNonSpace{};
688
689 // StringDetect, WordDetect, keyword
690 XmlBool insensitive{};
691
692 // DetectChar, StringDetect, RegExpr, keyword
693 XmlBool dynamic{};
694
695 // Regex
696 XmlBool minimal{};
697
698 // IncludeRule
699 XmlBool includeAttrib{};
700
701 // DetectChar, Detect2Chars, LineContinue, RangeDetect
702 QChar char0;
703 // Detect2Chars, RangeDetect
704 QChar char1;
705
706 // AnyChar, StringDetect, RegExpr, WordDetect, keyword
707 QString string;
708 // RegExpr without .* as suffix
709 QString sanitizedString;
710
711 // Float, HlCHex, HlCOct, Int, WordDetect, keyword
712 QString additionalDeliminator;
713 QString weakDeliminator;
714
715 // rules included by IncludeRules (without IncludeRule)
716 QList<const Rule *> includedRules;
717
718 // IncludeRules included by IncludeRules
719 QSet<const Rule *> includedIncludeRules;
720
721 Context const *parentContext = nullptr;
722
723 QString filename;
724
725 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
726 {
727 this->filename = filename;
728 line = xml.lineNumber();
729
730 using Pair = QPair<QStringView, Type>;
731 static const auto pairs = {
732 Pair{u"AnyChar"_sv, Type::AnyChar},
733 Pair{u"Detect2Chars"_sv, Type::Detect2Chars},
734 Pair{u"DetectChar"_sv, Type::DetectChar},
735 Pair{u"DetectIdentifier"_sv, Type::DetectIdentifier},
736 Pair{u"DetectSpaces"_sv, Type::DetectSpaces},
737 Pair{u"Float"_sv, Type::Float},
738 Pair{u"HlCChar"_sv, Type::HlCChar},
739 Pair{u"HlCHex"_sv, Type::HlCHex},
740 Pair{u"HlCOct"_sv, Type::HlCOct},
741 Pair{u"HlCStringChar"_sv, Type::HlCStringChar},
742 Pair{u"IncludeRules"_sv, Type::IncludeRules},
743 Pair{u"Int"_sv, Type::Int},
744 Pair{u"LineContinue"_sv, Type::LineContinue},
745 Pair{u"RangeDetect"_sv, Type::RangeDetect},
746 Pair{u"RegExpr"_sv, Type::RegExpr},
747 Pair{u"StringDetect"_sv, Type::StringDetect},
748 Pair{u"WordDetect"_sv, Type::WordDetect},
749 Pair{u"keyword", Type::keyword},
750 };
751
752 for (auto pair : pairs) {
753 if (xml.name() == pair.first) {
754 type = pair.second;
755 bool success = parseAttributes(filename, xml);
756 success = checkMandoryAttributes(filename, xml) && success;
757 if (success && type == Type::RegExpr) {
758 // ., (.) followed by *, +, {1} or nothing
759 static const QRegularExpression isDot(QStringLiteral(R"(^\‍(?\.(?:[*+][*+?]?|[*+]|\{1\})?\$?$)"));
760 // remove "(?:" and ")"
761 static const QRegularExpression removeParentheses(QStringLiteral(R"(\‍((?:\?:)?|\))"));
762 // remove parentheses on a copy of string
763 auto reg = QString(string).replace(removeParentheses, QString());
764 isDotRegex = reg.contains(isDot);
765
766 // Remove .* and .*$ suffix.
767 static const QRegularExpression allSuffix(QStringLiteral("(?<!\\\\)[.][*][?+]?[$]?$"));
768 sanitizedString = string;
769 sanitizedString.replace(allSuffix, QString());
770 // string is a catch-all, do not sanitize
771 if (sanitizedString.isEmpty() || sanitizedString == u"^"_sv) {
772 sanitizedString = string;
773 }
774 }
775 return success;
776 }
777 }
778
779 qWarning() << filename << "line" << xml.lineNumber() << "unknown element:" << xml.name();
780 return false;
781 }
782
783 private:
784 bool parseAttributes(const QString &filename, const QXmlStreamReader &xml)
785 {
786 bool success = true;
787
788 const auto attrs = xml.attributes();
789 for (const auto &attr : attrs) {
790 Parser parser{filename, xml, attr, success};
791
792 // clang-format off
793 const bool isExtracted
794 = parser.extractString(attribute, u"attribute"_sv)
795 || parser.extractString(context.name, u"context"_sv)
796 || parser.extractXmlBool(lookAhead, u"lookAhead"_sv)
797 || parser.extractXmlBool(firstNonSpace, u"firstNonSpace"_sv)
798 || parser.extractString(beginRegion, u"beginRegion"_sv)
799 || parser.extractString(endRegion, u"endRegion"_sv)
800 || parser.extractPositive(column, u"column"_sv)
801 || ((type == Type::RegExpr
802 || type == Type::StringDetect
803 || type == Type::WordDetect
804 || type == Type::keyword
805 ) && parser.extractXmlBool(insensitive, u"insensitive"_sv))
806 || ((type == Type::DetectChar
807 || type == Type::RegExpr
808 || type == Type::StringDetect
809 || type == Type::keyword
810 ) && parser.extractXmlBool(dynamic, u"dynamic"_sv))
811 || ((type == Type::RegExpr)
812 && parser.extractXmlBool(minimal, u"minimal"_sv))
813 || ((type == Type::DetectChar
814 || type == Type::Detect2Chars
815 || type == Type::LineContinue
816 || type == Type::RangeDetect
817 ) && parser.extractChar(char0, u"char"_sv))
818 || ((type == Type::Detect2Chars
819 || type == Type::RangeDetect
820 ) && parser.extractChar(char1, u"char1"_sv))
821 || ((type == Type::AnyChar
822 || type == Type::RegExpr
823 || type == Type::StringDetect
824 || type == Type::WordDetect
825 || type == Type::keyword
826 ) && parser.extractString(string, u"String"_sv))
827 || ((type == Type::IncludeRules)
828 && parser.extractXmlBool(includeAttrib, u"includeAttrib"_sv))
829 || ((type == Type::Float
830 || type == Type::HlCHex
831 || type == Type::HlCOct
832 || type == Type::Int
833 || type == Type::keyword
834 || type == Type::WordDetect
835 ) && (parser.extractString(additionalDeliminator, u"additionalDeliminator"_sv)
836 || parser.extractString(weakDeliminator, u"weakDeliminator"_sv)))
837 ;
838 // clang-format on
839
840 success = parser.checkIfExtracted(isExtracted);
841 }
842
843 if (type == Type::LineContinue && char0 == u'\0') {
844 char0 = u'\\';
845 }
846
847 return success;
848 }
849
850 bool checkMandoryAttributes(const QString &filename, const QXmlStreamReader &xml)
851 {
852 QString missingAttr;
853
854 switch (type) {
855 case Type::Unknown:
856 return false;
857
858 case Type::AnyChar:
859 case Type::RegExpr:
860 case Type::StringDetect:
861 case Type::WordDetect:
862 case Type::keyword:
863 missingAttr = string.isEmpty() ? QStringLiteral("String") : QString();
864 break;
865
866 case Type::DetectChar:
867 missingAttr = !char0.unicode() ? QStringLiteral("char") : QString();
868 break;
869
870 case Type::Detect2Chars:
871 case Type::RangeDetect:
872 missingAttr = !char0.unicode() && !char1.unicode() ? QStringLiteral("char and char1")
873 : !char0.unicode() ? QStringLiteral("char")
874 : !char1.unicode() ? QStringLiteral("char1")
875 : QString();
876 break;
877
878 case Type::IncludeRules:
879 missingAttr = context.name.isEmpty() ? QStringLiteral("context") : QString();
880 break;
881
882 case Type::DetectIdentifier:
883 case Type::DetectSpaces:
884 case Type::Float:
885 case Type::HlCChar:
886 case Type::HlCHex:
887 case Type::HlCOct:
888 case Type::HlCStringChar:
889 case Type::Int:
890 case Type::LineContinue:
891 break;
892 }
893
894 if (!missingAttr.isEmpty()) {
895 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute:" << missingAttr;
896 return false;
897 }
898
899 return true;
900 }
901 };
902
903 int line;
904 // becomes false when a context (except includeRule) refers to it
905 bool isOnlyIncluded = true;
906 // becomes true when an includedRule refers to it with includeAttrib=true
907 bool referencedWithIncludeAttrib = false;
908 bool hasDynamicRule = false;
910 QString attribute;
911 ContextName lineEndContext;
912 ContextName lineEmptyContext;
913 ContextName fallthroughContext;
914 QList<Rule> rules;
915 XmlBool dynamic{};
916 XmlBool fallthrough{};
917 XmlBool stopEmptyLineContextSwitchLoop{};
918
919 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
920 {
921 line = xml.lineNumber();
922
923 bool success = true;
924
925 const auto attrs = xml.attributes();
926 for (const auto &attr : attrs) {
927 Parser parser{filename, xml, attr, success};
928 XmlBool noIndentationBasedFolding{};
929
930 // clang-format off
931 const bool isExtracted = parser.extractString(name, u"name"_sv)
932 || parser.extractString(attribute, u"attribute"_sv)
933 || parser.extractString(lineEndContext.name, u"lineEndContext"_sv)
934 || parser.extractString(lineEmptyContext.name, u"lineEmptyContext"_sv)
935 || parser.extractString(fallthroughContext.name, u"fallthroughContext"_sv)
936 || parser.extractXmlBool(dynamic, u"dynamic"_sv)
937 || parser.extractXmlBool(fallthrough, u"fallthrough"_sv)
938 || parser.extractXmlBool(stopEmptyLineContextSwitchLoop, u"stopEmptyLineContextSwitchLoop"_sv)
939 || parser.extractXmlBool(noIndentationBasedFolding, u"noIndentationBasedFolding"_sv);
940 // clang-format on
941
942 success = parser.checkIfExtracted(isExtracted);
943 }
944
945 if (name.isEmpty()) {
946 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: name";
947 success = false;
948 }
949
950 if (attribute.isEmpty()) {
951 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: attribute";
952 success = false;
953 }
954
955 return success;
956 }
957 };
958
959 struct ItemDatas {
960 struct Style {
962 int line;
963
964 friend size_t qHash(const Style &style, size_t seed = 0)
965 {
966 return qHash(style.name, seed);
967 }
968
969 friend bool operator==(const Style &style0, const Style &style1)
970 {
971 return style0.name == style1.name;
972 }
973 };
974
975 QSet<Style> styleNames;
976
977 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
978 {
979 bool success = true;
980
982 QString defStyleNum;
983 XmlBool boolean;
984
985 const auto attrs = xml.attributes();
986 for (const auto &attr : attrs) {
987 Parser parser{filename, xml, attr, success};
988
989 // clang-format off
990 const bool isExtracted
991 = parser.extractString(name, u"name"_sv)
992 || parser.extractString(defStyleNum, u"defStyleNum"_sv)
993 || parser.extractXmlBool(boolean, u"bold"_sv)
994 || parser.extractXmlBool(boolean, u"italic"_sv)
995 || parser.extractXmlBool(boolean, u"underline"_sv)
996 || parser.extractXmlBool(boolean, u"strikeOut"_sv)
997 || parser.extractXmlBool(boolean, u"spellChecking"_sv)
998 || parser.checkColor(u"color"_sv)
999 || parser.checkColor(u"selColor"_sv)
1000 || parser.checkColor(u"backgroundColor"_sv)
1001 || parser.checkColor(u"selBackgroundColor"_sv);
1002 // clang-format on
1003
1004 success = parser.checkIfExtracted(isExtracted);
1005 }
1006
1007 if (!name.isEmpty()) {
1008 const auto len = styleNames.size();
1009 styleNames.insert({name, int(xml.lineNumber())});
1010 if (len == styleNames.size()) {
1011 qWarning() << filename << "line" << xml.lineNumber() << "itemData duplicate:" << name;
1012 success = false;
1013 }
1014 }
1015
1016 return success;
1017 }
1018 };
1019
1020 struct Definition {
1021 QMap<QString, Keywords> keywordsList;
1022 QMap<QString, Context> contexts;
1023 ItemDatas itemDatas;
1024 QString firstContextName;
1025 const Context *firstContext = nullptr;
1026 QString filename;
1027 WordDelimiters wordDelimiters;
1028 KateVersion kateVersion{};
1029 QString kateVersionStr;
1030 QString languageName;
1031 QSet<const Definition *> referencedDefinitions;
1032
1033 // Parse <keywords ...>
1034 bool parseKeywords(const QXmlStreamReader &xml)
1035 {
1036 wordDelimiters.append(xml.attributes().value(u"additionalDeliminator"_sv));
1037 wordDelimiters.remove(xml.attributes().value(u"weakDeliminator"_sv));
1038 return true;
1039 }
1040 };
1041
1042 // Parse <context>
1043 void processContextElement(const QXmlStreamReader &xml)
1044 {
1045 Context context;
1046 m_success = context.parseElement(m_currentDefinition->filename, xml) && m_success;
1047 if (m_currentDefinition->firstContextName.isEmpty()) {
1048 m_currentDefinition->firstContextName = context.name;
1049 }
1050 if (m_currentDefinition->contexts.contains(context.name)) {
1051 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate context:" << context.name;
1052 m_success = false;
1053 }
1054 m_currentContext = &*m_currentDefinition->contexts.insert(context.name, context);
1055 }
1056
1057 // Parse <list name="...">
1058 void processListElement(const QXmlStreamReader &xml)
1059 {
1060 Keywords keywords;
1061 m_success = keywords.parseElement(m_currentDefinition->filename, xml) && m_success;
1062 if (m_currentDefinition->keywordsList.contains(keywords.name)) {
1063 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate list:" << keywords.name;
1064 m_success = false;
1065 }
1066 m_currentKeywords = &*m_currentDefinition->keywordsList.insert(keywords.name, keywords);
1067 }
1068
1069 const Definition *maxKateVersionDefinition(const Definition &definition, QMap<const Definition *, const Definition *> &maxVersionByDefinitions) const
1070 {
1071 auto it = maxVersionByDefinitions.find(&definition);
1072 if (it != maxVersionByDefinitions.end()) {
1073 return it.value();
1074 } else {
1075 auto it = maxVersionByDefinitions.insert(&definition, &definition);
1076 for (const auto &referencedDef : definition.referencedDefinitions) {
1077 auto *maxDef = maxKateVersionDefinition(*referencedDef, maxVersionByDefinitions);
1078 if (it.value()->kateVersion < maxDef->kateVersion) {
1079 it.value() = maxDef;
1080 }
1081 }
1082 return it.value();
1083 }
1084 }
1085
1086 // Initialize the referenced rules (Rule::includedRules)
1087 void resolveIncludeRules()
1088 {
1089 QSet<const Context *> usedContexts;
1090 QList<const Context *> contexts;
1091
1093 while (def.hasNext()) {
1094 def.next();
1095 auto &definition = def.value();
1096 QMutableMapIterator<QString, Context> contextIt(definition.contexts);
1097 while (contextIt.hasNext()) {
1098 contextIt.next();
1099 auto &currentContext = contextIt.value();
1100 for (auto &rule : currentContext.rules) {
1101 if (rule.type != Context::Rule::Type::IncludeRules) {
1102 continue;
1103 }
1104
1105 if (rule.context.stay) {
1106 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself";
1107 m_success = false;
1108 continue;
1109 }
1110
1111 if (rule.context.popCount) {
1112 qWarning() << definition.filename << "line" << rule.line << "IncludeRules with #pop prefix";
1113 m_success = false;
1114 }
1115
1116 if (!rule.context.context) {
1117 m_success = false;
1118 continue;
1119 }
1120
1121 // resolve includedRules and includedIncludeRules
1122
1123 usedContexts.clear();
1124 usedContexts.insert(rule.context.context);
1125 contexts.clear();
1126 contexts.append(rule.context.context);
1127
1128 for (int i = 0; i < contexts.size(); ++i) {
1129 currentContext.hasDynamicRule = contexts[i]->hasDynamicRule;
1130 for (const auto &includedRule : contexts[i]->rules) {
1131 if (includedRule.type != Context::Rule::Type::IncludeRules) {
1132 rule.includedRules.append(&includedRule);
1133 } else if (&rule == &includedRule) {
1134 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself by recursivity";
1135 m_success = false;
1136 } else {
1137 rule.includedIncludeRules.insert(&includedRule);
1138
1139 if (includedRule.includedRules.isEmpty()) {
1140 const auto *context = includedRule.context.context;
1141 if (context && !usedContexts.contains(context)) {
1142 contexts.append(context);
1143 usedContexts.insert(context);
1144 }
1145 } else {
1146 rule.includedRules.append(includedRule.includedRules);
1147 }
1148 }
1149 }
1150 }
1151 }
1152 }
1153 }
1154 }
1155
1156 //! Recursively extracts the contexts used from the first context of the definitions.
1157 //! This method detects groups of contexts which are only used among themselves.
1158 QSet<const Context *> extractUsedContexts() const
1159 {
1160 QSet<const Context *> usedContexts;
1161 QList<const Context *> contexts;
1162
1163 QMapIterator<QString, Definition> def(m_definitions);
1164 while (def.hasNext()) {
1165 def.next();
1166 const auto &definition = def.value();
1167
1168 if (definition.firstContext) {
1169 usedContexts.insert(definition.firstContext);
1170 contexts.clear();
1171 contexts.append(definition.firstContext);
1172
1173 for (int i = 0; i < contexts.size(); ++i) {
1174 auto appendContext = [&](const Context *context) {
1175 if (context && !usedContexts.contains(context)) {
1176 contexts.append(context);
1177 usedContexts.insert(context);
1178 }
1179 };
1180
1181 const auto *context = contexts[i];
1182 appendContext(context->lineEndContext.context);
1183 appendContext(context->lineEmptyContext.context);
1184 appendContext(context->fallthroughContext.context);
1185
1186 for (auto &rule : context->rules) {
1187 appendContext(rule.context.context);
1188 }
1189 }
1190 }
1191 }
1192
1193 return usedContexts;
1194 }
1195
1196 struct RuleAndInclude {
1197 const Context::Rule *rule;
1198 const Context::Rule *includeRules;
1199
1200 explicit operator bool() const
1201 {
1202 return rule;
1203 }
1204 };
1205
1206 struct IncludedRuleUnreachableBy {
1207 QList<RuleAndInclude> unreachableBy;
1208 bool alwaysUnreachable = true;
1209 };
1210
1211 //! Check contexts and rules
1212 bool checkContexts(const Definition &definition,
1213 QSet<ItemDatas::Style> &usedAttributeNames,
1214 QSet<ItemDatas::Style> &ignoredAttributeNames,
1215 const QSet<const Context *> &usedContexts,
1216 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1217 {
1218 bool success = true;
1219
1220 QMapIterator<QString, Context> contextIt(definition.contexts);
1221 while (contextIt.hasNext()) {
1222 contextIt.next();
1223
1224 const auto &context = contextIt.value();
1225 const auto &filename = definition.filename;
1226
1227 if (!usedContexts.contains(&context)) {
1228 qWarning() << filename << "line" << context.line << "unused context:" << context.name;
1229 success = false;
1230 continue;
1231 }
1232
1233 if (context.name.startsWith(u"#pop"_sv)) {
1234 qWarning() << filename << "line" << context.line << "the context name must not start with '#pop':" << context.name;
1235 success = false;
1236 }
1237
1238 if (!context.attribute.isEmpty() && (!context.isOnlyIncluded || context.referencedWithIncludeAttrib)) {
1239 usedAttributeNames.insert({context.attribute, context.line});
1240 }
1241
1242 success = checkContextAttribute(definition, context) && success;
1243 success = checkUreachableRules(definition.filename, context, unreachableIncludedRules) && success;
1244 success = suggestRuleMerger(definition.filename, context) && success;
1245
1246 for (const auto &rule : context.rules) {
1247 if (!rule.attribute.isEmpty()) {
1248 if (rule.lookAhead != XmlBool::True) {
1249 usedAttributeNames.insert({rule.attribute, rule.line});
1250 } else {
1251 ignoredAttributeNames.insert({rule.attribute, rule.line});
1252 }
1253 }
1254 success = checkLookAhead(rule) && success;
1255 success = checkStringDetect(rule) && success;
1256 success = checkWordDetect(rule) && success;
1257 success = checkKeyword(definition, rule) && success;
1258 success = checkRegExpr(filename, rule, context) && success;
1259 success = checkDelimiters(definition, rule) && success;
1260 }
1261 }
1262
1263 return success;
1264 }
1265
1266 //! Check that a regular expression in a RegExpr rule:
1267 //! - isValid()
1268 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1269 //! - dynamic=true but no place holder used?
1270 //! - is not . with lookAhead="1"
1271 //! - is not ^... without column ou firstNonSpace attribute
1272 //! - is not equivalent to DetectSpaces, DetectChar, Detect2Chars, StringDetect, DetectIdentifier, RangeDetect, LineContinue or AnyChar
1273 //! - has no unused captures
1274 //! - has no unnecessary quantifier with lookAhead
1275 bool checkRegExpr(const QString &filename, const Context::Rule &rule, const Context &context) const
1276 {
1277 // ignore empty regex because the error is raised during xml parsing
1278 if (rule.type == Context::Rule::Type::RegExpr && !rule.string.isEmpty()) {
1279 const QRegularExpression regexp(rule.string);
1280 if (!checkRegularExpression(rule.filename, regexp, rule.line)) {
1281 return false;
1282 }
1283
1284 // dynamic == true and no place holder?
1285 if (rule.dynamic == XmlBool::True) {
1286 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1287 if (!rule.string.contains(placeHolder)) {
1288 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1289 return false;
1290 }
1291 }
1292
1293 if (rule.lookAhead == XmlBool::True && (rule.string.endsWith(u".*$"_sv) || rule.string.endsWith(u".*"_sv)) && -1 == rule.string.indexOf(u'|')) {
1294 qWarning() << rule.filename << "line" << rule.line << "RegExpr with lookAhead=1 doesn't need to end with '.*' or '.*$':" << rule.string;
1295 return false;
1296 }
1297
1298 auto reg = (rule.lookAhead == XmlBool::True) ? rule.sanitizedString : rule.string;
1299 if (rule.lookAhead == XmlBool::True) {
1300 static const QRegularExpression removeAllSuffix(QStringLiteral(
1301 R"(((?<!\\)\\‍(?:[DSWdsw]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4})|(?<!\\)[^])}\\]|(?=\\)\\\\)[*][?+]?$)"));
1302 reg.replace(removeAllSuffix, QString());
1303 }
1304
1305 reg.replace(QStringLiteral("{1}"), QString());
1306 reg.replace(QStringLiteral("{1,1}"), QString());
1307
1308 // is DetectSpaces
1309 // optional ^ then \s, [\s], [\t ], [ \t] possibly in (...) or (?:...) followed by *, +
1310 static const QRegularExpression isDetectSpaces(
1311 QStringLiteral(R"(^\^?(?:\‍((?:\?:)?)?\^?(?:\\s|\[(?:\\s| (?:\t|\\t)|(?:\t|\\t) )\])\)?(?:[*+][*+?]?|[*+])?\)?\)?$)"));
1312 if (rule.string.contains(isDetectSpaces)) {
1313 char const *extraMsg = rule.string.contains(u'^') ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1314 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectSpaces / DetectChar / AnyChar" << extraMsg << ":"
1315 << rule.string;
1316 return false;
1317 }
1318
1319#define REG_ESCAPE_CHAR R"(\\(?:[^0BDPSWbdpswoux]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4}))"
1320#define REG_CHAR "(?:" REG_ESCAPE_CHAR "|\\[(?:" REG_ESCAPE_CHAR "|.)\\]|[^[.^])"
1321
1322 // is RangeDetect
1323 static const QRegularExpression isRange(QStringLiteral("^\\^?" REG_CHAR "(?:"
1324 "\\.\\*[?+]?" REG_CHAR "|"
1325 "\\[\\^(" REG_ESCAPE_CHAR "|.)\\]\\*[?+]?\\1"
1326 ")$"));
1327 if ((rule.lookAhead == XmlBool::True || rule.minimal == XmlBool::True || rule.string.contains(u".*?"_sv) || rule.string.contains(u"[^"_sv))
1328 && reg.contains(isRange)) {
1329 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by RangeDetect:" << rule.string;
1330 return false;
1331 }
1332
1333 // is AnyChar
1334 static const QRegularExpression isAnyChar(QStringLiteral(R"(^(\^|\‍((\?:)?)*\[(?!\^)[-\]]?(\\[^0BDPSWbdpswoux]|[^-\]\\])*\]\)*$)"));
1335 if (rule.string.contains(isAnyChar)) {
1336 auto extra = (reg[0] == u'^' || reg[1] == u'^') ? "with column=\"0\"" : "";
1337 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by AnyChar:" << rule.string << extra;
1338 return false;
1339 }
1340
1341 // is LineContinue
1342 static const QRegularExpression isLineContinue(QStringLiteral("^\\^?" REG_CHAR "\\$$"));
1343 if (reg.contains(isLineContinue)) {
1344 auto extra = (reg[0] == u'^') ? "with column=\"0\"" : "";
1345 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by LineContinue:" << rule.string << extra;
1346 return false;
1347 }
1348
1349#define REG_DIGIT uR"((\[(0-9|\\d)\]|\\d))"
1350#define REG_DIGITS REG_DIGIT u"([+]|" REG_DIGIT u"[*])"
1351#define REG_DOT uR"((\\[.]|\[.\]))"
1352 // is Int, check \b[0-9]+
1353 static const QRegularExpression isInt(uR"(^(\‍((\?:)?)*\\b(\‍((\?:)?)*)" REG_DIGITS uR"(\)*$)"_s);
1354 if (reg.contains(isInt)) {
1355 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by Int:" << rule.string;
1356 return false;
1357 }
1358
1359 // is Float, check (\b[0-9]+\.[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?
1360 static const QRegularExpression isFloat(
1361 uR"(^(\\b|\‍((\?:)?)*)" REG_DIGITS REG_DOT
1362 REG_DIGIT u"[*][|]" REG_DOT REG_DIGITS uR"(\)+\‍((\?:)?\[[eE]+\]\[(\\?-\\?\+|\\?\+\\?-)\]\?)" REG_DIGITS uR"(\)\?\)*$)"_s);
1363 if (reg.contains(isFloat)) {
1364 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by Float:" << rule.string;
1365 return false;
1366 }
1367#undef REG_DOT
1368#undef REG_DIGIT
1369#undef REG_DIGITS
1370
1371 // replace \c, \xhhh, \x{hhh...}, \0dd, \o{ddd}, \uhhhh, with _
1372 static const QRegularExpression sanitize1(QStringLiteral(REG_ESCAPE_CHAR));
1373 reg.replace(sanitize1, QStringLiteral("_"));
1374
1375#undef REG_CHAR
1376#undef REG_ESCAPE_CHAR
1377
1378 // use minimal or lazy operator
1379 static const QRegularExpression isMinimal(QStringLiteral("(?![.][*+?][$]?[)]*$)[.][*+?][^?+]"));
1380 static const QRegularExpression hasNotGreedy(QStringLiteral("[*+?][?+]"));
1381
1382 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(isMinimal) && !reg.contains(hasNotGreedy)
1383 && (!rule.context.context || !rule.context.context->hasDynamicRule || regexp.captureCount() == 0)
1384 && (reg.back() != u'$' || reg.contains(u'|'))) {
1385 qWarning() << rule.filename << "line" << rule.line
1386 << "RegExpr should be have minimal=\"1\" or use lazy operator (i.g, '.*' -> '.*?'):" << rule.string;
1387 return false;
1388 }
1389
1390 // replace [:...:] with ___
1391 static const QRegularExpression sanitize2(QStringLiteral(R"(\[:\w+:\])"));
1392 reg.replace(sanitize2, QStringLiteral("___"));
1393
1394 // replace [ccc...], [special] with ...
1395 static const QRegularExpression sanitize3(QStringLiteral(R"(\[(?:\^\]?[^]]*|\]?[^]\\]*?\\.[^]]*|\][^]]{2,}|[^]]{3,})\]|(\[\]?[^]]*\]))"));
1396 reg.replace(sanitize3, QStringLiteral("...\\1"));
1397
1398 // replace [c] with _
1399 static const QRegularExpression sanitize4(QStringLiteral(R"(\[.\])"));
1400 reg.replace(sanitize4, QStringLiteral("_"));
1401
1402 const int len = reg.size();
1403 // replace [cC] with _
1404 static const QRegularExpression toInsensitive(QStringLiteral(R"(\[(?:([^]])\1)\])"));
1405 reg = reg.toUpper();
1406 reg.replace(toInsensitive, QString());
1407
1408 // is StringDetect
1409 // ignore (?:, ) and {n}
1410 static const QRegularExpression isStringDetect(QStringLiteral(R"(^\^?(?:[^|\\?*+$^[{(.]|{(?!\d+,\d*}|,\d+})|\‍(\?:)+$)"));
1411 if (reg.contains(isStringDetect)) {
1412 char const *extraMsg = rule.string.contains(u'^') ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1413 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by StringDetect / Detect2Chars / DetectChar" << extraMsg
1414 << ":" << rule.string;
1415 if (len != reg.size()) {
1416 qWarning() << rule.filename << "line" << rule.line << "insensitive=\"1\" missing:" << rule.string;
1417 }
1418 return false;
1419 }
1420
1421 // column="0"
1422 if (rule.column == -1) {
1423 // ^ without |
1424 // (^sas*) -> ok
1425 // (^sa|s*) -> ko
1426 // (^(sa|s*)) -> ok
1427 auto first = std::as_const(reg).begin();
1428 auto last = std::as_const(reg).end();
1429 int depth = 0;
1430
1431 while (u'(' == *first) {
1432 ++depth;
1433 ++first;
1434 if (u'?' == *first || u':' == first[1]) {
1435 first += 2;
1436 }
1437 }
1438
1439 if (u'^' == *first) {
1440 const int bolDepth = depth;
1441 bool replace = true;
1442
1443 while (++first != last) {
1444 if (u'(' == *first) {
1445 ++depth;
1446 } else if (u')' == *first) {
1447 --depth;
1448 if (depth < bolDepth) {
1449 // (^a)? === (^a|) -> ko
1450 if (first + 1 != last && u"*?"_sv.contains(first[1])) {
1451 replace = false;
1452 break;
1453 }
1454 }
1455 } else if (u'|' == *first) {
1456 // ignore '|' within subgroup
1457 if (depth <= bolDepth) {
1458 replace = false;
1459 break;
1460 }
1461 }
1462 }
1463
1464 if (replace) {
1465 qWarning() << rule.filename << "line" << rule.line << "column=\"0\" missing with RegExpr:" << rule.string;
1466 return false;
1467 }
1468 }
1469 }
1470
1471 // add ^ with column=0
1472 if (rule.column == 0 && !rule.isDotRegex) {
1473 bool hasStartOfLine = false;
1474 auto first = std::as_const(reg).begin();
1475 auto last = std::as_const(reg).end();
1476 for (; first != last; ++first) {
1477 if (*first == u'^') {
1478 hasStartOfLine = true;
1479 break;
1480 } else if (*first == u'(') {
1481 if (last - first >= 3 && first[1] == u'?' && first[2] == u':') {
1482 first += 2;
1483 }
1484 } else {
1485 break;
1486 }
1487 }
1488
1489 if (!hasStartOfLine) {
1490 qWarning() << rule.filename << "line" << rule.line
1491 << "start of line missing in the pattern with column=\"0\" (i.e. abc -> ^abc):" << rule.string;
1492 return false;
1493 }
1494 }
1495
1496 bool useCapture = false;
1497
1498 // detection of unnecessary capture
1499 if (regexp.captureCount()) {
1500 auto maximalCapture = [](const QStringView(&referenceNames)[9], const QString &s) {
1501 int maxCapture = 9;
1502 while (maxCapture && !s.contains(referenceNames[maxCapture - 1])) {
1503 --maxCapture;
1504 }
1505 return maxCapture;
1506 };
1507
1508 int maxCaptureUsed = 0;
1509 // maximal dynamic reference
1510 if (rule.context.context && !rule.context.stay) {
1511 for (const auto &nextRule : std::as_const(rule.context.context->rules)) {
1512 if (nextRule.dynamic == XmlBool::True) {
1513 static const QStringView cap[]{
1514 u"%1"_sv,
1515 u"%2"_sv,
1516 u"%3"_sv,
1517 u"%4"_sv,
1518 u"%5"_sv,
1519 u"%6"_sv,
1520 u"%7"_sv,
1521 u"%8"_sv,
1522 u"%9"_sv,
1523 };
1524 int maxDynamicCapture = maximalCapture(cap, nextRule.string);
1525 maxCaptureUsed = std::max(maxCaptureUsed, maxDynamicCapture);
1526 }
1527 }
1528 }
1529
1530 static const QStringView num1[]{
1531 u"\\1"_sv,
1532 u"\\2"_sv,
1533 u"\\3"_sv,
1534 u"\\4"_sv,
1535 u"\\5"_sv,
1536 u"\\6"_sv,
1537 u"\\7"_sv,
1538 u"\\8"_sv,
1539 u"\\9"_sv,
1540 };
1541 static const QStringView num2[]{
1542 u"\\g1"_sv,
1543 u"\\g2"_sv,
1544 u"\\g3"_sv,
1545 u"\\g4"_sv,
1546 u"\\g5"_sv,
1547 u"\\g6"_sv,
1548 u"\\g7"_sv,
1549 u"\\g8"_sv,
1550 u"\\g9"_sv,
1551 };
1552 const int maxBackReference = std::max(maximalCapture(num1, rule.string), maximalCapture(num2, rule.string));
1553
1554 const int maxCapture = std::max(maxCaptureUsed, maxBackReference);
1555
1556 if (maxCapture && regexp.captureCount() > maxCapture) {
1557 qWarning() << rule.filename << "line" << rule.line << "RegExpr with" << regexp.captureCount() << "captures but only" << maxCapture
1558 << "are used. Please, replace '(...)' with '(?:...)':" << rule.string;
1559 return false;
1560 }
1561
1562 useCapture = maxCapture;
1563 }
1564
1565 if (!useCapture) {
1566 // is DetectIdentifier
1567 static const QRegularExpression isDetectIdentifier(
1568 QStringLiteral(R"(^(\‍((\?:)?|\^)*\[(\\p\{L\}|_){2}\]([+][?+]?)?\[(\\p\{N\}|\\p\{L\}|_){3}\][*][?+]?\)*$)"));
1569 if (rule.string.contains(isDetectIdentifier)) {
1570 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectIdentifier:" << rule.string;
1571 return false;
1572 }
1573 }
1574
1575 if (rule.isDotRegex) {
1576 // search next rule with same column or firstNonSpace
1577 int i = &rule - context.rules.data() + 1;
1578 const bool hasColumn = (rule.column != -1);
1579 const bool hasFirstNonSpace = (rule.firstNonSpace == XmlBool::True);
1580 const bool isSpecial = (hasColumn || hasFirstNonSpace);
1581 for (; i < context.rules.size(); ++i) {
1582 auto &rule2 = context.rules[i];
1583 if (rule2.type == Context::Rule::Type::IncludeRules && isSpecial) {
1584 i = context.rules.size();
1585 break;
1586 }
1587
1588 const bool hasColumn2 = (rule2.column != -1);
1589 const bool hasFirstNonSpace2 = (rule2.firstNonSpace == XmlBool::True);
1590 if ((!isSpecial && !hasColumn2 && !hasFirstNonSpace2) || (hasColumn && rule.column == rule2.column)
1591 || (hasFirstNonSpace && hasFirstNonSpace2)) {
1592 break;
1593 }
1594 }
1595
1596 auto ruleFilename = (filename == rule.filename) ? QString() : u"in "_sv + rule.filename;
1597 if (i == context.rules.size()) {
1598 if (rule.lookAhead == XmlBool::True && rule.firstNonSpace != XmlBool::True && rule.column == -1 && rule.beginRegion.isEmpty()
1599 && rule.endRegion.isEmpty() && !useCapture) {
1600 qWarning() << filename << "context line" << context.line << ": RegExpr line" << rule.line << ruleFilename
1601 << "should be replaced by fallthroughContext:" << rule.string;
1602 }
1603 } else {
1604 auto &nextRule = context.rules[i];
1605 auto nextRuleFilename = (filename == nextRule.filename) ? QString() : u"in "_sv + nextRule.filename;
1606 qWarning() << filename << "context line" << context.line << "contains unreachable element line" << nextRule.line << nextRuleFilename
1607 << "because a dot RegExpr is used line" << rule.line << ruleFilename;
1608 }
1609
1610 // unnecessary quantifier
1611 static const QRegularExpression unnecessaryQuantifier1(QStringLiteral(R"([*+?]([.][*+?]{0,2})?$)"));
1612 static const QRegularExpression unnecessaryQuantifier2(QStringLiteral(R"([*+?]([.][*+?]{0,2})?[)]*$)"));
1613 auto &unnecessaryQuantifier = useCapture ? unnecessaryQuantifier1 : unnecessaryQuantifier2;
1614 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(unnecessaryQuantifier)) {
1615 qWarning() << rule.filename << "line" << rule.line
1616 << "Last quantifier is not necessary (i.g., 'xyz*' -> 'xy', 'xyz+.' -> 'xyz.'):" << rule.string;
1617 return false;
1618 }
1619 }
1620 }
1621
1622 return true;
1623 }
1624
1625 // Parse and check <emptyLine>
1626 bool parseEmptyLine(const QString &filename, const QXmlStreamReader &xml)
1627 {
1628 bool success = true;
1629
1630 QString pattern;
1631 XmlBool casesensitive{};
1632
1633 const auto attrs = xml.attributes();
1634 for (auto &attr : attrs) {
1635 Parser parser{filename, xml, attr, success};
1636
1637 const bool isExtracted = parser.extractString(pattern, u"regexpr"_sv) || parser.extractXmlBool(casesensitive, u"casesensitive"_sv);
1638
1639 success = parser.checkIfExtracted(isExtracted);
1640 }
1641
1642 if (pattern.isEmpty()) {
1643 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: regexpr";
1644 success = false;
1645 } else {
1646 success = checkRegularExpression(filename, QRegularExpression(pattern), xml.lineNumber());
1647 }
1648
1649 return success;
1650 }
1651
1652 //! Check that a regular expression:
1653 //! - isValid()
1654 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1655 bool checkRegularExpression(const QString &filename, const QRegularExpression &regexp, int line) const
1656 {
1657 const auto pattern = regexp.pattern();
1658
1659 // validate regexp
1660 if (!regexp.isValid()) {
1661 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem:" << regexp.errorString() << "at offset"
1662 << regexp.patternErrorOffset();
1663 return false;
1664 }
1665
1666 // catch possible case typos: [A-z] or [a-Z]
1667 const int azOffset = std::max(pattern.indexOf(u"A-z"_sv), pattern.indexOf(u"a-Z"_sv));
1668 if (azOffset >= 0) {
1669 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem: [a-Z] or [A-z] at offset" << azOffset;
1670 return false;
1671 }
1672
1673 return true;
1674 }
1675
1676 //! Check fallthrough and fallthroughContext.
1677 //! Check kateversion for stopEmptyLineContextSwitchLoop.
1678 bool checkContextAttribute(const Definition &definition, const Context &context) const
1679 {
1680 bool success = true;
1681
1682 if (!context.fallthroughContext.name.isEmpty()) {
1683 const bool mandatoryFallthroughAttribute = definition.kateVersion < KateVersion{5, 62};
1684 if (context.fallthrough == XmlBool::True && !mandatoryFallthroughAttribute) {
1685 qWarning() << definition.filename << "line" << context.line << "fallthrough attribute is unnecessary with kateversion >= 5.62 in context"
1686 << context.name;
1687 success = false;
1688 } else if (context.fallthrough != XmlBool::True && mandatoryFallthroughAttribute) {
1689 qWarning() << definition.filename << "line" << context.line
1690 << "fallthroughContext attribute without fallthrough=\"1\" attribute is only valid with kateversion >= 5.62 in context"
1691 << context.name;
1692 success = false;
1693 }
1694 }
1695
1696 if (context.stopEmptyLineContextSwitchLoop != XmlBool::Unspecified && definition.kateVersion < KateVersion{5, 103}) {
1697 qWarning() << definition.filename << "line" << context.line
1698 << "stopEmptyLineContextSwitchLoop attribute is only valid with kateversion >= 5.103 in context" << context.name;
1699 success = false;
1700 }
1701
1702 return success;
1703 }
1704
1705 //! Search for additionalDeliminator/weakDeliminator which has no effect.
1706 bool checkDelimiters(const Definition &definition, const Context::Rule &rule) const
1707 {
1708 if (rule.additionalDeliminator.isEmpty() && rule.weakDeliminator.isEmpty()) {
1709 return true;
1710 }
1711
1712 bool success = true;
1713
1714 if (definition.kateVersion < KateVersion{5, 79}) {
1715 qWarning() << definition.filename << "line" << rule.line
1716 << "additionalDeliminator and weakDeliminator are only available since version \"5.79\". Please, increase kateversion.";
1717 success = false;
1718 }
1719
1720 for (QChar c : rule.additionalDeliminator) {
1721 if (!definition.wordDelimiters.contains(c)) {
1722 return success;
1723 }
1724 }
1725
1726 for (QChar c : rule.weakDeliminator) {
1727 if (definition.wordDelimiters.contains(c)) {
1728 return success;
1729 }
1730 }
1731
1732 qWarning() << rule.filename << "line" << rule.line << "unnecessary use of additionalDeliminator and/or weakDeliminator" << rule.string;
1733 return false;
1734 }
1735
1736 //! Check that keyword rule reference an existing keyword list.
1737 bool checkKeyword(const Definition &definition, const Context::Rule &rule) const
1738 {
1739 if (rule.type == Context::Rule::Type::keyword) {
1740 auto it = definition.keywordsList.find(rule.string);
1741 if (it == definition.keywordsList.end()) {
1742 qWarning() << rule.filename << "line" << rule.line << "reference of non-existing keyword list:" << rule.string;
1743 return false;
1744 }
1745 }
1746 return true;
1747 }
1748
1749 //! Search for rules with lookAhead="true" and context="#stay".
1750 //! This would cause an infinite loop.
1751 bool checkLookAhead(const Context::Rule &rule) const
1752 {
1753 if (rule.lookAhead == XmlBool::True && rule.context.stay) {
1754 qWarning() << rule.filename << "line" << rule.line << "infinite loop: lookAhead with context #stay";
1755 }
1756 return true;
1757 }
1758
1759 //! Check that StringDetect contains a placeHolder when dynamic="1"
1760 bool checkStringDetect(const Context::Rule &rule) const
1761 {
1762 if (rule.type == Context::Rule::Type::StringDetect) {
1763 // dynamic == true and no place holder?
1764 if (rule.dynamic == XmlBool::True) {
1765 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1766 if (!rule.string.contains(placeHolder)) {
1767 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1768 return false;
1769 }
1770 }
1771 }
1772 return true;
1773 }
1774
1775 //! Check that WordDetect does not contain spaces at the beginning and end of text.
1776 bool checkWordDetect(const Context::Rule &rule) const
1777 {
1778 if (rule.type == Context::Rule::Type::WordDetect) {
1779 if (!rule.string.isEmpty() && (rule.string.front().isSpace() || rule.string.back().isSpace())) {
1780 qWarning() << rule.filename << "line" << rule.line << "contains a space at the beginning or end of the string:" << rule.string;
1781 return false;
1782 }
1783 }
1784 return true;
1785 }
1786
1787 //! Check <include> and delimiter in a keyword list
1788 bool checkKeywordsList(const Definition &definition) const
1789 {
1790 bool success = true;
1791
1792 bool includeNotSupport = (definition.kateVersion < KateVersion{5, 53});
1793 QMapIterator<QString, Keywords> keywordsIt(definition.keywordsList);
1794 while (keywordsIt.hasNext()) {
1795 keywordsIt.next();
1796
1797 for (const auto &include : keywordsIt.value().items.includes) {
1798 if (includeNotSupport) {
1799 qWarning() << definition.filename << "line" << include.line
1800 << "<include> is only available since version \"5.53\". Please, increase kateversion.";
1801 success = false;
1802 }
1803 success = checkKeywordInclude(definition, include) && success;
1804 }
1805
1806 // Check that keyword list items do not have deliminator character
1807#if 0
1808 for (const auto& keyword : keywordsIt.value().items.keywords) {
1809 for (QChar c : keyword.content) {
1810 if (definition.wordDelimiters.contains(c)) {
1811 qWarning() << definition.filename << "line" << keyword.line << "keyword with delimiter:" << c << "in" << keyword.content;
1812 success = false;
1813 }
1814 }
1815 }
1816#endif
1817 }
1818
1819 return success;
1820 }
1821
1822 //! Search for non-existing keyword include.
1823 bool checkKeywordInclude(const Definition &definition, const Keywords::Items::Item &include) const
1824 {
1825 bool containsKeywordName = true;
1826 int const idx = include.content.indexOf(u"##"_sv);
1827 if (idx == -1) {
1828 auto it = definition.keywordsList.find(include.content);
1829 containsKeywordName = (it != definition.keywordsList.end());
1830 } else {
1831 auto defName = include.content.sliced(idx + 2);
1832 auto listName = include.content.sliced(0, idx);
1833 auto it = m_definitions.find(defName);
1834 if (it == m_definitions.end()) {
1835 qWarning() << definition.filename << "line" << include.line << "unknown definition in" << include.content;
1836 return false;
1837 }
1838 containsKeywordName = it->keywordsList.contains(listName);
1839 }
1840
1841 if (!containsKeywordName) {
1842 qWarning() << definition.filename << "line" << include.line << "unknown keyword name in" << include.content;
1843 }
1844
1845 return containsKeywordName;
1846 }
1847
1848 //! Check if a rule is hidden by another
1849 //! - rule hidden by DetectChar or AnyChar
1850 //! - DetectSpaces, AnyChar, Int, Float with all their characters hidden by DetectChar or AnyChar
1851 //! - StringDetect, WordDetect, RegExpr with as prefix Detect2Chars or other strings
1852 //! - duplicate rule (Int, Float, keyword with same String, etc)
1853 //! - Rule hidden by a dot regex
1854 bool checkUreachableRules(const QString &filename,
1855 const Context &context,
1856 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1857 {
1858 if (context.isOnlyIncluded) {
1859 return true;
1860 }
1861
1862 struct Rule4 {
1863 RuleAndInclude setRule(const Context::Rule &rule, const Context::Rule *includeRules = nullptr)
1864 {
1865 auto set = [&](RuleAndInclude &ruleAndInclude) {
1866 auto old = ruleAndInclude;
1867 ruleAndInclude = {&rule, includeRules};
1868 return old;
1869 };
1870
1871 if (rule.firstNonSpace == XmlBool::True) {
1872 return set(firstNonSpace);
1873 } else if (rule.column == 0) {
1874 return set(column0);
1875 } else if (rule.column > 0) {
1876 return set(columnGreaterThan0[rule.column]);
1877 } else {
1878 return set(normal);
1879 }
1880 }
1881
1882 private:
1883 RuleAndInclude normal;
1884 RuleAndInclude column0;
1885 QMap<int, RuleAndInclude> columnGreaterThan0;
1886 RuleAndInclude firstNonSpace;
1887 };
1888
1889 // Associate QChar with RuleAndInclude
1890 struct CharTable {
1891 /// Search RuleAndInclude associated with @p c.
1892 RuleAndInclude find(QChar c) const
1893 {
1894 if (c.unicode() < 128) {
1895 return m_asciiMap[c.unicode()];
1896 }
1897 auto it = m_utf8Map.find(c);
1898 return it == m_utf8Map.end() ? RuleAndInclude{nullptr, nullptr} : it.value();
1899 }
1900
1901 /// Search RuleAndInclude associated with the characters of @p s.
1902 /// \return an empty QList when at least one character is not found.
1904 {
1905 QList<RuleAndInclude> result;
1906
1907 for (QChar c : s) {
1908 if (!find(c)) {
1909 return result;
1910 }
1911 }
1912
1913 for (QChar c : s) {
1914 result.append(find(c));
1915 }
1916
1917 return result;
1918 }
1919
1920 /// Associates @p c with a rule.
1921 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1922 {
1923 if (c.unicode() < 128) {
1924 m_asciiMap[c.unicode()] = {&rule, includeRule};
1925 } else {
1926 m_utf8Map[c] = {&rule, includeRule};
1927 }
1928 }
1929
1930 /// Associates each character of @p s with a rule.
1931 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1932 {
1933 for (QChar c : s) {
1934 append(c, rule, includeRule);
1935 }
1936 }
1937
1938 private:
1939 RuleAndInclude m_asciiMap[127]{};
1941 };
1942
1943 struct Char4Tables {
1944 CharTable chars;
1945 CharTable charsColumn0;
1946 QMap<int, CharTable> charsColumnGreaterThan0;
1947 CharTable charsFirstNonSpace;
1948 };
1949
1950 // View on Char4Tables members
1951 struct CharTableArray {
1952 // Append Char4Tables members that satisfies firstNonSpace and column.
1953 // Char4Tables::char is always added.
1954 CharTableArray(Char4Tables &tables, const Context::Rule &rule)
1955 {
1956 if (rule.firstNonSpace == XmlBool::True) {
1957 appendTable(tables.charsFirstNonSpace);
1958 }
1959
1960 if (rule.column == 0) {
1961 appendTable(tables.charsColumn0);
1962 } else if (rule.column > 0) {
1963 appendTable(tables.charsColumnGreaterThan0[rule.column]);
1964 }
1965
1966 appendTable(tables.chars);
1967 }
1968
1969 // Removes Char4Tables::chars when the rule contains firstNonSpace or column
1970 void removeNonSpecialWhenSpecial()
1971 {
1972 if (m_size > 1) {
1973 --m_size;
1974 }
1975 }
1976
1977 /// Search RuleAndInclude associated with @p c.
1978 RuleAndInclude find(QChar c) const
1979 {
1980 for (int i = 0; i < m_size; ++i) {
1981 if (auto ruleAndInclude = m_charTables[i]->find(c)) {
1982 return ruleAndInclude;
1983 }
1984 }
1985 return RuleAndInclude{nullptr, nullptr};
1986 }
1987
1988 /// Search RuleAndInclude associated with the characters of @p s.
1989 /// \return an empty QList when at least one character is not found.
1991 {
1992 for (int i = 0; i < m_size; ++i) {
1993 auto result = m_charTables[i]->find(s);
1994 if (result.size()) {
1995 while (++i < m_size) {
1996 result.append(m_charTables[i]->find(s));
1997 }
1998 return result;
1999 }
2000 }
2001 return QList<RuleAndInclude>();
2002 }
2003
2004 /// Associates @p c with a rule.
2005 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
2006 {
2007 for (int i = 0; i < m_size; ++i) {
2008 m_charTables[i]->append(c, rule, includeRule);
2009 }
2010 }
2011
2012 /// Associates each character of @p s with a rule.
2013 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
2014 {
2015 for (int i = 0; i < m_size; ++i) {
2016 m_charTables[i]->append(s, rule, includeRule);
2017 }
2018 }
2019
2020 private:
2021 void appendTable(CharTable &t)
2022 {
2023 m_charTables[m_size] = &t;
2024 ++m_size;
2025 }
2026
2027 CharTable *m_charTables[3];
2028 int m_size = 0;
2029 };
2030
2031 struct ObservableRule {
2032 const Context::Rule *rule;
2033 const Context::Rule *includeRules;
2034
2035 bool hasResolvedIncludeRules() const
2036 {
2037 return rule == includeRules;
2038 }
2039 };
2040
2041 // Iterates over all the rules, including those in includedRules
2042 struct RuleIterator {
2043 RuleIterator(const QList<ObservableRule> &rules, const ObservableRule &endRule)
2044 : m_end(&endRule - rules.data())
2045 , m_rules(rules)
2046 {
2047 }
2048
2049 /// \return next rule or nullptr
2050 const Context::Rule *next()
2051 {
2052 // if in includedRules
2053 if (m_includedRules) {
2054 ++m_i2;
2055 if (m_i2 != m_includedRules->size()) {
2056 return (*m_includedRules)[m_i2];
2057 }
2058 ++m_i;
2059 m_includedRules = nullptr;
2060 }
2061
2062 // if is a includedRules
2063 while (m_i < m_end && m_rules[m_i].rule->type == Context::Rule::Type::IncludeRules) {
2064 if (!m_rules[m_i].includeRules && m_rules[m_i].rule->includedRules.size()) {
2065 m_i2 = 0;
2066 m_includedRules = &m_rules[m_i].rule->includedRules;
2067 return (*m_includedRules)[m_i2];
2068 }
2069 ++m_i;
2070 }
2071
2072 if (m_i < m_end) {
2073 ++m_i;
2074 return m_rules[m_i - 1].rule;
2075 }
2076
2077 return nullptr;
2078 }
2079
2080 /// \return current IncludeRules or nullptr
2081 const Context::Rule *currentIncludeRules() const
2082 {
2083 return m_includedRules ? m_rules[m_i].rule : m_rules[m_i].includeRules;
2084 }
2085
2086 private:
2087 int m_i = 0;
2088 int m_i2 = 0;
2089 const int m_end;
2090 const QList<ObservableRule> &m_rules;
2091 const QList<const Context::Rule *> *m_includedRules = nullptr;
2092 };
2093
2094 // Dot regex container that satisfies firstNonSpace and column.
2095 struct DotRegex {
2096 /// Append a dot regex rule.
2097 void append(const Context::Rule &rule, const Context::Rule *includedRule)
2098 {
2099 auto array = extractDotRegexes(rule);
2100 if (array[0]) {
2101 *array[0] = {&rule, includedRule};
2102 }
2103 if (array[1]) {
2104 *array[1] = {&rule, includedRule};
2105 }
2106 }
2107
2108 /// Search dot regex which hides @p rule
2109 RuleAndInclude find(const Context::Rule &rule)
2110 {
2111 auto array = extractDotRegexes(rule);
2112 if (array[0]) {
2113 return *array[0];
2114 }
2115 if (array[1]) {
2116 return *array[1];
2117 }
2118 return RuleAndInclude{};
2119 }
2120
2121 private:
2122 using Array = std::array<RuleAndInclude *, 2>;
2123
2124 Array extractDotRegexes(const Context::Rule &rule)
2125 {
2126 Array ret{};
2127
2128 if (rule.firstNonSpace != XmlBool::True && rule.column == -1) {
2129 ret[0] = &dotRegex;
2130 } else {
2131 if (rule.firstNonSpace == XmlBool::True) {
2132 ret[0] = &dotRegexFirstNonSpace;
2133 }
2134
2135 if (rule.column == 0) {
2136 ret[1] = &dotRegexColumn0;
2137 } else if (rule.column > 0) {
2138 ret[1] = &dotRegexColumnGreaterThan0[rule.column];
2139 }
2140 }
2141
2142 return ret;
2143 }
2144
2145 RuleAndInclude dotRegex{};
2146 RuleAndInclude dotRegexColumn0{};
2147 QMap<int, RuleAndInclude> dotRegexColumnGreaterThan0{};
2148 RuleAndInclude dotRegexFirstNonSpace{};
2149 };
2150
2151 bool success = true;
2152
2153 // characters of DetectChar/AnyChar
2154 Char4Tables detectChars;
2155 // characters of dynamic DetectChar
2156 Char4Tables dynamicDetectChars;
2157 // characters of LineContinue
2158 Char4Tables lineContinueChars;
2159
2160 Rule4 intRule{};
2161 Rule4 floatRule{};
2162 Rule4 hlCCharRule{};
2163 Rule4 hlCOctRule{};
2164 Rule4 hlCHexRule{};
2165 Rule4 hlCStringCharRule{};
2166 Rule4 detectIdentifierRule{};
2167
2168 // Contains includedRules and included includedRules
2170
2171 DotRegex dotRegex;
2172
2173 QList<ObservableRule> observedRules;
2174 observedRules.reserve(context.rules.size());
2175 for (const Context::Rule &rule : context.rules) {
2176 const Context::Rule *includeRule = nullptr;
2177 if (rule.type == Context::Rule::Type::IncludeRules) {
2178 auto *context = rule.context.context;
2179 if (context && context->isOnlyIncluded) {
2180 includeRule = &rule;
2181 }
2182 }
2183
2184 observedRules.push_back({&rule, includeRule});
2185 if (includeRule) {
2186 for (const Context::Rule *rule2 : rule.includedRules) {
2187 observedRules.push_back({rule2, includeRule});
2188 }
2189 }
2190 }
2191
2192 for (auto &observedRule : observedRules) {
2193 const Context::Rule &rule = *observedRule.rule;
2194 bool isUnreachable = false;
2195 QList<RuleAndInclude> unreachableBy;
2196
2197 // declare rule as unreachable if ruleAndInclude is not empty
2198 auto updateUnreachable1 = [&](RuleAndInclude ruleAndInclude) {
2199 if (ruleAndInclude) {
2200 isUnreachable = true;
2201 unreachableBy.append(ruleAndInclude);
2202 }
2203 };
2204
2205 // declare rule as unreachable if ruleAndIncludes is not empty
2206 auto updateUnreachable2 = [&](const QList<RuleAndInclude> &ruleAndIncludes) {
2207 if (!ruleAndIncludes.isEmpty()) {
2208 isUnreachable = true;
2209 unreachableBy.append(ruleAndIncludes);
2210 }
2211 };
2212
2213 // check if rule2.firstNonSpace/column is compatible with those of rule
2214 auto isCompatible = [&rule](Context::Rule const &rule2) {
2215 return (rule2.firstNonSpace != XmlBool::True && rule2.column == -1) || (rule.column == rule2.column && rule.column != -1)
2216 || (rule.firstNonSpace == rule2.firstNonSpace && rule.firstNonSpace == XmlBool::True);
2217 };
2218
2219 updateUnreachable1(dotRegex.find(rule));
2220
2221 switch (rule.type) {
2222 // checks if hidden by DetectChar/AnyChar
2223 // then add the characters to detectChars
2224 case Context::Rule::Type::AnyChar: {
2225 auto tables = CharTableArray(detectChars, rule);
2226 updateUnreachable2(tables.find(rule.string));
2227 tables.removeNonSpecialWhenSpecial();
2228 tables.append(rule.string, rule);
2229 break;
2230 }
2231
2232 // check if is hidden by DetectChar/AnyChar
2233 // then add the characters to detectChars or dynamicDetectChars
2234 case Context::Rule::Type::DetectChar: {
2235 auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2236 auto tables = CharTableArray(chars4, rule);
2237 updateUnreachable1(tables.find(rule.char0));
2238 tables.removeNonSpecialWhenSpecial();
2239 tables.append(rule.char0, rule);
2240 break;
2241 }
2242
2243 // check if hidden by DetectChar/AnyChar
2244 // then add spaces characters to detectChars
2245 case Context::Rule::Type::DetectSpaces: {
2246 auto tables = CharTableArray(detectChars, rule);
2247 updateUnreachable2(tables.find(u" \t"_sv));
2248 tables.removeNonSpecialWhenSpecial();
2249 tables.append(u' ', rule);
2250 tables.append(u'\t', rule);
2251 break;
2252 }
2253
2254 // check if hidden by DetectChar/AnyChar
2255 case Context::Rule::Type::HlCChar:
2256 updateUnreachable1(CharTableArray(detectChars, rule).find(u'\''));
2257 updateUnreachable1(hlCCharRule.setRule(rule));
2258 break;
2259
2260 // check if hidden by DetectChar/AnyChar
2261 case Context::Rule::Type::HlCHex:
2262 updateUnreachable1(CharTableArray(detectChars, rule).find(u'0'));
2263 updateUnreachable1(hlCHexRule.setRule(rule));
2264 break;
2265
2266 // check if hidden by DetectChar/AnyChar
2267 case Context::Rule::Type::HlCOct:
2268 updateUnreachable1(CharTableArray(detectChars, rule).find(u'0'));
2269 updateUnreachable1(hlCOctRule.setRule(rule));
2270 break;
2271
2272 // check if hidden by DetectChar/AnyChar
2273 case Context::Rule::Type::HlCStringChar:
2274 updateUnreachable1(CharTableArray(detectChars, rule).find(u'\\'));
2275 updateUnreachable1(hlCStringCharRule.setRule(rule));
2276 break;
2277
2278 // check if hidden by DetectChar/AnyChar
2279 case Context::Rule::Type::Int:
2280 updateUnreachable2(CharTableArray(detectChars, rule).find(u"0123456789"_sv));
2281 updateUnreachable1(intRule.setRule(rule));
2282 break;
2283
2284 // check if hidden by DetectChar/AnyChar
2285 case Context::Rule::Type::Float:
2286 updateUnreachable2(CharTableArray(detectChars, rule).find(u"0123456789."_sv));
2287 updateUnreachable1(floatRule.setRule(rule));
2288 // check that Float is before Int
2289 updateUnreachable1(Rule4(intRule).setRule(rule));
2290 break;
2291
2292 // check if hidden by another DetectIdentifier rule
2293 case Context::Rule::Type::DetectIdentifier:
2294 updateUnreachable1(detectIdentifierRule.setRule(rule));
2295 break;
2296
2297 // check if hidden by DetectChar/AnyChar or another LineContinue
2298 case Context::Rule::Type::LineContinue: {
2299 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2300
2301 auto tables = CharTableArray(lineContinueChars, rule);
2302 updateUnreachable1(tables.find(rule.char0));
2303 tables.removeNonSpecialWhenSpecial();
2304 tables.append(rule.char0, rule);
2305 break;
2306 }
2307
2308 // check if hidden by DetectChar/AnyChar or another Detect2Chars/RangeDetect
2309 case Context::Rule::Type::Detect2Chars:
2310 case Context::Rule::Type::RangeDetect:
2311 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2312 if (!isUnreachable) {
2313 RuleIterator ruleIterator(observedRules, observedRule);
2314 while (const auto *rulePtr = ruleIterator.next()) {
2315 if (isUnreachable) {
2316 break;
2317 }
2318 const auto &rule2 = *rulePtr;
2319 if (rule2.type == rule.type && isCompatible(rule2) && rule.char0 == rule2.char0 && rule.char1 == rule2.char1) {
2320 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2321 }
2322 }
2323 }
2324 break;
2325
2326 case Context::Rule::Type::RegExpr: {
2327 if (rule.isDotRegex) {
2328 dotRegex.append(rule, nullptr);
2329 break;
2330 }
2331
2332 // check that `rule` does not have another RegExpr as a prefix
2333 RuleIterator ruleIterator(observedRules, observedRule);
2334 while (const auto *rulePtr = ruleIterator.next()) {
2335 if (isUnreachable) {
2336 break;
2337 }
2338 const auto &rule2 = *rulePtr;
2339 if (rule2.type == Context::Rule::Type::RegExpr && isCompatible(rule2) && rule.insensitive == rule2.insensitive
2340 && rule.dynamic == rule2.dynamic && rule.sanitizedString.startsWith(rule2.sanitizedString)) {
2341 bool add = (rule.sanitizedString.startsWith(rule2.string) || rule.sanitizedString.size() < rule2.sanitizedString.size() + 2);
2342 if (!add) {
2343 // \s.* (sanitized = \s) is considered hiding \s*\S
2344 // we check the quantifiers to see if this is the case
2345 auto c1 = rule.sanitizedString[rule2.sanitizedString.size()].unicode();
2346 auto c2 = rule.sanitizedString[rule2.sanitizedString.size() + 1].unicode();
2347 auto c3 = rule2.sanitizedString.back().unicode();
2348 if (c3 == '*' || c3 == '?' || c3 == '+') {
2349 add = true;
2350 } else if (c1 == '*' || c1 == '?') {
2351 add = !((c2 == '?' || c2 == '+') || (rule.sanitizedString.size() >= rule2.sanitizedString.size() + 3));
2352 } else {
2353 add = true;
2354 }
2355 }
2356 if (add) {
2357 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2358 }
2359 }
2360 }
2361
2362 Q_FALLTHROUGH();
2363 }
2364 // check if a rule does not have another rule as a prefix
2365 case Context::Rule::Type::WordDetect:
2366 case Context::Rule::Type::StringDetect: {
2367 // check that dynamic `rule` does not have another dynamic StringDetect as a prefix
2368 if (rule.type == Context::Rule::Type::StringDetect && rule.dynamic == XmlBool::True) {
2369 RuleIterator ruleIterator(observedRules, observedRule);
2370 while (const auto *rulePtr = ruleIterator.next()) {
2371 if (isUnreachable) {
2372 break;
2373 }
2374
2375 const auto &rule2 = *rulePtr;
2376 if (rule2.type != Context::Rule::Type::StringDetect || rule2.dynamic != XmlBool::True || !isCompatible(rule2)) {
2377 continue;
2378 }
2379
2380 const bool isSensitive = (rule2.insensitive == XmlBool::True);
2381 const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2382 if ((isSensitive || rule.insensitive != XmlBool::True) && rule.string.startsWith(rule2.string, caseSensitivity)) {
2383 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2384 }
2385 }
2386 }
2387
2388 // string used for comparison and truncated from "dynamic" part
2389 QStringView s = rule.string;
2390
2391 // truncate to '%' with dynamic rules
2392 if (rule.dynamic == XmlBool::True) {
2393 static const QRegularExpression dynamicPosition(QStringLiteral(R"(^(?:[^%]*|%(?![1-9]))*)"));
2394 auto result = dynamicPosition.match(rule.string);
2395 s = s.sliced(0, result.capturedLength());
2396 // check if hidden by DetectChar/AnyChar
2397 if (s.size() + 2 <= rule.string.size()) {
2398 auto tables = CharTableArray(dynamicDetectChars, rule);
2399 updateUnreachable1(tables.find(s.data()[s.size() + 2]));
2400 }
2401 }
2402
2403 QString sanitizedRegex;
2404 // truncate to special character with RegExpr.
2405 // If regexp contains '|', `s` becomes empty.
2406 if (rule.type == Context::Rule::Type::RegExpr) {
2407 static const QRegularExpression regularChars(QStringLiteral(R"(^(?:[^.?*+^$[{(\\|]+|\\[-.?*+^$[\]{}()\\|]+|\[[^^\\]\])+)"));
2408 static const QRegularExpression sanitizeChars(QStringLiteral(R"(\\‍([-.?*+^$[\]{}()\\|])|\[([^^\\])\])"));
2409 const qsizetype result = regularChars.match(rule.string).capturedLength();
2410 const qsizetype pos = qMin(result, s.size());
2411 if (rule.string.indexOf(u'|', pos) < pos) {
2412 sanitizedRegex = rule.string.sliced(0, qMin(result, s.size()));
2413 sanitizedRegex.replace(sanitizeChars, QStringLiteral("\\1"));
2414 s = sanitizedRegex;
2415 } else {
2416 s = QStringView();
2417 }
2418 }
2419
2420 // check if hidden by DetectChar/AnyChar
2421 if (s.size() > 0) {
2422 auto t = CharTableArray(detectChars, rule);
2423 if (rule.insensitive != XmlBool::True) {
2424 updateUnreachable1(t.find(s[0]));
2425 } else {
2426 QChar c2[]{s[0].toLower(), s[0].toUpper()};
2427 updateUnreachable2(t.find(QStringView(c2, 2)));
2428 }
2429
2430 // StringDetect is a DetectChar
2431 if (rule.type == Context::Rule::Type::StringDetect && rule.string.size() == 1) {
2432 auto tables = CharTableArray(detectChars, rule);
2433 auto c = rule.string[0];
2434 if (rule.insensitive != XmlBool::True) {
2435 c = c.toLower();
2436 tables.removeNonSpecialWhenSpecial();
2437 tables.append(c, rule);
2438 c = c.toUpper();
2439 }
2440 tables.removeNonSpecialWhenSpecial();
2441 tables.append(c, rule);
2442 }
2443 }
2444
2445 // check if Detect2Chars, StringDetect, WordDetect is not a prefix of s
2446 if (s.size() > 0 && !isUnreachable) {
2447 // combination of uppercase and lowercase
2448 RuleAndInclude detect2CharsInsensitives[]{{}, {}, {}, {}};
2449
2450 RuleIterator ruleIterator(observedRules, observedRule);
2451 while (const auto *rulePtr = ruleIterator.next()) {
2452 if (isUnreachable) {
2453 break;
2454 }
2455 const auto &rule2 = *rulePtr;
2456 const bool isSensitive = (rule2.insensitive == XmlBool::True);
2457 const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2458
2459 switch (rule2.type) {
2460 // check that it is not a detectChars prefix
2461 case Context::Rule::Type::Detect2Chars:
2462 if (isCompatible(rule2) && s.size() >= 2) {
2463 if (rule.insensitive != XmlBool::True) {
2464 if (rule2.char0 == s[0] && rule2.char1 == s[1]) {
2465 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2466 }
2467 } else {
2468 // when the string is case insensitive,
2469 // all 4 upper/lower case combinations must be found
2470 auto set = [&](RuleAndInclude &x, QChar c1, QChar c2) {
2471 if (!x && rule2.char0 == c1 && rule2.char0 == c2) {
2472 x = {&rule2, ruleIterator.currentIncludeRules()};
2473 }
2474 };
2475 set(detect2CharsInsensitives[0], s[0].toLower(), s[1].toLower());
2476 set(detect2CharsInsensitives[1], s[0].toLower(), s[1].toUpper());
2477 set(detect2CharsInsensitives[2], s[0].toUpper(), s[1].toUpper());
2478 set(detect2CharsInsensitives[3], s[0].toUpper(), s[1].toLower());
2479
2480 if (detect2CharsInsensitives[0] && detect2CharsInsensitives[1] && detect2CharsInsensitives[2]
2481 && detect2CharsInsensitives[3]) {
2482 isUnreachable = true;
2483 unreachableBy.append(detect2CharsInsensitives[0]);
2484 unreachableBy.append(detect2CharsInsensitives[1]);
2485 unreachableBy.append(detect2CharsInsensitives[2]);
2486 unreachableBy.append(detect2CharsInsensitives[3]);
2487 }
2488 }
2489 }
2490 break;
2491
2492 // check that it is not a StringDetect prefix
2493 case Context::Rule::Type::StringDetect:
2494 if (isCompatible(rule2) && rule2.dynamic != XmlBool::True && (isSensitive || rule.insensitive != XmlBool::True)
2495 && s.startsWith(rule2.string, caseSensitivity)) {
2496 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2497 }
2498 break;
2499
2500 // check if a WordDetect is hidden by another WordDetect
2501 case Context::Rule::Type::WordDetect:
2502 if (rule.type == Context::Rule::Type::WordDetect && isCompatible(rule2) && (isSensitive || rule.insensitive != XmlBool::True)
2503 && 0 == rule.string.compare(rule2.string, caseSensitivity)) {
2504 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2505 }
2506 break;
2507
2508 default:;
2509 }
2510 }
2511 }
2512
2513 break;
2514 }
2515
2516 // check if hidden by another keyword rule
2517 case Context::Rule::Type::keyword: {
2518 RuleIterator ruleIterator(observedRules, observedRule);
2519 while (const auto *rulePtr = ruleIterator.next()) {
2520 if (isUnreachable) {
2521 break;
2522 }
2523 const auto &rule2 = *rulePtr;
2524 if (rule2.type == Context::Rule::Type::keyword && isCompatible(rule2) && rule.string == rule2.string) {
2525 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2526 }
2527 }
2528 // TODO check that all keywords are hidden by another rules
2529 break;
2530 }
2531
2532 // add characters in those used but without checking if they are already.
2533 // <DetectChar char="}" />
2534 // <includedRules .../> <- reference an another <DetectChar char="}" /> who will not be checked
2535 // <includedRules .../> <- reference a <DetectChar char="{" /> who will be added
2536 // <DetectChar char="{" /> <- hidden by previous rule
2537 case Context::Rule::Type::IncludeRules:
2538 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2539 break;
2540 }
2541
2542 if (auto &ruleAndInclude = includeContexts[rule.context.context]) {
2543 updateUnreachable1(ruleAndInclude);
2544 } else {
2545 ruleAndInclude.rule = &rule;
2546 }
2547
2548 for (const auto *rulePtr : rule.includedIncludeRules) {
2549 includeContexts.insert(rulePtr->context.context, RuleAndInclude{rulePtr, &rule});
2550 }
2551
2552 if (observedRule.includeRules) {
2553 break;
2554 }
2555
2556 for (const auto *rulePtr : rule.includedRules) {
2557 const auto &rule2 = *rulePtr;
2558 switch (rule2.type) {
2559 case Context::Rule::Type::AnyChar: {
2560 auto tables = CharTableArray(detectChars, rule2);
2561 tables.removeNonSpecialWhenSpecial();
2562 tables.append(rule2.string, rule2, &rule);
2563 break;
2564 }
2565
2566 case Context::Rule::Type::DetectChar: {
2567 auto &chars4 = (rule2.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2568 auto tables = CharTableArray(chars4, rule2);
2569 tables.removeNonSpecialWhenSpecial();
2570 tables.append(rule2.char0, rule2, &rule);
2571 break;
2572 }
2573
2574 case Context::Rule::Type::DetectSpaces: {
2575 auto tables = CharTableArray(detectChars, rule2);
2576 tables.removeNonSpecialWhenSpecial();
2577 tables.append(u' ', rule2, &rule);
2578 tables.append(u'\t', rule2, &rule);
2579 break;
2580 }
2581
2582 case Context::Rule::Type::HlCChar:
2583 hlCCharRule.setRule(rule2, &rule);
2584 break;
2585
2586 case Context::Rule::Type::HlCHex:
2587 hlCHexRule.setRule(rule2, &rule);
2588 break;
2589
2590 case Context::Rule::Type::HlCOct:
2591 hlCOctRule.setRule(rule2, &rule);
2592 break;
2593
2594 case Context::Rule::Type::HlCStringChar:
2595 hlCStringCharRule.setRule(rule2, &rule);
2596 break;
2597
2598 case Context::Rule::Type::Int:
2599 intRule.setRule(rule2, &rule);
2600 break;
2601
2602 case Context::Rule::Type::Float:
2603 floatRule.setRule(rule2, &rule);
2604 break;
2605
2606 case Context::Rule::Type::LineContinue: {
2607 auto tables = CharTableArray(lineContinueChars, rule2);
2608 tables.removeNonSpecialWhenSpecial();
2609 tables.append(rule2.char0, rule2, &rule);
2610 break;
2611 }
2612
2613 case Context::Rule::Type::RegExpr:
2614 if (rule2.isDotRegex) {
2615 dotRegex.append(rule2, &rule);
2616 }
2617 break;
2618
2619 case Context::Rule::Type::StringDetect: {
2620 // StringDetect is a DetectChar
2621 if (rule2.string.size() == 1 || (rule2.string.size() == 2 && rule2.dynamic == XmlBool::True)) {
2622 auto &chars4 = (rule2.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2623 auto tables = CharTableArray(chars4, rule2);
2624 tables.removeNonSpecialWhenSpecial();
2625 tables.append(rule2.string.back(), rule2, &rule);
2626 }
2627 break;
2628 }
2629
2630 case Context::Rule::Type::WordDetect:
2631 case Context::Rule::Type::Detect2Chars:
2632 case Context::Rule::Type::IncludeRules:
2633 case Context::Rule::Type::DetectIdentifier:
2634 case Context::Rule::Type::keyword:
2635 case Context::Rule::Type::Unknown:
2636 case Context::Rule::Type::RangeDetect:
2637 break;
2638 }
2639 }
2640 break;
2641
2642 case Context::Rule::Type::Unknown:
2643 break;
2644 }
2645
2646 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2647 auto &unreachableIncludedRule = unreachableIncludedRules[&rule];
2648 if (isUnreachable && unreachableIncludedRule.alwaysUnreachable) {
2649 unreachableIncludedRule.unreachableBy.append(unreachableBy);
2650 } else {
2651 unreachableIncludedRule.alwaysUnreachable = false;
2652 }
2653 } else if (isUnreachable) {
2654 success = false;
2655 QString message;
2656 message.reserve(128);
2657 for (auto &ruleAndInclude : std::as_const(unreachableBy)) {
2658 message += u"line "_sv;
2659 if (ruleAndInclude.includeRules) {
2660 message += QString::number(ruleAndInclude.includeRules->line);
2661 message += u" [by '"_sv;
2662 message += ruleAndInclude.includeRules->context.name;
2663 message += u"' line "_sv;
2664 message += QString::number(ruleAndInclude.rule->line);
2665 if (ruleAndInclude.includeRules->filename != ruleAndInclude.rule->filename) {
2666 message += u" ("_sv;
2667 message += ruleAndInclude.rule->filename;
2668 message += u')';
2669 }
2670 message += u']';
2671 } else {
2672 message += QString::number(ruleAndInclude.rule->line);
2673 }
2674 message += u", "_sv;
2675 }
2676 message.chop(2);
2677 qWarning() << filename << "line" << rule.line << "unreachable rule by" << message;
2678 }
2679 }
2680
2681 return success;
2682 }
2683
2684 //! Proposes to merge certain rule sequences
2685 //! - several DetectChar/AnyChar into AnyChar
2686 //! - several RegExpr into one RegExpr
2687 bool suggestRuleMerger(const QString &filename, const Context &context) const
2688 {
2689 bool success = true;
2690
2691 if (context.rules.isEmpty()) {
2692 return success;
2693 }
2694
2695 auto it = context.rules.begin();
2696 const auto end = context.rules.end() - 1;
2697
2698 for (; it < end; ++it) {
2699 const auto &rule1 = *it;
2700 const auto &rule2 = it[1];
2701
2702 auto isCommonCompatible = [&] {
2703 if (rule1.lookAhead != rule2.lookAhead) {
2704 return false;
2705 }
2706 // ignore attribute when lookAhead is true
2707 if (rule1.lookAhead != XmlBool::True && rule1.attribute != rule2.attribute) {
2708 return false;
2709 }
2710 // clang-format off
2711 return rule1.beginRegion == rule2.beginRegion
2712 && rule1.endRegion == rule2.endRegion
2713 && rule1.firstNonSpace == rule2.firstNonSpace
2714 && rule1.context.context == rule2.context.context
2715 && rule1.context.popCount == rule2.context.popCount;
2716 // clang-format on
2717 };
2718
2719 switch (rule1.type) {
2720 // request to merge StringDetect with AnyChar
2721 case Context::Rule::Type::StringDetect:
2722 if (rule1.string.size() != 1 || rule1.dynamic == XmlBool::True) {
2723 break;
2724 }
2725 Q_FALLTHROUGH();
2726 // request to merge AnyChar/DetectChar
2727 case Context::Rule::Type::AnyChar:
2728 case Context::Rule::Type::DetectChar:
2729 if ((rule2.type == Context::Rule::Type::AnyChar || rule2.type == Context::Rule::Type::DetectChar
2730 || (rule2.type == Context::Rule::Type::StringDetect && rule2.dynamic != XmlBool::True && rule2.string.size() == 1))
2731 && isCommonCompatible() && rule1.column == rule2.column) {
2732 qWarning() << filename << "line" << rule2.line << "can be merged as AnyChar with the previous rule";
2733 success = false;
2734 }
2735 break;
2736
2737 // request to merge multiple RegExpr
2738 case Context::Rule::Type::RegExpr:
2739 if (rule2.type == Context::Rule::Type::RegExpr && isCommonCompatible() && rule1.dynamic == rule2.dynamic
2740 && (rule1.column == rule2.column || (rule1.column <= 0 && rule2.column <= 0))) {
2741 qWarning() << filename << "line" << rule2.line << "can be merged with the previous rule";
2742 success = false;
2743 }
2744 break;
2745
2746 case Context::Rule::Type::DetectSpaces:
2747 case Context::Rule::Type::HlCChar:
2748 case Context::Rule::Type::HlCHex:
2749 case Context::Rule::Type::HlCOct:
2750 case Context::Rule::Type::HlCStringChar:
2751 case Context::Rule::Type::Int:
2752 case Context::Rule::Type::Float:
2753 case Context::Rule::Type::LineContinue:
2754 case Context::Rule::Type::WordDetect:
2755 case Context::Rule::Type::Detect2Chars:
2756 case Context::Rule::Type::IncludeRules:
2757 case Context::Rule::Type::DetectIdentifier:
2758 case Context::Rule::Type::keyword:
2759 case Context::Rule::Type::Unknown:
2760 case Context::Rule::Type::RangeDetect:
2761 break;
2762 }
2763 }
2764
2765 return success;
2766 }
2767
2768 //! Initialize the referenced context (ContextName::context)
2769 //! Some input / output examples are:
2770 //! - "#stay" -> ""
2771 //! - "#pop" -> ""
2772 //! - "Comment" -> "Comment"
2773 //! - "#pop!Comment" -> "Comment"
2774 //! - "##ISO C++" -> ""
2775 //! - "Comment##ISO C++"-> "Comment" in ISO C++
2776 void resolveContextName(Definition &definition, Context &context, ContextName &contextName, int line)
2777 {
2778 QStringView name = contextName.name;
2779 if (name.isEmpty()) {
2780 contextName.stay = true;
2781 } else if (name.startsWith(u"#stay"_sv)) {
2782 contextName.stay = true;
2783 if (name.size() > 5) {
2784 qWarning() << definition.filename << "line" << line << "invalid context in" << context.name;
2785 m_success = false;
2786 }
2787 } else {
2788 while (name.startsWith(u"#pop"_sv)) {
2789 name = name.sliced(4);
2790 ++contextName.popCount;
2791 }
2792
2793 if (contextName.popCount && !name.isEmpty()) {
2794 if (name.startsWith(u'!') && name.size() > 1) {
2795 name = name.sliced(1);
2796 } else {
2797 qWarning() << definition.filename << "line" << line << "'!' missing between '#pop' and context name" << context.name;
2798 m_success = false;
2799 }
2800 }
2801
2802 if (!name.isEmpty()) {
2803 const int idx = name.indexOf(u"##"_sv);
2804 if (idx == -1) {
2805 auto it = definition.contexts.find(name.toString());
2806 if (it != definition.contexts.end()) {
2807 contextName.context = &*it;
2808 }
2809 } else {
2810 auto defName = name.sliced(idx + 2);
2811 auto it = m_definitions.find(defName.toString());
2812 if (it != m_definitions.end()) {
2813 auto listName = name.sliced(0, idx).toString();
2814 definition.referencedDefinitions.insert(&*it);
2815 auto ctxIt = it->contexts.find(listName.isEmpty() ? it->firstContextName : listName);
2816 if (ctxIt != it->contexts.end()) {
2817 contextName.context = &*ctxIt;
2818 }
2819 } else {
2820 qWarning() << definition.filename << "line" << line << "unknown definition in" << context.name;
2821 m_success = false;
2822 }
2823 }
2824
2825 if (!contextName.context) {
2826 qWarning() << definition.filename << "line" << line << "unknown context" << name << "in" << context.name;
2827 m_success = false;
2828 }
2829 }
2830 }
2831 }
2832
2833 QMap<QString, Definition> m_definitions;
2835 Definition *m_currentDefinition = nullptr;
2836 Keywords *m_currentKeywords = nullptr;
2837 Context *m_currentContext = nullptr;
2838 // xml reader variable
2839 //@{
2840 QString m_textContent;
2841 bool m_inKeywordItem = false;
2842 //@}
2843 bool m_success = true;
2844};
2845
2846class HlCompressor
2847{
2848public:
2849 HlCompressor(const QString &kateVersion)
2850 : m_kateVersion(kateVersion)
2851 {
2852 m_hasElems.push_back(true);
2853 }
2854
2855 const QString &compressedXML() const
2856 {
2857 return m_data;
2858 }
2859
2860 /**
2861 * Reduce xml space by removing what is superfluous.
2862 * - transforms boolean values into 0 or 1.
2863 * - remove unused attributes.
2864 * - remove spaces and comments.
2865 * - remove context attributes referring to #stay (because this is the default).
2866 * - replace Detect2Chars with StringDetect (String="xy" is shorter than char="x" char1="y").
2867 * - sort contexts by frequency of use to accelerate their search during loading.
2868 */
2869 void processElement(const QXmlStreamReader &xml)
2870 {
2871 switch (xml.tokenType()) {
2873 closePreviousOpenTag(m_inContexts && !m_contexts.empty() ? m_contexts.back().data : m_data);
2874 m_hasElems.push_back(false);
2875
2876 const auto tagName = xml.name();
2877 if (tagName == u"contexts"_sv) {
2878 m_inContexts = true;
2879 m_data += u"<contexts"_sv;
2880 } else if (m_inContexts) {
2881 Context &ctx = (m_contexts.empty() || tagName == u"context"_sv) ? m_contexts.emplace_back() : m_contexts.back();
2882 QString &out = ctx.data;
2883 const bool isDetect2Chars = tagName == u"Detect2Chars"_sv;
2884 out += u'<' % (isDetect2Chars ? u"StringDetect"_sv : tagName);
2885
2886 auto attrs = xml.attributes();
2887 sortAttributes(attrs);
2888 for (const auto &attr : attrs) {
2889 const auto attrName = attr.name();
2890 auto value = attr.value();
2891 // transform Detect2Chars char and char1 attributes to StringDetect String attribute
2892 if (isDetect2Chars && (attrName == u"char"_sv || attrName == u"char1"_sv)) {
2893 if (attrName == u"char"_sv) {
2894 const auto ch0 = value;
2895 const auto ch1 = attrs.value(u"char1"_sv);
2896 QChar chars[]{ch0.isEmpty() ? u' ' : ch0[0], ch1.isEmpty() ? u' ' : ch1[0]};
2897 writeXmlAttribute(out, u"String"_sv, QStringView(chars, 2), tagName);
2898 }
2899 } else if (attrName == u"context"_sv || attrName == u"lineEndContext"_sv || attrName == u"fallthroughContext"_sv
2900 || attrName == u"lineEmptyContext"_sv) {
2901 // ignore #stay context because this is the default
2902 if (value != u"#stay"_sv) {
2903 writeXmlAttribute(out, attrName, value, tagName);
2904
2905 /*
2906 * Extract context name and increment context counter
2907 */
2908 bool hasPop = false;
2909 while (value.startsWith(u"#pop"_sv)) {
2910 hasPop = true;
2911 value = value.sliced(4);
2912 }
2913 if (hasPop && !value.isEmpty()) {
2914 value = value.sliced(1);
2915 }
2916 if (!value.isEmpty() && -1 == value.indexOf(u"##"_sv)) {
2917 m_contextRefs[value.toString()]++;
2918 }
2919 }
2920 } else if (tagName == u"LineContinue"_sv && attrName == u"char"_sv && value == u"\\") {
2921 // ignore char="\\" with LineContinue
2922 } else {
2923 if (attrName == u"name"_sv) {
2924 ctx.name = value.toString();
2925 }
2926 writeXmlAttribute(out, attrName, value, tagName);
2927 }
2928 }
2929 } else {
2930 m_data += u'<' % tagName;
2931 const auto attrs = xml.attributes();
2932 for (const auto &attr : attrs) {
2933 auto name = attr.name();
2934 auto value = (name == u"kateversion") ? QStringView(m_kateVersion) : attr.value();
2935 writeXmlAttribute(m_data, name, value, tagName);
2936 }
2937 }
2938 break;
2939 }
2940
2942 const auto name = xml.name();
2943 if (m_inContexts && !m_contexts.empty() && name == u"contexts"_sv) {
2944 m_inContexts = false;
2945 // sorting contexts by the most used (ignore first context)
2946 std::sort(m_contexts.begin() + 1, m_contexts.end(), [&](auto &ctx1, auto &ctx2) {
2947 auto i1 = m_contextRefs.value(ctx1.name);
2948 auto i2 = m_contextRefs.value(ctx2.name);
2949 if (i1 != i2) {
2950 return i1 > i2;
2951 }
2952 // for a reproducible build, contexts with the same number of uses are sorted by name
2953 return ctx1.name < ctx2.name;
2954 });
2955 for (const auto &ctx : m_contexts) {
2956 m_data += ctx.data;
2957 }
2958 }
2959
2960 QString &out = m_inContexts && !m_contexts.empty() ? m_contexts.back().data : m_data;
2961 if (m_hasElems.back()) {
2962 out += u"</"_sv % name % u'>';
2963 } else {
2964 out += u"/>"_sv;
2965 }
2966 m_hasElems.pop_back();
2967 break;
2968 }
2969
2972 if (!m_inContexts && !xml.isWhitespace()) {
2973 closePreviousOpenTag(m_data);
2974 writeXmlText(m_data, xml.text());
2975 }
2976 break;
2977
2978 default:;
2979 }
2980 }
2981
2982private:
2983 void closePreviousOpenTag(QString &out)
2984 {
2985 if (!m_hasElems.back()) {
2986 m_hasElems.back() = true;
2987 out += u'>';
2988 }
2989 }
2990
2991 /**
2992 * Write \p text escaping special characters.
2993 */
2994 static void writeXmlText(QString &out, QStringView text, bool escapeDQ = false)
2995 {
2996 for (const QChar &c : text) {
2997 if (c == u'<') {
2998 out += u"&lt;"_sv;
2999 } else if (c == u'&') {
3000 out += u"&amp;"_sv;
3001 } else if (escapeDQ && c == u'"') {
3002 out += u"&#34;"_sv;
3003 } else if (c == u'\t') {
3004 // non-space whitespace character in an attribute is remplaced with space...
3005 out += u"&#9;"_sv;
3006 } else {
3007 out += c;
3008 }
3009 }
3010 }
3011
3012 /**
3013 * Write attribut in \p out.
3014 * Booleans are converted to 0, 1 or ignored if this corresponds to the default value.
3015 * Values will be written with either double quotes or single quotes,
3016 * depending on which takes up the least space
3017 */
3018 static void writeXmlAttribute(QString &out, QStringView attrName, QStringView value, QStringView tagName)
3019 {
3020 enum class DefaultBool {
3021 // default value is false
3022 False,
3023 // default value is true
3024 True,
3025 // manipulate as a tribool whose attribute absence is equivalent to None
3026 None,
3027 // not used
3028 Ignored,
3029 // default value is false, but None for <keyword>
3030 FalseOrKeywordTag,
3031 // default value is true, but depends on another value for <keywords>
3032 TrueOrKeywordsTag,
3033 // default is false, but ignored in <context>
3034 DynamicAttr,
3035 };
3036 static const QHash<QStringView, DefaultBool> booleanAttrs({
3037 {u"fallthrough"_sv, DefaultBool::Ignored},
3038 {u"dynamic"_sv, DefaultBool::DynamicAttr},
3039 {u"hidden"_sv, DefaultBool::False},
3040 {u"indentationsensitive"_sv, DefaultBool::False},
3041 {u"noIndentationBasedFolding"_sv, DefaultBool::False},
3042 {u"lookAhead"_sv, DefaultBool::False},
3043 {u"firstNonSpace"_sv, DefaultBool::False},
3044 {u"insensitive"_sv, DefaultBool::FalseOrKeywordTag},
3045 {u"minimal"_sv, DefaultBool::False},
3046 {u"includeAttrib"_sv, DefaultBool::False},
3047 {u"italic"_sv, DefaultBool::None},
3048 {u"bold"_sv, DefaultBool::None},
3049 {u"underline"_sv, DefaultBool::None},
3050 {u"strikeOut"_sv, DefaultBool::None},
3051 {u"spellChecking"_sv, DefaultBool::True},
3052 {u"casesensitive"_sv, DefaultBool::TrueOrKeywordsTag},
3053 {u"ignored"_sv, DefaultBool::Ignored},
3054 });
3055
3056 auto it = booleanAttrs.find(attrName);
3057 // convert boolean value
3058 if (it != booleanAttrs.end()) {
3059 bool b = KSyntaxHighlighting::Xml::attrToBool(value);
3060 bool ignoreAttr = false;
3061 switch (*it) {
3062 case DefaultBool::Ignored:
3063 ignoreAttr = true;
3064 break;
3065 case DefaultBool::TrueOrKeywordsTag:
3066 ignoreAttr = (tagName == u"keywords"_sv) ? false : b;
3067 break;
3068 case DefaultBool::True:
3069 ignoreAttr = b;
3070 break;
3071 case DefaultBool::FalseOrKeywordTag:
3072 ignoreAttr = (tagName == u"keyword"_sv) ? false : !b;
3073 break;
3074 case DefaultBool::DynamicAttr:
3075 ignoreAttr = (tagName == u"context"_sv) || !b;
3076 break;
3077 case DefaultBool::False:
3078 ignoreAttr = !b;
3079 break;
3080 case DefaultBool::None:
3081 ignoreAttr = false;
3082 break;
3083 }
3084 if (!ignoreAttr) {
3085 out += u' ' % attrName % u"=\""_sv % (b ? u'1' : u'0') % u'"';
3086 }
3087 } else {
3088 const bool hasDQ = value.contains(u'"');
3089 // attribute in double quotes when the value does not contain " or contains " and '
3090 if (!hasDQ || value.contains(u'\'')) {
3091 out += u' ' % attrName % u"=\""_sv;
3092 writeXmlText(out, value, hasDQ);
3093 out += u'"';
3094 // attribute in single quotes because the value contains "
3095 } else {
3096 out += u' ' % attrName % u"='"_sv;
3097 writeXmlText(out, value);
3098 out += u'\'';
3099 }
3100 }
3101 }
3102
3103 /**
3104 * Sort attributes for better compression by rcc.
3105 */
3106 static void sortAttributes(QXmlStreamAttributes &attrs)
3107 {
3108 static const QHash<QStringView, int> priorityAttrs({
3109 // context and rule
3110 {u"attribute"_sv, 5},
3111
3112 // context and itemData
3113 {u"name"_sv, 4},
3114
3115 // context
3116 {u"noIndentationBasedFolding"_sv, 11},
3117 {u"lineEndContext"_sv, 9},
3118 {u"lineEmptyContext"_sv, 8},
3119 {u"fallthroughContext"_sv, 7},
3120
3121 // rule
3122 {u"lookAhead"_sv, 100},
3123 {u"firstNonSpace"_sv, 99},
3124 {u"dynamic"_sv, 98},
3125 {u"minimal"_sv, 97},
3126 {u"includeAttrib"_sv, 96},
3127 {u"insensitive"_sv, 95},
3128 {u"column"_sv, 50},
3129 {u"beginRegion"_sv, 40},
3130 {u"endRegion"_sv, 41},
3131 {u"weakDeliminator"_sv, 31},
3132 {u"additionalDeliminator"_sv, 30},
3133 {u"context"_sv, 20},
3134 {u"String"_sv, 2},
3135 {u"char"_sv, 2},
3136
3137 // itemData
3138 {u"strikeOut"_sv, 100},
3139 {u"underline"_sv, 99},
3140 {u"italic"_sv, 98},
3141 {u"bold"_sv, 97},
3142 {u"spellChecking"_sv, 96},
3143 {u"defStyleNum"_sv, 95},
3144 {u"color"_sv, 94},
3145 {u"backgroundColor"_sv, 93},
3146 {u"selBackgroundColor"_sv, 92},
3147 {u"selColor"_sv, 91},
3148 });
3149 std::sort(attrs.begin(), attrs.end(), [](auto &attr1, auto &attr2) {
3150 auto i1 = priorityAttrs.value(attr1.name());
3151 auto i2 = priorityAttrs.value(attr2.name());
3152 if (i1 != i2) {
3153 return i1 < i2;
3154 }
3155 return attr1.name() < attr2.name();
3156 });
3157 }
3158
3159 struct Context {
3160 QString name;
3161 QString data;
3162 };
3163 QString m_data = u"<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE language>"_s;
3164 std::vector<Context> m_contexts;
3165 QHash<QString, int> m_contextRefs;
3166 QVarLengthArray<bool, 8> m_hasElems;
3167 QString m_kateVersion;
3168 bool m_inContexts = false;
3169};
3170
3171void printFileError(const QFile &file)
3172{
3173 qWarning() << "Failed to open" << file.fileName() << "-" << file.errorString();
3174}
3175
3176void printXmlError(const QString &fileName, const QXmlStreamReader &xml)
3177{
3178 qWarning() << fileName << "-" << xml.errorString() << "@ offset" << xml.characterOffset();
3179};
3180
3181QStringList readListing(const QString &fileName)
3182{
3183 QFile file(fileName);
3184 if (!file.open(QIODevice::ReadOnly)) {
3185 printFileError(file);
3186 return QStringList();
3187 }
3188
3189 QXmlStreamReader xml(&file);
3190 QStringList listing;
3191 while (!xml.atEnd()) {
3192 xml.readNext();
3193
3194 // add only .xml files, no .json or stuff
3195 if (xml.isCharacters() && xml.text().contains(QLatin1String(".xml"))) {
3196 listing.append(xml.text().toString());
3197 }
3198 }
3199
3200 if (xml.hasError()) {
3201 printXmlError(fileName, xml);
3202 listing.clear();
3203 }
3204
3205 return listing;
3206}
3207
3208/**
3209 * check if the "extensions" attribute have valid wildcards
3210 * @param extensions extensions string to check
3211 * @return valid?
3212 */
3213bool checkExtensions(QStringView extensions)
3214{
3215 // get list of extensions
3216 const QList<QStringView> extensionParts = extensions.split(u';', Qt::SkipEmptyParts);
3217
3218 // ok if empty
3219 if (extensionParts.isEmpty()) {
3220 return true;
3221 }
3222
3223 // check that only valid wildcard things are inside the parts
3224 for (const auto &extension : extensionParts) {
3225 for (const auto c : extension) {
3226 // eat normal things
3227 if (c.isDigit() || c.isLetter()) {
3228 continue;
3229 }
3230
3231 // allow some special characters
3232 if (c == u'.' || c == u'-' || c == u'_' || c == u'+') {
3233 continue;
3234 }
3235
3236 // only allowed wildcard things: '?' and '*'
3237 if (c == u'?' || c == u'*') {
3238 continue;
3239 }
3240
3241 qWarning() << "invalid character" << c << "seen in extensions wildcard";
3242 return false;
3243 }
3244 }
3245
3246 // all checks passed
3247 return true;
3248}
3249
3250struct CompressedFile {
3251 QString fileName;
3252 QString xmlData;
3253};
3254
3255}
3256
3257int main(int argc, char *argv[])
3258{
3259 // get app instance
3260 QCoreApplication app(argc, argv);
3261
3262 // ensure enough arguments are passed
3263 if (app.arguments().size() < 4) {
3264 return 1;
3265 }
3266
3267#ifdef HAS_XERCESC
3268 // care for proper init and cleanup
3269 XMLPlatformUtils::Initialize();
3270 auto cleanup = qScopeGuard(XMLPlatformUtils::Terminate);
3271
3272 /*
3273 * parse XSD first time and cache it
3274 */
3275 XMLGrammarPoolImpl xsd(XMLPlatformUtils::fgMemoryManager);
3276
3277 // create parser for the XSD
3278 CustomXMLValidator parser(&xsd);
3279
3280 // load grammar into the pool, on error just abort
3281 const auto xsdFile = app.arguments().at(2);
3282 if (!parser.loadGrammar((const char16_t *)xsdFile.utf16(), Grammar::SchemaGrammarType, true) || parser.eh.failed()) {
3283 qWarning("Failed to parse XSD %s: %s", qPrintable(xsdFile), qPrintable(parser.messages));
3284 return 2;
3285 }
3286
3287 // lock the pool, no later modifications wanted!
3288 xsd.lockPool();
3289#endif
3290
3291 const QString hlFilenamesListing = app.arguments().value(3);
3292 if (hlFilenamesListing.isEmpty()) {
3293 return 1;
3294 }
3295
3296 QStringList hlFilenames = readListing(hlFilenamesListing);
3297 if (hlFilenames.isEmpty()) {
3298 qWarning("Failed to read %s", qPrintable(hlFilenamesListing));
3299 return 3;
3300 }
3301
3302 // text attributes
3303 const QStringList textAttributes = QStringList() << QStringLiteral("name") << QStringLiteral("alternativeNames") << QStringLiteral("section")
3304 << QStringLiteral("mimetype") << QStringLiteral("extensions") << QStringLiteral("style")
3305 << QStringLiteral("author") << QStringLiteral("license") << QStringLiteral("indenter");
3306
3307 // index all given highlightings
3308 HlFilesChecker filesChecker;
3309 QVariantMap hls;
3310 int anyError = 0;
3311 std::vector<CompressedFile> compressedFiles;
3312 for (const QString &hlFilename : std::as_const(hlFilenames)) {
3313 QFile hlFile(hlFilename);
3314 if (!hlFile.open(QIODevice::ReadOnly)) {
3315 printFileError(hlFile);
3316 anyError = 3;
3317 continue;
3318 }
3319
3320#ifdef HAS_XERCESC
3321 // create parser
3322 CustomXMLValidator parser(&xsd);
3323
3324 // parse the XML file
3325 parser.parse((const char16_t *)hlFile.fileName().utf16());
3326
3327 // report issues
3328 if (parser.eh.failed()) {
3329 qWarning("Failed to validate XML %s: %s", qPrintable(hlFile.fileName()), qPrintable(parser.messages));
3330 anyError = 4;
3331 continue;
3332 }
3333#endif
3334
3335 // read the needed attributes from toplevel language tag
3336 hlFile.reset();
3337 QXmlStreamReader xml(&hlFile);
3338 if (xml.readNextStartElement()) {
3339 if (xml.name() != QLatin1String("language")) {
3340 anyError = 5;
3341 continue;
3342 }
3343 } else {
3344 anyError = 6;
3345 continue;
3346 }
3347
3348 // map to store hl info
3349 QVariantMap hl;
3350
3351 // transfer text attributes
3352 for (const QString &attribute : std::as_const(textAttributes)) {
3353 hl[attribute] = xml.attributes().value(attribute).toString();
3354 }
3355
3356 // check if extensions have the right format
3357 if (!checkExtensions(hl[QStringLiteral("extensions")].toString())) {
3358 qWarning() << hlFilename << "'extensions' wildcards invalid:" << hl[QStringLiteral("extensions")].toString();
3359 anyError = 23;
3360 }
3361
3362 // numerical attributes
3363 hl[QStringLiteral("version")] = xml.attributes().value(QLatin1String("version")).toInt();
3364 hl[QStringLiteral("priority")] = xml.attributes().value(QLatin1String("priority")).toInt();
3365
3366 // add boolean one
3367 hl[QStringLiteral("hidden")] = attrToBool(xml.attributes().value(QLatin1String("hidden")));
3368
3369 // keep some strings as UTF-8 for faster translations
3370 hl[QStringLiteral("nameUtf8")] = hl[QStringLiteral("name")].toString().toUtf8();
3371 hl[QStringLiteral("sectionUtf8")] = hl[QStringLiteral("section")].toString().toUtf8();
3372
3373 // remember hl
3374 hls[QFileInfo(hlFile).fileName()] = hl;
3375
3376 const QStringView kateversion = xml.attributes().value(QStringLiteral("kateversion"));
3377 const QString hlName = hl[QStringLiteral("name")].toString();
3378 const QString hlAlternativeNames = hl[QStringLiteral("alternativeNames")].toString();
3379
3380 filesChecker.setDefinition(kateversion, hlFilename, hlName, hlAlternativeNames.split(u';', Qt::SkipEmptyParts));
3381
3382 // As the compressor removes "fallthrough" attribute which is required with
3383 // "fallthroughContext" before the 5.62 version, the minimum version is
3384 // automatically increased
3385 HlCompressor compressor((filesChecker.currentVersion() < KateVersion{5, 62}) ? u"5.62"_s : kateversion.toString());
3386 compressor.processElement(xml);
3387
3388 // scan for broken regex or keywords with spaces
3389 while (!xml.atEnd()) {
3390 xml.readNext();
3391 filesChecker.processElement(xml);
3392 compressor.processElement(xml);
3393 }
3394
3395 if (xml.hasError()) {
3396 anyError = 33;
3397 printXmlError(hlFilename, xml);
3398 }
3399
3400 compressedFiles.emplace_back(CompressedFile{
3401 QFileInfo(hlFilename).fileName(),
3402 compressor.compressedXML(),
3403 });
3404 }
3405
3406 filesChecker.resolveContexts();
3407
3408 if (!filesChecker.check()) {
3409 anyError = 7;
3410 }
3411
3412 // bail out if any problem was seen
3413 if (anyError) {
3414 return anyError;
3415 }
3416
3417 // check compressed file
3418 HlFilesChecker filesChecker2;
3419 const QString compressedDir = app.arguments().at(4) + u"/"_sv;
3420 for (const auto &compressedFile : std::as_const(compressedFiles)) {
3421 const auto outFileName = compressedDir + compressedFile.fileName;
3422 auto utf8Data = compressedFile.xmlData.toUtf8();
3423
3424#ifdef HAS_XERCESC
3425 // create parser
3426 CustomXMLValidator parser(&xsd);
3427
3428 auto utf8Filename = outFileName.toUtf8();
3429 utf8Filename.append('\0');
3430 // parse the XML file
3431 MemBufInputSource membuf(reinterpret_cast<const XMLByte *>(utf8Data.constData()), utf8Data.size(), utf8Filename.data());
3432
3433 // report issues
3434 if (parser.eh.failed()) {
3435 qWarning("Failed to validate XML %s: %s", qPrintable(outFileName), qPrintable(parser.messages));
3436 return 8;
3437 }
3438#endif
3439
3440 QBuffer buffer(&utf8Data);
3441 buffer.open(QBuffer::ReadOnly);
3442 QXmlStreamReader xml(&buffer);
3443 // scan for broken file
3444 while (!xml.atEnd()) {
3445 if (xml.readNext() == QXmlStreamReader::TokenType::StartElement && xml.name() == u"language"_sv) {
3446 const auto attrs = xml.attributes();
3447 const auto version = attrs.value(u"kateversion"_sv);
3448 const QString hlName = attrs.value(u"name"_sv).toString();
3449 const QString hlAlternativeNames = attrs.value(u"alternativeNames"_sv).toString();
3450 filesChecker2.setDefinition(version, outFileName, hlName, hlAlternativeNames.split(u';', Qt::SkipEmptyParts));
3451 }
3452 filesChecker2.processElement(xml);
3453 }
3454
3455 if (xml.hasError()) {
3456 printXmlError(outFileName, xml);
3457 return 9;
3458 }
3459
3460 // create outfile, after all has worked!
3461 QFile outFile(outFileName);
3462 if (!outFile.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
3463 return 10;
3464 }
3465 outFile.write(utf8Data);
3466 }
3467
3468 filesChecker2.resolveContexts();
3469
3470 // bail out if any problem was seen
3471 if (!filesChecker2.check()) {
3472 return 11;
3473 }
3474
3475 // create outfile, after all has worked!
3476 QFile outFile(app.arguments().at(1));
3477 if (!outFile.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
3478 return 12;
3479 }
3480
3481 // write out json
3482 outFile.write(QCborValue::fromVariant(QVariant(hls)).toCbor());
3483
3484 // be done
3485 return 0;
3486}
AKONADI_MIME_EXPORT const char Ignored[]
Type type(const QSqlDatabase &db)
char * toString(const EngineQuery &query)
KDB_EXPORT KDbVersionInfo version()
void error(QWidget *parent, const QString &text, const QString &title, const KGuiItem &buttonOk, Options options=Notify)
KIOCORE_EXPORT void add(const QString &fileClass, const QString &directory)
QAction * replace(const QObject *recvr, const char *slot, QObject *parent)
QString name(StandardAction id)
QAction * next(const QObject *recvr, const char *slot, QObject *parent)
QAction * find(const QObject *recvr, const char *slot, QObject *parent)
const QList< QKeySequence > & end()
KTEXTEDITOR_EXPORT size_t qHash(KTextEditor::Cursor cursor, size_t seed=0) noexcept
bool operator<(const PosRange< Trait > &l, const PosRange< Trait > &r)
bool operator==(const StyleDelim &l, const StyleDelim &r)
QCborValue fromVariant(const QVariant &variant)
bool isDigit(char32_t ucs4)
bool isLetter(char32_t ucs4)
char32_t toLower(char32_t ucs4)
char32_t toUpper(char32_t ucs4)
char16_t & unicode()
virtual QString fileName() const const override
bool open(FILE *fh, OpenMode mode, FileHandleFlags handleFlags)
QString fileName() const const
iterator find(const Key &key)
QString errorString() const const
void append(QList< T > &&value)
iterator begin()
void clear()
iterator end()
bool isEmpty() const const
void push_back(parameter_type value)
void reserve(qsizetype size)
qsizetype size() const const
iterator end()
iterator find(const Key &key)
iterator insert(const Key &key, const T &value)
QString errorString() const const
bool isValid() const const
QString pattern() const const
qsizetype patternErrorOffset() const const
void clear()
bool contains(const QSet< T > &other) const const
iterator erase(const_iterator pos)
iterator insert(const T &value)
qsizetype size() const const
const QChar at(qsizetype position) const const
QChar & back()
void chop(qsizetype n)
QString fromUtf16(const char16_t *unicode, qsizetype size)
qsizetype indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const const
bool isEmpty() const const
QString number(double n, char format, int precision)
QString & replace(QChar before, QChar after, Qt::CaseSensitivity cs)
void reserve(qsizetype size)
qsizetype size() const const
QString sliced(qsizetype pos) const const
QStringList split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const const
QByteArray toUtf8() const const
bool contains(QChar c, Qt::CaseSensitivity cs) const const
const_pointer data() const const
QChar first() const const
qsizetype indexOf(QChar c, qsizetype from, Qt::CaseSensitivity cs) const const
bool isNull() const const
qsizetype size() const const
QStringView sliced(qsizetype pos) const const
QList< QStringView > split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
bool startsWith(QChar ch) const const
int toInt(bool *ok, int base) const const
QString toString() const const
CaseInsensitive
SkipEmptyParts
QTextStream & endl(QTextStream &stream)
QStringView name() const const
QStringView value() const const
QStringView value(QAnyStringView namespaceUri, QAnyStringView name) const const
bool atEnd() const const
QXmlStreamAttributes attributes() const const
qint64 characterOffset() const const
QString errorString() const const
bool hasError() const const
bool isCharacters() const const
bool isWhitespace() const const
qint64 lineNumber() const const
QStringView name() const const
TokenType readNext()
bool readNextStartElement()
QStringView text() const const
TokenType tokenType() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:49:02 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.