KSyntaxHighlighting

katehighlightingindexer.cpp
1/*
2 SPDX-FileCopyrightText: 2014 Christoph Cullmann <cullmann@kde.org>
3 SPDX-FileCopyrightText: 2020 Jonathan Poelen <jonathan.poelen@gmail.com>
4
5 SPDX-License-Identifier: MIT
6*/
7
8#include <QBuffer>
9#include <QCborValue>
10#include <QCoreApplication>
11#include <QDebug>
12#include <QFile>
13#include <QFileInfo>
14#include <QMutableMapIterator>
15#include <QRegularExpression>
16#include <QScopeGuard>
17#include <QString>
18#include <QVariant>
19#include <QXmlStreamReader>
20
21#ifdef HAS_XERCESC
22
23#include <xercesc/framework/MemBufInputSource.hpp>
24#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
25
26#include <xercesc/parsers/SAX2XMLReaderImpl.hpp>
27
28#include <xercesc/sax/ErrorHandler.hpp>
29#include <xercesc/sax/SAXParseException.hpp>
30
31#include <xercesc/util/PlatformUtils.hpp>
32#include <xercesc/util/XMLString.hpp>
33#include <xercesc/util/XMLUni.hpp>
34
35#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
36#include <xercesc/validators/common/Grammar.hpp>
37
38using namespace xercesc;
39
40/*
41 * Ideas taken from:
42 *
43 * author : Boris Kolpackov <boris@codesynthesis.com>
44 * copyright : not copyrighted - public domain
45 *
46 * This program uses Xerces-C++ SAX2 parser to load a set of schema files
47 * and then to validate a set of XML documents against these schemas. To
48 * build this program you will need Xerces-C++ 3.0.0 or later. For more
49 * information, see:
50 *
51 * http://www.codesynthesis.com/~boris/blog/2010/03/15/validating-external-schemas-xerces-cxx/
52 */
53
54/**
55 * Error handler object used during xml schema validation.
56 */
57class CustomErrorHandler : public ErrorHandler
58{
59public:
60 /**
61 * Constructor
62 * @param messages Pointer to the error message string to fill.
63 */
64 CustomErrorHandler(QString *messages)
65 : m_messages(messages)
66 {
67 }
68
69 /**
70 * Check global success/fail state.
71 * @return True if there was a failure, false otherwise.
72 */
73 bool failed() const
74 {
75 return m_failed;
76 }
77
78private:
79 /**
80 * Severity classes for error messages.
81 */
82 enum severity { s_warning, s_error, s_fatal };
83
84 /**
85 * Wrapper for warning exceptions.
86 * @param e Exception to handle.
87 */
88 void warning(const SAXParseException &e) override
89 {
90 m_failed = true; // be strict, warnings are evil, too!
91 handle(e, s_warning);
92 }
93
94 /**
95 * Wrapper for error exceptions.
96 * @param e Exception to handle.
97 */
98 void error(const SAXParseException &e) override
99 {
100 m_failed = true;
101 handle(e, s_error);
102 }
103
104 /**
105 * Wrapper for fatal error exceptions.
106 * @param e Exception to handle.
107 */
108 void fatalError(const SAXParseException &e) override
109 {
110 m_failed = true;
111 handle(e, s_fatal);
112 }
113
114 /**
115 * Reset the error status to "no error".
116 */
117 void resetErrors() override
118 {
119 m_failed = false;
120 }
121
122 /**
123 * Generic handler for error/warning/fatal error message exceptions.
124 * @param e Exception to handle.
125 * @param s Enum value encoding the message severtity.
126 */
127 void handle(const SAXParseException &e, severity s)
128 {
129 // get id to print
130 const XMLCh *xid(e.getPublicId());
131 if (!xid)
132 xid = e.getSystemId();
133
134 m_messages << QString::fromUtf16(xid) << ":" << e.getLineNumber() << ":" << e.getColumnNumber() << " " << (s == s_warning ? "warning: " : "error: ")
135 << QString::fromUtf16(e.getMessage()) << Qt::endl;
136 }
137
138private:
139 /**
140 * Storage for created error messages in this handler.
141 */
142 QTextStream m_messages;
143
144 /**
145 * Global error state. True if there was an error, false otherwise.
146 */
147 bool m_failed = false;
148};
149
150class CustomXMLValidator : public SAX2XMLReaderImpl
151{
152public:
153 QString messages;
154 CustomErrorHandler eh{&messages};
155
156 CustomXMLValidator(XMLGrammarPool *xsd)
157 : SAX2XMLReaderImpl(XMLPlatformUtils::fgMemoryManager, xsd)
158 {
159 // Commonly useful configuration.
160 //
161 setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
162 setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);
163 setFeature(XMLUni::fgSAX2CoreValidation, true);
164
165 // Enable validation.
166 //
167 setFeature(XMLUni::fgXercesSchema, true);
168 setFeature(XMLUni::fgXercesSchemaFullChecking, true);
169 setFeature(XMLUni::fgXercesValidationErrorAsFatal, true);
170
171 // Use the loaded grammar during parsing.
172 //
173 setFeature(XMLUni::fgXercesUseCachedGrammarInParse, true);
174
175 // Don't load schemas from any other source (e.g., from XML document's
176 // xsi:schemaLocation attributes).
177 //
178 setFeature(XMLUni::fgXercesLoadSchema, false);
179
180 // Xerces-C++ 3.1.0 is the first version with working multi import
181 // support.
182 //
183 setFeature(XMLUni::fgXercesHandleMultipleImports, true);
184
185 setErrorHandler(&eh);
186 }
187};
188
189#endif
190
191#include "../lib/worddelimiters_p.h"
192#include "../lib/xml_p.h"
193
194#include <array>
195
196using KSyntaxHighlighting::WordDelimiters;
197using KSyntaxHighlighting::Xml::attrToBool;
198
199using namespace Qt::Literals::StringLiterals;
200
201#if QT_VERSION < QT_VERSION_CHECK(6, 10, 0)
202static constexpr QStringView operator""_sv(const char16_t *s, std::size_t n)
203{
204 return QStringView(s, s + n);
205}
206#endif
207
208namespace
209{
210
/**
 * Two-part version number (major.minor) as found in the kateversion attribute.
 * A default-constructed value is 0.0.
 */
struct KateVersion {
    int majorRevision;
    int minorRevision;

    KateVersion(int majorRevision = 0, int minorRevision = 0)
        : majorRevision{majorRevision}
        , minorRevision{minorRevision}
    {
    }

    //! Lexicographic ordering: the major part decides first, the minor part breaks ties.
    bool operator<(const KateVersion &version) const
    {
        if (majorRevision != version.majorRevision) {
            return majorRevision < version.majorRevision;
        }
        return minorRevision < version.minorRevision;
    }
};
226
227class HlFilesChecker
228{
229public:
230 void setDefinition(QStringView verStr, const QString &filename, const QString &name, const QStringList &alternativeNames)
231 {
232 m_currentDefinition = &*m_definitions.insert(name, Definition{});
233 m_currentDefinition->languageName = name;
234 m_currentDefinition->filename = filename;
235 m_currentDefinition->kateVersionStr = verStr.toString();
236 m_currentKeywords = nullptr;
237 m_currentContext = nullptr;
238
239 const auto idx = verStr.indexOf(u'.');
240 if (idx <= 0) {
241 qWarning() << filename << "invalid kateversion" << verStr;
242 m_success = false;
243 } else {
244 m_currentDefinition->kateVersion = {verStr.sliced(0, idx).toInt(), verStr.sliced(idx + 1).toInt()};
245 }
246
247 auto checkName = [this, &filename](char const *nameType, const QString &name) {
248 auto it = m_names.find(name);
249 if (it != m_names.end()) {
250 qWarning() << filename << "duplicate" << nameType << "with" << it.value();
251 m_success = false;
252 } else {
253 m_names.insert(name, filename);
254 }
255 };
256 checkName("name", name);
257 for (const auto &alternativeName : alternativeNames) {
258 checkName("alternative name", alternativeName);
259 }
260 }
261
262 KateVersion currentVersion() const
263 {
264 return m_currentDefinition->kateVersion;
265 }
266
267 void processElement(const QXmlStreamReader &xml)
268 {
269 switch (xml.tokenType()) {
271 if (m_currentContext) {
272 m_currentContext->rules.push_back(Context::Rule{});
273 auto &rule = m_currentContext->rules.back();
274 m_success = rule.parseElement(m_currentDefinition->filename, xml) && m_success;
275 m_currentContext->hasDynamicRule = m_currentContext->hasDynamicRule || rule.dynamic == XmlBool::True;
276 } else if (m_currentKeywords) {
277 m_inKeywordItem = true;
278 } else if (xml.name() == u"context"_sv) {
279 processContextElement(xml);
280 } else if (xml.name() == u"list"_sv) {
281 processListElement(xml);
282 } else if (xml.name() == u"keywords"_sv) {
283 m_success = m_currentDefinition->parseKeywords(xml) && m_success;
284 } else if (xml.name() == u"emptyLine"_sv) {
285 m_success = parseEmptyLine(m_currentDefinition->filename, xml) && m_success;
286 } else if (xml.name() == u"itemData"_sv) {
287 m_success = m_currentDefinition->itemDatas.parseElement(m_currentDefinition->filename, xml) && m_success;
288 }
289 break;
290
292 if (m_currentContext && xml.name() == u"context"_sv) {
293 m_currentContext = nullptr;
294 } else if (m_currentKeywords && xml.name() == u"list"_sv) {
295 m_currentKeywords = nullptr;
296 } else if (m_currentKeywords) {
297 m_success = m_currentKeywords->items.parseElement(m_currentDefinition->filename, xml, m_textContent) && m_success;
298 m_textContent.clear();
299 m_inKeywordItem = false;
300 }
301 break;
302
305 if (m_inKeywordItem) {
306 m_textContent += xml.text();
307 }
308 break;
309
310 default:;
311 }
312 }
313
314 //! Resolve context attribute and include tag
315 void resolveContexts()
316 {
317 QMutableMapIterator<QString, Definition> def(m_definitions);
318 while (def.hasNext()) {
319 def.next();
320 auto &definition = def.value();
321 auto &contexts = definition.contexts;
322
323 if (contexts.isEmpty()) {
324 qWarning() << definition.filename << "has no context";
325 m_success = false;
326 continue;
327 }
328
329 auto markAsUsedContext = [](ContextName &contextName) {
330 if (!contextName.stay && contextName.context) {
331 contextName.context->isOnlyIncluded = false;
332 }
333 };
334
335 QMutableMapIterator<QString, Context> contextIt(contexts);
336 while (contextIt.hasNext()) {
337 contextIt.next();
338 auto &context = contextIt.value();
339 resolveContextName(definition, context, context.lineEndContext, context.line);
340 resolveContextName(definition, context, context.lineEmptyContext, context.line);
341 resolveContextName(definition, context, context.fallthroughContext, context.line);
342 markAsUsedContext(context.lineEndContext);
343 markAsUsedContext(context.lineEmptyContext);
344 markAsUsedContext(context.fallthroughContext);
345 for (auto &rule : context.rules) {
346 rule.parentContext = &context;
347 resolveContextName(definition, context, rule.context, rule.line);
348 if (rule.type != Context::Rule::Type::IncludeRules) {
349 markAsUsedContext(rule.context);
350 } else if (rule.includeAttrib == XmlBool::True && rule.context.context) {
351 rule.context.context->referencedWithIncludeAttrib = true;
352 }
353 }
354 }
355
356 auto *firstContext = &*definition.contexts.find(definition.firstContextName);
357 firstContext->isOnlyIncluded = false;
358 definition.firstContext = firstContext;
359 }
360
361 resolveIncludeRules();
362 }
363
364 bool check() const
365 {
366 bool success = m_success;
367
368 const auto usedContexts = extractUsedContexts();
369
370 QMap<const Definition *, const Definition *> maxVersionByDefinitions;
371 QMap<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRules;
372
373 QMapIterator<QString, Definition> def(m_definitions);
374 while (def.hasNext()) {
375 def.next();
376 const auto &definition = def.value();
377 const auto &filename = definition.filename;
378
379 auto *maxDef = maxKateVersionDefinition(definition, maxVersionByDefinitions);
380 if (maxDef != &definition) {
381 qWarning() << definition.filename << "depends on a language" << maxDef->languageName << "in version" << maxDef->kateVersionStr
382 << ". Please, increase kateversion.";
383 success = false;
384 }
385
386 QSet<ItemDatas::Style> usedAttributeNames;
387 QSet<ItemDatas::Style> ignoredAttributeNames;
388 success = checkKeywordsList(definition) && success;
389 success = checkContexts(definition, usedAttributeNames, ignoredAttributeNames, usedContexts, unreachableIncludedRules) && success;
390
391 // search for non-existing itemDatas.
392 const auto invalidNames = usedAttributeNames - definition.itemDatas.styleNames;
393 for (const auto &styleName : invalidNames) {
394 qWarning() << filename << "line" << styleName.line << "reference of non-existing itemData attributes:" << styleName.name;
395 success = false;
396 }
397
398 // search for existing itemDatas, but unusable.
399 const auto ignoredNames = ignoredAttributeNames - usedAttributeNames;
400 for (const auto &styleName : ignoredNames) {
401 qWarning() << filename << "line" << styleName.line << "attribute" << styleName.name
402 << "is never used. All uses are with lookAhead=true or <IncludeRules/>";
403 success = false;
404 }
405
406 // search for unused itemDatas.
407 auto unusedNames = definition.itemDatas.styleNames - usedAttributeNames;
408 unusedNames -= ignoredNames;
409 for (const auto &styleName : std::as_const(unusedNames)) {
410 qWarning() << filename << "line" << styleName.line << "unused itemData:" << styleName.name;
411 success = false;
412 }
413 }
414
415 QMutableMapIterator<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRuleIt(unreachableIncludedRules);
416 while (unreachableIncludedRuleIt.hasNext()) {
417 unreachableIncludedRuleIt.next();
418 IncludedRuleUnreachableBy &unreachableRulesBy = unreachableIncludedRuleIt.value();
419 if (unreachableRulesBy.alwaysUnreachable) {
420 auto *rule = unreachableIncludedRuleIt.key();
421
422 if (!rule->parentContext->isOnlyIncluded) {
423 continue;
424 }
425
426 // remove duplicates rules
427 QSet<const Context::Rule *> rules;
428 auto &unreachableBy = unreachableRulesBy.unreachableBy;
429 unreachableBy.erase(std::remove_if(unreachableBy.begin(),
430 unreachableBy.end(),
431 [&](const RuleAndInclude &ruleAndInclude) {
432 if (rules.contains(ruleAndInclude.rule)) {
433 return true;
434 }
435 rules.insert(ruleAndInclude.rule);
436 return false;
437 }),
438 unreachableBy.end());
439
440 QString message;
441 message.reserve(128);
442 for (auto &ruleAndInclude : std::as_const(unreachableBy)) {
443 message += u"line "_sv;
444 message += QString::number(ruleAndInclude.rule->line);
445 message += u" ["_sv;
446 message += ruleAndInclude.rule->parentContext->name;
447 if (rule->filename != ruleAndInclude.rule->filename) {
448 message += u" ("_sv;
449 message += ruleAndInclude.rule->filename;
450 message += u')';
451 }
452 if (ruleAndInclude.includeRules) {
453 message += u" via line "_sv;
454 message += QString::number(ruleAndInclude.includeRules->line);
455 }
456 message += u"], "_sv;
457 }
458 message.chop(2);
459
460 qWarning() << rule->filename << "line" << rule->line << "no IncludeRule can reach this rule, hidden by" << message;
461 success = false;
462 }
463 }
464
465 return success;
466 }
467
468private:
//! Tri-state value of a boolean XML attribute.
//! \c Unspecified comes first so that a value-initialized \c XmlBool{} means
//! "attribute not given".
enum class XmlBool {
    Unspecified,
    False,
    True,
};
474
475 struct Context;
476
477 struct ContextName {
478 QString name;
479 int popCount = 0;
480 bool stay = false;
481
482 Context *context = nullptr;
483 };
484
485 struct Parser {
486 const QString &filename;
487 const QXmlStreamReader &xml;
488 const QXmlStreamAttribute &attr;
489 bool success;
490
491 //! Read a string type attribute, \c success = \c false when \p str is not empty
492 //! \return \c true when attr.name() == attrName, otherwise false
493 bool extractString(QString &str, QStringView attrName)
494 {
495 if (attr.name() != attrName) {
496 return false;
497 }
498
499 str = attr.value().toString();
500 if (str.isEmpty()) {
501 qWarning() << filename << "line" << xml.lineNumber() << attrName << "attribute is empty";
502 success = false;
503 }
504
505 return true;
506 }
507
508 //! Read a bool type attribute, \c success = \c false when \p xmlBool is not \c XmlBool::Unspecified.
509 //! \return \c true when attr.name() == attrName, otherwise false
510 bool extractXmlBool(XmlBool &xmlBool, QStringView attrName)
511 {
512 if (attr.name() != attrName) {
513 return false;
514 }
515
516 xmlBool = attr.value().isNull() ? XmlBool::Unspecified : attrToBool(attr.value()) ? XmlBool::True : XmlBool::False;
517
518 return true;
519 }
520
521 //! Read a positive integer type attribute, \c success = \c false when \p positive is already greater than or equal to 0
522 //! \return \c true when attr.name() == attrName, otherwise false
523 bool extractPositive(int &positive, QStringView attrName)
524 {
525 if (attr.name() != attrName) {
526 return false;
527 }
528
529 bool ok = true;
530 positive = attr.value().toInt(&ok);
531
532 if (!ok || positive < 0) {
533 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a positive integer:" << attr.value();
534 success = false;
535 }
536
537 return true;
538 }
539
540 //! Read a color, \c success = \c false when \p color is already greater than or equal to 0
541 //! \return \c true when attr.name() == attrName, otherwise false
542 bool checkColor(QStringView attrName)
543 {
544 if (attr.name() != attrName) {
545 return false;
546 }
547
548 const auto value = attr.value();
549 if (value.isEmpty() /*|| QColor(value).isValid()*/) {
550 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a color:" << value;
551 success = false;
552 }
553
554 return true;
555 }
556
557 //! Read a QChar, \c success = \c false when \p c is not \c '\0' or does not have one char
558 //! \return \c true when attr.name() == attrName, otherwise false
559 bool extractChar(QChar &c, QStringView attrName)
560 {
561 if (attr.name() != attrName) {
562 return false;
563 }
564
565 if (attr.value().size() == 1) {
566 c = attr.value()[0];
567 } else {
568 c = u'_';
569 qWarning() << filename << "line" << xml.lineNumber() << attrName << "must contain exactly one char:" << attr.value();
570 success = false;
571 }
572
573 return true;
574 }
575
576 //! \return parsing status when \p isExtracted is \c true, otherwise \c false
577 bool checkIfExtracted(bool isExtracted)
578 {
579 if (isExtracted) {
580 return success;
581 }
582
583 qWarning() << filename << "line" << xml.lineNumber() << "unknown attribute:" << attr.name();
584 return false;
585 }
586 };
587
588 struct Keywords {
589 struct Items {
590 struct Item {
591 QString content;
592 int line;
593
594 friend size_t qHash(const Item &item, size_t seed = 0)
595 {
596 return qHash(item.content, seed);
597 }
598
599 friend bool operator==(const Item &item0, const Item &item1)
600 {
601 return item0.content == item1.content;
602 }
603 };
604
605 QList<Item> keywords;
606 QSet<Item> includes;
607
608 bool parseElement(const QString &filename, const QXmlStreamReader &xml, const QString &content)
609 {
610 bool success = true;
611
612 const int line = xml.lineNumber();
613
614 if (content.isEmpty()) {
615 qWarning() << filename << "line" << line << "is empty:" << xml.name();
616 success = false;
617 }
618
619 if (xml.name() == u"include"_sv) {
620 includes.insert({content, line});
621 } else if (xml.name() == u"item"_sv) {
622 keywords.append({content, line});
623 } else {
624 qWarning() << filename << "line" << line << "invalid element:" << xml.name();
625 success = false;
626 }
627
628 return success;
629 }
630 };
631
632 QString name;
633 Items items;
634 int line;
635
636 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
637 {
638 line = xml.lineNumber();
639
640 bool success = true;
641 const auto attrs = xml.attributes();
642 for (const auto &attr : attrs) {
643 Parser parser{filename, xml, attr, success};
644
645 const bool isExtracted = parser.extractString(name, u"name"_sv);
646
647 success = parser.checkIfExtracted(isExtracted);
648 }
649 return success;
650 }
651 };
652
653 struct Context {
654 struct Rule {
//! Kind of a rule. Apart from \c Unknown, every enumerator is spelled like
//! the XML element name it is mapped from in parseElement().
//! \c Unknown is first, so a value-initialized Type is \c Unknown.
enum class Type {
    Unknown,
    AnyChar,
    Detect2Chars,
    DetectChar,
    DetectIdentifier,
    DetectSpaces,
    Float,
    HlCChar,
    HlCHex,
    HlCOct,
    HlCStringChar,
    IncludeRules,
    Int,
    LineContinue,
    RangeDetect,
    RegExpr,
    StringDetect,
    WordDetect,
    keyword,
};
676
677 Type type{};
678
679 bool isDotRegex = false;
680 int line = -1;
681
682 // commonAttributes
683 QString attribute;
684 ContextName context;
685 QString beginRegion;
686 QString endRegion;
687 int column = -1;
688 XmlBool lookAhead{};
689 XmlBool firstNonSpace{};
690
691 // StringDetect, WordDetect, keyword
692 XmlBool insensitive{};
693
694 // DetectChar, StringDetect, RegExpr, keyword
695 XmlBool dynamic{};
696
697 // Regex
698 XmlBool minimal{};
699
700 // IncludeRule
701 XmlBool includeAttrib{};
702
703 // DetectChar, Detect2Chars, LineContinue, RangeDetect
704 QChar char0;
705 // Detect2Chars, RangeDetect
706 QChar char1;
707
708 // AnyChar, StringDetect, RegExpr, WordDetect, keyword
709 QString string;
710 // RegExpr without .* as suffix
711 QString sanitizedString;
712
713 // Float, HlCHex, HlCOct, Int, WordDetect, keyword
714 QString additionalDeliminator;
715 QString weakDeliminator;
716
717 // rules included by IncludeRules (without IncludeRule)
718 QList<const Rule *> includedRules;
719
720 // IncludeRules included by IncludeRules
721 QSet<const Rule *> includedIncludeRules;
722
723 Context const *parentContext = nullptr;
724
725 QString filename;
726
727 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
728 {
729 this->filename = filename;
730 line = xml.lineNumber();
731
732 using Pair = QPair<QStringView, Type>;
733 static const auto pairs = {
734 Pair{u"AnyChar"_sv, Type::AnyChar},
735 Pair{u"Detect2Chars"_sv, Type::Detect2Chars},
736 Pair{u"DetectChar"_sv, Type::DetectChar},
737 Pair{u"DetectIdentifier"_sv, Type::DetectIdentifier},
738 Pair{u"DetectSpaces"_sv, Type::DetectSpaces},
739 Pair{u"Float"_sv, Type::Float},
740 Pair{u"HlCChar"_sv, Type::HlCChar},
741 Pair{u"HlCHex"_sv, Type::HlCHex},
742 Pair{u"HlCOct"_sv, Type::HlCOct},
743 Pair{u"HlCStringChar"_sv, Type::HlCStringChar},
744 Pair{u"IncludeRules"_sv, Type::IncludeRules},
745 Pair{u"Int"_sv, Type::Int},
746 Pair{u"LineContinue"_sv, Type::LineContinue},
747 Pair{u"RangeDetect"_sv, Type::RangeDetect},
748 Pair{u"RegExpr"_sv, Type::RegExpr},
749 Pair{u"StringDetect"_sv, Type::StringDetect},
750 Pair{u"WordDetect"_sv, Type::WordDetect},
751 Pair{u"keyword", Type::keyword},
752 };
753
754 for (auto pair : pairs) {
755 if (xml.name() == pair.first) {
756 type = pair.second;
757 bool success = parseAttributes(filename, xml);
758 success = checkMandoryAttributes(filename, xml) && success;
759 if (success && type == Type::RegExpr) {
760 // ., (.) followed by *, +, {1} or nothing
761 static const QRegularExpression isDot(QStringLiteral(R"(^\‍(?\.(?:[*+][*+?]?|[*+]|\{1\})?\$?$)"));
762 // remove "(?:" and ")"
763 static const QRegularExpression removeParentheses(QStringLiteral(R"(\‍((?:\?:)?|\))"));
764 // remove parentheses on a copy of string
765 auto reg = QString(string).replace(removeParentheses, QString());
766 isDotRegex = reg.contains(isDot);
767
768 // Remove .* and .*$ suffix.
769 static const QRegularExpression allSuffix(QStringLiteral("(?<!\\\\)[.][*][?+]?[$]?$"));
770 sanitizedString = string;
771 sanitizedString.replace(allSuffix, QString());
772 // string is a catch-all, do not sanitize
773 if (sanitizedString.isEmpty() || sanitizedString == u"^"_sv) {
774 sanitizedString = string;
775 }
776 }
777 return success;
778 }
779 }
780
781 qWarning() << filename << "line" << xml.lineNumber() << "unknown element:" << xml.name();
782 return false;
783 }
784
785 private:
786 bool parseAttributes(const QString &filename, const QXmlStreamReader &xml)
787 {
788 bool success = true;
789
790 const auto attrs = xml.attributes();
791 for (const auto &attr : attrs) {
792 Parser parser{filename, xml, attr, success};
793
794 // clang-format off
795 const bool isExtracted
796 = parser.extractString(attribute, u"attribute"_sv)
797 || parser.extractString(context.name, u"context"_sv)
798 || parser.extractXmlBool(lookAhead, u"lookAhead"_sv)
799 || parser.extractXmlBool(firstNonSpace, u"firstNonSpace"_sv)
800 || parser.extractString(beginRegion, u"beginRegion"_sv)
801 || parser.extractString(endRegion, u"endRegion"_sv)
802 || parser.extractPositive(column, u"column"_sv)
803 || ((type == Type::RegExpr
804 || type == Type::StringDetect
805 || type == Type::WordDetect
806 || type == Type::keyword
807 ) && parser.extractXmlBool(insensitive, u"insensitive"_sv))
808 || ((type == Type::DetectChar
809 || type == Type::RegExpr
810 || type == Type::StringDetect
811 || type == Type::keyword
812 ) && parser.extractXmlBool(dynamic, u"dynamic"_sv))
813 || ((type == Type::RegExpr)
814 && parser.extractXmlBool(minimal, u"minimal"_sv))
815 || ((type == Type::DetectChar
816 || type == Type::Detect2Chars
817 || type == Type::LineContinue
818 || type == Type::RangeDetect
819 ) && parser.extractChar(char0, u"char"_sv))
820 || ((type == Type::Detect2Chars
821 || type == Type::RangeDetect
822 ) && parser.extractChar(char1, u"char1"_sv))
823 || ((type == Type::AnyChar
824 || type == Type::RegExpr
825 || type == Type::StringDetect
826 || type == Type::WordDetect
827 || type == Type::keyword
828 ) && parser.extractString(string, u"String"_sv))
829 || ((type == Type::IncludeRules)
830 && parser.extractXmlBool(includeAttrib, u"includeAttrib"_sv))
831 || ((type == Type::Float
832 || type == Type::HlCHex
833 || type == Type::HlCOct
834 || type == Type::Int
835 || type == Type::keyword
836 || type == Type::WordDetect
837 ) && (parser.extractString(additionalDeliminator, u"additionalDeliminator"_sv)
838 || parser.extractString(weakDeliminator, u"weakDeliminator"_sv)))
839 ;
840 // clang-format on
841
842 success = parser.checkIfExtracted(isExtracted);
843 }
844
845 if (type == Type::LineContinue && char0 == u'\0') {
846 char0 = u'\\';
847 }
848
849 return success;
850 }
851
852 bool checkMandoryAttributes(const QString &filename, const QXmlStreamReader &xml)
853 {
854 QString missingAttr;
855
856 switch (type) {
857 case Type::Unknown:
858 return false;
859
860 case Type::AnyChar:
861 case Type::RegExpr:
862 case Type::StringDetect:
863 case Type::WordDetect:
864 case Type::keyword:
865 missingAttr = string.isEmpty() ? QStringLiteral("String") : QString();
866 break;
867
868 case Type::DetectChar:
869 missingAttr = !char0.unicode() ? QStringLiteral("char") : QString();
870 break;
871
872 case Type::Detect2Chars:
873 case Type::RangeDetect:
874 missingAttr = !char0.unicode() && !char1.unicode() ? QStringLiteral("char and char1")
875 : !char0.unicode() ? QStringLiteral("char")
876 : !char1.unicode() ? QStringLiteral("char1")
877 : QString();
878 break;
879
880 case Type::IncludeRules:
881 missingAttr = context.name.isEmpty() ? QStringLiteral("context") : QString();
882 break;
883
884 case Type::DetectIdentifier:
885 case Type::DetectSpaces:
886 case Type::Float:
887 case Type::HlCChar:
888 case Type::HlCHex:
889 case Type::HlCOct:
890 case Type::HlCStringChar:
891 case Type::Int:
892 case Type::LineContinue:
893 break;
894 }
895
896 if (!missingAttr.isEmpty()) {
897 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute:" << missingAttr;
898 return false;
899 }
900
901 return true;
902 }
903 };
904
905 int line;
906 // becomes false when a context (except includeRule) refers to it
907 bool isOnlyIncluded = true;
908 // becomes true when an includedRule refers to it with includeAttrib=true
909 bool referencedWithIncludeAttrib = false;
910 bool hasDynamicRule = false;
911 QString name;
912 QString attribute;
913 ContextName lineEndContext;
914 ContextName lineEmptyContext;
915 ContextName fallthroughContext;
916 QList<Rule> rules;
917 XmlBool dynamic{};
918 XmlBool fallthrough{};
919 XmlBool stopEmptyLineContextSwitchLoop{};
920
921 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
922 {
923 line = xml.lineNumber();
924
925 bool success = true;
926
927 const auto attrs = xml.attributes();
928 for (const auto &attr : attrs) {
929 Parser parser{filename, xml, attr, success};
930 XmlBool noIndentationBasedFolding{};
931
932 // clang-format off
933 const bool isExtracted = parser.extractString(name, u"name"_sv)
934 || parser.extractString(attribute, u"attribute"_sv)
935 || parser.extractString(lineEndContext.name, u"lineEndContext"_sv)
936 || parser.extractString(lineEmptyContext.name, u"lineEmptyContext"_sv)
937 || parser.extractString(fallthroughContext.name, u"fallthroughContext"_sv)
938 || parser.extractXmlBool(dynamic, u"dynamic"_sv)
939 || parser.extractXmlBool(fallthrough, u"fallthrough"_sv)
940 || parser.extractXmlBool(stopEmptyLineContextSwitchLoop, u"stopEmptyLineContextSwitchLoop"_sv)
941 || parser.extractXmlBool(noIndentationBasedFolding, u"noIndentationBasedFolding"_sv);
942 // clang-format on
943
944 success = parser.checkIfExtracted(isExtracted);
945 }
946
947 if (name.isEmpty()) {
948 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: name";
949 success = false;
950 }
951
952 if (attribute.isEmpty()) {
953 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: attribute";
954 success = false;
955 }
956
957 return success;
958 }
959 };
960
961 struct ItemDatas {
962 struct Style {
963 QString name;
964 int line;
965
966 friend size_t qHash(const Style &style, size_t seed = 0)
967 {
968 return qHash(style.name, seed);
969 }
970
971 friend bool operator==(const Style &style0, const Style &style1)
972 {
973 return style0.name == style1.name;
974 }
975 };
976
977 QSet<Style> styleNames;
978
979 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
980 {
981 bool success = true;
982
983 QString name;
984 QString defStyleNum;
985 XmlBool boolean;
986
987 const auto attrs = xml.attributes();
988 for (const auto &attr : attrs) {
989 Parser parser{filename, xml, attr, success};
990
991 // clang-format off
992 const bool isExtracted
993 = parser.extractString(name, u"name"_sv)
994 || parser.extractString(defStyleNum, u"defStyleNum"_sv)
995 || parser.extractXmlBool(boolean, u"bold"_sv)
996 || parser.extractXmlBool(boolean, u"italic"_sv)
997 || parser.extractXmlBool(boolean, u"underline"_sv)
998 || parser.extractXmlBool(boolean, u"strikeOut"_sv)
999 || parser.extractXmlBool(boolean, u"spellChecking"_sv)
1000 || parser.checkColor(u"color"_sv)
1001 || parser.checkColor(u"selColor"_sv)
1002 || parser.checkColor(u"backgroundColor"_sv)
1003 || parser.checkColor(u"selBackgroundColor"_sv);
1004 // clang-format on
1005
1006 success = parser.checkIfExtracted(isExtracted);
1007 }
1008
1009 if (!name.isEmpty()) {
1010 const auto len = styleNames.size();
1011 styleNames.insert({name, int(xml.lineNumber())});
1012 if (len == styleNames.size()) {
1013 qWarning() << filename << "line" << xml.lineNumber() << "itemData duplicate:" << name;
1014 success = false;
1015 }
1016 }
1017
1018 return success;
1019 }
1020 };
1021
1022 struct Definition {
1023 QMap<QString, Keywords> keywordsList;
1024 QMap<QString, Context> contexts;
1025 ItemDatas itemDatas;
1026 QString firstContextName;
1027 const Context *firstContext = nullptr;
1028 QString filename;
1029 WordDelimiters wordDelimiters;
1030 KateVersion kateVersion{};
1031 QString kateVersionStr;
1032 QString languageName;
1033 QSet<const Definition *> referencedDefinitions;
1034
1035 // Parse <keywords ...>
1036 bool parseKeywords(const QXmlStreamReader &xml)
1037 {
1038 wordDelimiters.append(xml.attributes().value(u"additionalDeliminator"_sv));
1039 wordDelimiters.remove(xml.attributes().value(u"weakDeliminator"_sv));
1040 return true;
1041 }
1042 };
1043
1044 // Parse <context>
1045 void processContextElement(const QXmlStreamReader &xml)
1046 {
1047 Context context;
1048 m_success = context.parseElement(m_currentDefinition->filename, xml) && m_success;
1049 if (m_currentDefinition->firstContextName.isEmpty()) {
1050 m_currentDefinition->firstContextName = context.name;
1051 }
1052 if (m_currentDefinition->contexts.contains(context.name)) {
1053 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate context:" << context.name;
1054 m_success = false;
1055 }
1056 m_currentContext = &*m_currentDefinition->contexts.insert(context.name, context);
1057 }
1058
1059 // Parse <list name="...">
1060 void processListElement(const QXmlStreamReader &xml)
1061 {
1062 Keywords keywords;
1063 m_success = keywords.parseElement(m_currentDefinition->filename, xml) && m_success;
1064 if (m_currentDefinition->keywordsList.contains(keywords.name)) {
1065 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate list:" << keywords.name;
1066 m_success = false;
1067 }
1068 m_currentKeywords = &*m_currentDefinition->keywordsList.insert(keywords.name, keywords);
1069 }
1070
1071 const Definition *maxKateVersionDefinition(const Definition &definition, QMap<const Definition *, const Definition *> &maxVersionByDefinitions) const
1072 {
1073 auto it = maxVersionByDefinitions.find(&definition);
1074 if (it != maxVersionByDefinitions.end()) {
1075 return it.value();
1076 } else {
1077 auto it = maxVersionByDefinitions.insert(&definition, &definition);
1078 for (const auto &referencedDef : definition.referencedDefinitions) {
1079 auto *maxDef = maxKateVersionDefinition(*referencedDef, maxVersionByDefinitions);
1080 if (it.value()->kateVersion < maxDef->kateVersion) {
1081 it.value() = maxDef;
1082 }
1083 }
1084 return it.value();
1085 }
1086 }
1087
1088 // Initialize the referenced rules (Rule::includedRules)
1089 void resolveIncludeRules()
1090 {
1091 QSet<const Context *> usedContexts;
1092 QList<const Context *> contexts;
1093
1095 while (def.hasNext()) {
1096 def.next();
1097 auto &definition = def.value();
1098 QMutableMapIterator<QString, Context> contextIt(definition.contexts);
1099 while (contextIt.hasNext()) {
1100 contextIt.next();
1101 auto &currentContext = contextIt.value();
1102 for (auto &rule : currentContext.rules) {
1103 if (rule.type != Context::Rule::Type::IncludeRules) {
1104 continue;
1105 }
1106
1107 if (rule.context.stay) {
1108 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself";
1109 m_success = false;
1110 continue;
1111 }
1112
1113 if (rule.context.popCount) {
1114 qWarning() << definition.filename << "line" << rule.line << "IncludeRules with #pop prefix";
1115 m_success = false;
1116 }
1117
1118 if (!rule.context.context) {
1119 m_success = false;
1120 continue;
1121 }
1122
1123 // resolve includedRules and includedIncludeRules
1124
1125 usedContexts.clear();
1126 usedContexts.insert(rule.context.context);
1127 contexts.clear();
1128 contexts.append(rule.context.context);
1129
1130 for (int i = 0; i < contexts.size(); ++i) {
1131 currentContext.hasDynamicRule = contexts[i]->hasDynamicRule;
1132 for (const auto &includedRule : contexts[i]->rules) {
1133 if (includedRule.type != Context::Rule::Type::IncludeRules) {
1134 rule.includedRules.append(&includedRule);
1135 } else if (&rule == &includedRule) {
1136 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself by recursivity";
1137 m_success = false;
1138 } else {
1139 rule.includedIncludeRules.insert(&includedRule);
1140
1141 if (includedRule.includedRules.isEmpty()) {
1142 const auto *context = includedRule.context.context;
1143 if (context && !usedContexts.contains(context)) {
1144 contexts.append(context);
1145 usedContexts.insert(context);
1146 }
1147 } else {
1148 rule.includedRules.append(includedRule.includedRules);
1149 }
1150 }
1151 }
1152 }
1153 }
1154 }
1155 }
1156 }
1157
1158 //! Recursively extracts the contexts used from the first context of the definitions.
1159 //! This method detects groups of contexts which are only used among themselves.
1160 QSet<const Context *> extractUsedContexts() const
1161 {
1162 QSet<const Context *> usedContexts;
1163 QList<const Context *> contexts;
1164
1165 QMapIterator<QString, Definition> def(m_definitions);
1166 while (def.hasNext()) {
1167 def.next();
1168 const auto &definition = def.value();
1169
1170 if (definition.firstContext) {
1171 usedContexts.insert(definition.firstContext);
1172 contexts.clear();
1173 contexts.append(definition.firstContext);
1174
1175 for (int i = 0; i < contexts.size(); ++i) {
1176 auto appendContext = [&](const Context *context) {
1177 if (context && !usedContexts.contains(context)) {
1178 contexts.append(context);
1179 usedContexts.insert(context);
1180 }
1181 };
1182
1183 const auto *context = contexts[i];
1184 appendContext(context->lineEndContext.context);
1185 appendContext(context->lineEmptyContext.context);
1186 appendContext(context->fallthroughContext.context);
1187
1188 for (auto &rule : context->rules) {
1189 appendContext(rule.context.context);
1190 }
1191 }
1192 }
1193 }
1194
1195 return usedContexts;
1196 }
1197
1198 struct RuleAndInclude {
1199 const Context::Rule *rule;
1200 const Context::Rule *includeRules;
1201
1202 explicit operator bool() const
1203 {
1204 return rule;
1205 }
1206 };
1207
1208 struct IncludedRuleUnreachableBy {
1209 QList<RuleAndInclude> unreachableBy;
1210 bool alwaysUnreachable = true;
1211 };
1212
1213 //! Check contexts and rules
1214 bool checkContexts(const Definition &definition,
1215 QSet<ItemDatas::Style> &usedAttributeNames,
1216 QSet<ItemDatas::Style> &ignoredAttributeNames,
1217 const QSet<const Context *> &usedContexts,
1218 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1219 {
1220 bool success = true;
1221
1222 QMapIterator<QString, Context> contextIt(definition.contexts);
1223 while (contextIt.hasNext()) {
1224 contextIt.next();
1225
1226 const auto &context = contextIt.value();
1227 const auto &filename = definition.filename;
1228
1229 if (!usedContexts.contains(&context)) {
1230 qWarning() << filename << "line" << context.line << "unused context:" << context.name;
1231 success = false;
1232 continue;
1233 }
1234
1235 if (context.name.startsWith(u"#pop"_sv)) {
1236 qWarning() << filename << "line" << context.line << "the context name must not start with '#pop':" << context.name;
1237 success = false;
1238 }
1239
1240 if (!context.attribute.isEmpty() && (!context.isOnlyIncluded || context.referencedWithIncludeAttrib)) {
1241 usedAttributeNames.insert({context.attribute, context.line});
1242 }
1243
1244 success = checkContextAttribute(definition, context) && success;
1245 success = checkUreachableRules(definition.filename, context, unreachableIncludedRules) && success;
1246 success = suggestRuleMerger(definition.filename, context) && success;
1247
1248 for (const auto &rule : context.rules) {
1249 if (!rule.attribute.isEmpty()) {
1250 if (rule.lookAhead != XmlBool::True) {
1251 usedAttributeNames.insert({rule.attribute, rule.line});
1252 } else {
1253 ignoredAttributeNames.insert({rule.attribute, rule.line});
1254 }
1255 }
1256 success = checkLookAhead(rule) && success;
1257 success = checkStringDetect(rule) && success;
1258 success = checkWordDetect(rule) && success;
1259 success = checkKeyword(definition, rule) && success;
1260 success = checkRegExpr(filename, rule, context) && success;
1261 success = checkDelimiters(definition, rule) && success;
1262 }
1263 }
1264
1265 return success;
1266 }
1267
1268 //! Check that a regular expression in a RegExpr rule:
1269 //! - isValid()
1270 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1271 //! - dynamic=true but no place holder used?
1272 //! - is not . with lookAhead="1"
1273 //! - is not ^... without column ou firstNonSpace attribute
1274 //! - is not equivalent to DetectSpaces, DetectChar, Detect2Chars, StringDetect, DetectIdentifier, RangeDetect, LineContinue or AnyChar
1275 //! - has no unused captures
1276 //! - has no unnecessary quantifier with lookAhead
1277 bool checkRegExpr(const QString &filename, const Context::Rule &rule, const Context &context) const
1278 {
1279 // ignore empty regex because the error is raised during xml parsing
1280 if (rule.type == Context::Rule::Type::RegExpr && !rule.string.isEmpty()) {
1281 const QRegularExpression regexp(rule.string);
1282 if (!checkRegularExpression(rule.filename, regexp, rule.line)) {
1283 return false;
1284 }
1285
1286 // dynamic == true and no place holder?
1287 if (rule.dynamic == XmlBool::True) {
1288 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1289 if (!rule.string.contains(placeHolder)) {
1290 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1291 return false;
1292 }
1293 }
1294
1295 if (rule.lookAhead == XmlBool::True && (rule.string.endsWith(u".*$"_sv) || rule.string.endsWith(u".*"_sv)) && -1 == rule.string.indexOf(u'|')) {
1296 qWarning() << rule.filename << "line" << rule.line << "RegExpr with lookAhead=1 doesn't need to end with '.*' or '.*$':" << rule.string;
1297 return false;
1298 }
1299
1300 auto reg = (rule.lookAhead == XmlBool::True) ? rule.sanitizedString : rule.string;
1301 if (rule.lookAhead == XmlBool::True) {
1302 static const QRegularExpression removeAllSuffix(QStringLiteral(
1303 R"(((?<!\\)\\‍(?:[DSWdsw]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4})|(?<!\\)[^])}\\]|(?=\\)\\\\)[*][?+]?$)"));
1304 reg.replace(removeAllSuffix, QString());
1305 }
1306
1307 reg.replace(QStringLiteral("{1}"), QString());
1308 reg.replace(QStringLiteral("{1,1}"), QString());
1309
1310 // is DetectSpaces
1311 // optional ^ then \s, [\s], [\t ], [ \t] possibly in (...) or (?:...) followed by *, +
1312 static const QRegularExpression isDetectSpaces(
1313 QStringLiteral(R"(^\^?(?:\‍((?:\?:)?)?\^?(?:\\s|\[(?:\\s| (?:\t|\\t)|(?:\t|\\t) )\])\)?(?:[*+][*+?]?|[*+])?\)?\)?$)"));
1314 if (rule.string.contains(isDetectSpaces)) {
1315 char const *extraMsg = rule.string.contains(u'^') ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1316 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectSpaces / DetectChar / AnyChar" << extraMsg << ":"
1317 << rule.string;
1318 return false;
1319 }
1320
1321#define REG_ESCAPE_CHAR R"(\\(?:[^0BDPSWbdpswoux]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4}))"
1322#define REG_CHAR "(?:" REG_ESCAPE_CHAR "|\\[(?:" REG_ESCAPE_CHAR "|.)\\]|[^[.^])"
1323
1324 // is RangeDetect
1325 static const QRegularExpression isRange(QStringLiteral("^\\^?" REG_CHAR "(?:"
1326 "\\.\\*[?+]?" REG_CHAR "|"
1327 "\\[\\^(" REG_ESCAPE_CHAR "|.)\\]\\*[?+]?\\1"
1328 ")$"));
1329 if ((rule.lookAhead == XmlBool::True || rule.minimal == XmlBool::True || rule.string.contains(u".*?"_sv) || rule.string.contains(u"[^"_sv))
1330 && reg.contains(isRange)) {
1331 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by RangeDetect:" << rule.string;
1332 return false;
1333 }
1334
1335 // is AnyChar
1336 static const QRegularExpression isAnyChar(QStringLiteral(R"(^(\^|\‍((\?:)?)*\[(?!\^)[-\]]?(\\[^0BDPSWbdpswoux]|[^-\]\\])*\]\)*$)"));
1337 if (rule.string.contains(isAnyChar)) {
1338 auto extra = (reg[0] == u'^' || reg[1] == u'^') ? "with column=\"0\"" : "";
1339 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by AnyChar:" << rule.string << extra;
1340 return false;
1341 }
1342
1343 // is LineContinue
1344 static const QRegularExpression isLineContinue(QStringLiteral("^\\^?" REG_CHAR "\\$$"));
1345 if (reg.contains(isLineContinue)) {
1346 auto extra = (reg[0] == u'^') ? "with column=\"0\"" : "";
1347 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by LineContinue:" << rule.string << extra;
1348 return false;
1349 }
1350
1351#define REG_DIGIT uR"((\[(0-9|\\d)\]|\\d))"
1352#define REG_DIGITS REG_DIGIT u"([+]|" REG_DIGIT u"[*])"
1353#define REG_DOT uR"((\\[.]|\[.\]))"
1354 // is Int, check \b[0-9]+
1355 static const QRegularExpression isInt(uR"(^(\‍((\?:)?)*\\b(\‍((\?:)?)*)" REG_DIGITS uR"(\)*$)"_s);
1356 if (reg.contains(isInt)) {
1357 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by Int:" << rule.string;
1358 return false;
1359 }
1360
1361 // is Float, check (\b[0-9]+\.[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?
1362 static const QRegularExpression isFloat(
1363 uR"(^(\\b|\‍((\?:)?)*)" REG_DIGITS REG_DOT
1364 REG_DIGIT u"[*][|]" REG_DOT REG_DIGITS uR"(\)+\‍((\?:)?\[[eE]+\]\[(\\?-\\?\+|\\?\+\\?-)\]\?)" REG_DIGITS uR"(\)\?\)*$)"_s);
1365 if (reg.contains(isFloat)) {
1366 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by Float:" << rule.string;
1367 return false;
1368 }
1369#undef REG_DOT
1370#undef REG_DIGIT
1371#undef REG_DIGITS
1372
1373 // replace \c, \xhhh, \x{hhh...}, \0dd, \o{ddd}, \uhhhh, with _
1374 static const QRegularExpression sanitize1(QStringLiteral(REG_ESCAPE_CHAR));
1375 reg.replace(sanitize1, QStringLiteral("_"));
1376
1377#undef REG_CHAR
1378#undef REG_ESCAPE_CHAR
1379
1380 // use minimal or lazy operator
1381 static const QRegularExpression isMinimal(QStringLiteral("(?![.][*+?][$]?[)]*$)[.][*+?][^?+]"));
1382 static const QRegularExpression hasNotGreedy(QStringLiteral("[*+?][?+]"));
1383
1384 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(isMinimal) && !reg.contains(hasNotGreedy)
1385 && (!rule.context.context || !rule.context.context->hasDynamicRule || regexp.captureCount() == 0)
1386 && (reg.back() != u'$' || reg.contains(u'|'))) {
1387 qWarning() << rule.filename << "line" << rule.line
1388 << "RegExpr should be have minimal=\"1\" or use lazy operator (i.g, '.*' -> '.*?'):" << rule.string;
1389 return false;
1390 }
1391
1392 // replace [:...:] with ___
1393 static const QRegularExpression sanitize2(QStringLiteral(R"(\[:\w+:\])"));
1394 reg.replace(sanitize2, QStringLiteral("___"));
1395
1396 // replace [ccc...], [special] with ...
1397 static const QRegularExpression sanitize3(QStringLiteral(R"(\[(?:\^\]?[^]]*|\]?[^]\\]*?\\.[^]]*|\][^]]{2,}|[^]]{3,})\]|(\[\]?[^]]*\]))"));
1398 reg.replace(sanitize3, QStringLiteral("...\\1"));
1399
1400 // replace [c] with _
1401 static const QRegularExpression sanitize4(QStringLiteral(R"(\[.\])"));
1402 reg.replace(sanitize4, QStringLiteral("_"));
1403
1404 const int len = reg.size();
1405 // replace [cC] with _
1406 static const QRegularExpression toInsensitive(QStringLiteral(R"(\[(?:([^]])\1)\])"));
1407 reg = reg.toUpper();
1408 reg.replace(toInsensitive, QString());
1409
1410 // is StringDetect
1411 // ignore (?:, ) and {n}
1412 static const QRegularExpression isStringDetect(QStringLiteral(R"(^\^?(?:[^|\\?*+$^[{(.]|{(?!\d+,\d*}|,\d+})|\‍(\?:)+$)"));
1413 if (reg.contains(isStringDetect)) {
1414 char const *extraMsg = rule.string.contains(u'^') ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1415 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by StringDetect / Detect2Chars / DetectChar" << extraMsg
1416 << ":" << rule.string;
1417 if (len != reg.size()) {
1418 qWarning() << rule.filename << "line" << rule.line << "insensitive=\"1\" missing:" << rule.string;
1419 }
1420 return false;
1421 }
1422
1423 // column="0"
1424 if (rule.column == -1) {
1425 // ^ without |
1426 // (^sas*) -> ok
1427 // (^sa|s*) -> ko
1428 // (^(sa|s*)) -> ok
1429 auto first = std::as_const(reg).begin();
1430 auto last = std::as_const(reg).end();
1431 int depth = 0;
1432
1433 while (u'(' == *first) {
1434 ++depth;
1435 ++first;
1436 if (u'?' == *first || u':' == first[1]) {
1437 first += 2;
1438 }
1439 }
1440
1441 if (u'^' == *first) {
1442 const int bolDepth = depth;
1443 bool replace = true;
1444
1445 while (++first != last) {
1446 if (u'(' == *first) {
1447 ++depth;
1448 } else if (u')' == *first) {
1449 --depth;
1450 if (depth < bolDepth) {
1451 // (^a)? === (^a|) -> ko
1452 if (first + 1 != last && u"*?"_sv.contains(first[1])) {
1453 replace = false;
1454 break;
1455 }
1456 }
1457 } else if (u'|' == *first) {
1458 // ignore '|' within subgroup
1459 if (depth <= bolDepth) {
1460 replace = false;
1461 break;
1462 }
1463 }
1464 }
1465
1466 if (replace) {
1467 qWarning() << rule.filename << "line" << rule.line << "column=\"0\" missing with RegExpr:" << rule.string;
1468 return false;
1469 }
1470 }
1471 }
1472
1473 // add ^ with column=0
1474 if (rule.column == 0 && !rule.isDotRegex) {
1475 bool hasStartOfLine = false;
1476 auto first = std::as_const(reg).begin();
1477 auto last = std::as_const(reg).end();
1478 for (; first != last; ++first) {
1479 if (*first == u'^') {
1480 hasStartOfLine = true;
1481 break;
1482 } else if (*first == u'(') {
1483 if (last - first >= 3 && first[1] == u'?' && first[2] == u':') {
1484 first += 2;
1485 }
1486 } else {
1487 break;
1488 }
1489 }
1490
1491 if (!hasStartOfLine) {
1492 qWarning() << rule.filename << "line" << rule.line
1493 << "start of line missing in the pattern with column=\"0\" (i.e. abc -> ^abc):" << rule.string;
1494 return false;
1495 }
1496 }
1497
1498 bool useCapture = false;
1499
1500 // detection of unnecessary capture
1501 if (regexp.captureCount()) {
1502 auto maximalCapture = [](const QStringView(&referenceNames)[9], const QString &s) {
1503 int maxCapture = 9;
1504 while (maxCapture && !s.contains(referenceNames[maxCapture - 1])) {
1505 --maxCapture;
1506 }
1507 return maxCapture;
1508 };
1509
1510 int maxCaptureUsed = 0;
1511 // maximal dynamic reference
1512 if (rule.context.context && !rule.context.stay) {
1513 for (const auto &nextRule : std::as_const(rule.context.context->rules)) {
1514 if (nextRule.dynamic == XmlBool::True) {
1515 static const QStringView cap[]{
1516 u"%1"_sv,
1517 u"%2"_sv,
1518 u"%3"_sv,
1519 u"%4"_sv,
1520 u"%5"_sv,
1521 u"%6"_sv,
1522 u"%7"_sv,
1523 u"%8"_sv,
1524 u"%9"_sv,
1525 };
1526 int maxDynamicCapture = maximalCapture(cap, nextRule.string);
1527 maxCaptureUsed = std::max(maxCaptureUsed, maxDynamicCapture);
1528 }
1529 }
1530 }
1531
1532 static const QStringView num1[]{
1533 u"\\1"_sv,
1534 u"\\2"_sv,
1535 u"\\3"_sv,
1536 u"\\4"_sv,
1537 u"\\5"_sv,
1538 u"\\6"_sv,
1539 u"\\7"_sv,
1540 u"\\8"_sv,
1541 u"\\9"_sv,
1542 };
1543 static const QStringView num2[]{
1544 u"\\g1"_sv,
1545 u"\\g2"_sv,
1546 u"\\g3"_sv,
1547 u"\\g4"_sv,
1548 u"\\g5"_sv,
1549 u"\\g6"_sv,
1550 u"\\g7"_sv,
1551 u"\\g8"_sv,
1552 u"\\g9"_sv,
1553 };
1554 const int maxBackReference = std::max(maximalCapture(num1, rule.string), maximalCapture(num2, rule.string));
1555
1556 const int maxCapture = std::max(maxCaptureUsed, maxBackReference);
1557
1558 if (maxCapture && regexp.captureCount() > maxCapture) {
1559 qWarning() << rule.filename << "line" << rule.line << "RegExpr with" << regexp.captureCount() << "captures but only" << maxCapture
1560 << "are used. Please, replace '(...)' with '(?:...)':" << rule.string;
1561 return false;
1562 }
1563
1564 useCapture = maxCapture;
1565 }
1566
1567 if (!useCapture) {
1568 // is DetectIdentifier
1569 static const QRegularExpression isDetectIdentifier(
1570 QStringLiteral(R"(^(\‍((\?:)?|\^)*\[(\\p\{L\}|_){2}\]([+][?+]?)?\[(\\p\{N\}|\\p\{L\}|_){3}\][*][?+]?\)*$)"));
1571 if (rule.string.contains(isDetectIdentifier)) {
1572 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectIdentifier:" << rule.string;
1573 return false;
1574 }
1575 }
1576
1577 if (rule.isDotRegex) {
1578 // search next rule with same column or firstNonSpace
1579 int i = &rule - context.rules.data() + 1;
1580 const bool hasColumn = (rule.column != -1);
1581 const bool hasFirstNonSpace = (rule.firstNonSpace == XmlBool::True);
1582 const bool isSpecial = (hasColumn || hasFirstNonSpace);
1583 for (; i < context.rules.size(); ++i) {
1584 auto &rule2 = context.rules[i];
1585 if (rule2.type == Context::Rule::Type::IncludeRules && isSpecial) {
1586 i = context.rules.size();
1587 break;
1588 }
1589
1590 const bool hasColumn2 = (rule2.column != -1);
1591 const bool hasFirstNonSpace2 = (rule2.firstNonSpace == XmlBool::True);
1592 if ((!isSpecial && !hasColumn2 && !hasFirstNonSpace2) || (hasColumn && rule.column == rule2.column)
1593 || (hasFirstNonSpace && hasFirstNonSpace2)) {
1594 break;
1595 }
1596 }
1597
1598 auto ruleFilename = (filename == rule.filename) ? QString() : u"in "_sv + rule.filename;
1599 if (i == context.rules.size()) {
1600 if (rule.lookAhead == XmlBool::True && rule.firstNonSpace != XmlBool::True && rule.column == -1 && rule.beginRegion.isEmpty()
1601 && rule.endRegion.isEmpty() && !useCapture) {
1602 qWarning() << filename << "context line" << context.line << ": RegExpr line" << rule.line << ruleFilename
1603 << "should be replaced by fallthroughContext:" << rule.string;
1604 }
1605 } else {
1606 auto &nextRule = context.rules[i];
1607 auto nextRuleFilename = (filename == nextRule.filename) ? QString() : u"in "_sv + nextRule.filename;
1608 qWarning() << filename << "context line" << context.line << "contains unreachable element line" << nextRule.line << nextRuleFilename
1609 << "because a dot RegExpr is used line" << rule.line << ruleFilename;
1610 }
1611
1612 // unnecessary quantifier
1613 static const QRegularExpression unnecessaryQuantifier1(QStringLiteral(R"([*+?]([.][*+?]{0,2})?$)"));
1614 static const QRegularExpression unnecessaryQuantifier2(QStringLiteral(R"([*+?]([.][*+?]{0,2})?[)]*$)"));
1615 auto &unnecessaryQuantifier = useCapture ? unnecessaryQuantifier1 : unnecessaryQuantifier2;
1616 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(unnecessaryQuantifier)) {
1617 qWarning() << rule.filename << "line" << rule.line
1618 << "Last quantifier is not necessary (i.g., 'xyz*' -> 'xy', 'xyz+.' -> 'xyz.'):" << rule.string;
1619 return false;
1620 }
1621 }
1622 }
1623
1624 return true;
1625 }
1626
1627 // Parse and check <emptyLine>
1628 bool parseEmptyLine(const QString &filename, const QXmlStreamReader &xml)
1629 {
1630 bool success = true;
1631
1632 QString pattern;
1633 XmlBool casesensitive{};
1634
1635 const auto attrs = xml.attributes();
1636 for (auto &attr : attrs) {
1637 Parser parser{filename, xml, attr, success};
1638
1639 const bool isExtracted = parser.extractString(pattern, u"regexpr"_sv) || parser.extractXmlBool(casesensitive, u"casesensitive"_sv);
1640
1641 success = parser.checkIfExtracted(isExtracted);
1642 }
1643
1644 if (pattern.isEmpty()) {
1645 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: regexpr";
1646 success = false;
1647 } else {
1648 success = checkRegularExpression(filename, QRegularExpression(pattern), xml.lineNumber());
1649 }
1650
1651 return success;
1652 }
1653
1654 //! Check that a regular expression:
1655 //! - isValid()
1656 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1657 bool checkRegularExpression(const QString &filename, const QRegularExpression &regexp, int line) const
1658 {
1659 const auto pattern = regexp.pattern();
1660
1661 // validate regexp
1662 if (!regexp.isValid()) {
1663 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem:" << regexp.errorString() << "at offset"
1664 << regexp.patternErrorOffset();
1665 return false;
1666 }
1667
1668 // catch possible case typos: [A-z] or [a-Z]
1669 const int azOffset = std::max(pattern.indexOf(u"A-z"_sv), pattern.indexOf(u"a-Z"_sv));
1670 if (azOffset >= 0) {
1671 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem: [a-Z] or [A-z] at offset" << azOffset;
1672 return false;
1673 }
1674
1675 return true;
1676 }
1677
1678 //! Check fallthrough and fallthroughContext.
1679 //! Check kateversion for stopEmptyLineContextSwitchLoop.
1680 bool checkContextAttribute(const Definition &definition, const Context &context) const
1681 {
1682 bool success = true;
1683
1684 if (!context.fallthroughContext.name.isEmpty()) {
1685 const bool mandatoryFallthroughAttribute = definition.kateVersion < KateVersion{5, 62};
1686 if (context.fallthrough == XmlBool::True && !mandatoryFallthroughAttribute) {
1687 qWarning() << definition.filename << "line" << context.line << "fallthrough attribute is unnecessary with kateversion >= 5.62 in context"
1688 << context.name;
1689 success = false;
1690 } else if (context.fallthrough != XmlBool::True && mandatoryFallthroughAttribute) {
1691 qWarning() << definition.filename << "line" << context.line
1692 << "fallthroughContext attribute without fallthrough=\"1\" attribute is only valid with kateversion >= 5.62 in context"
1693 << context.name;
1694 success = false;
1695 }
1696 }
1697
1698 if (context.stopEmptyLineContextSwitchLoop != XmlBool::Unspecified && definition.kateVersion < KateVersion{5, 103}) {
1699 qWarning() << definition.filename << "line" << context.line
1700 << "stopEmptyLineContextSwitchLoop attribute is only valid with kateversion >= 5.103 in context" << context.name;
1701 success = false;
1702 }
1703
1704 return success;
1705 }
1706
1707 //! Search for additionalDeliminator/weakDeliminator which has no effect.
1708 bool checkDelimiters(const Definition &definition, const Context::Rule &rule) const
1709 {
1710 if (rule.additionalDeliminator.isEmpty() && rule.weakDeliminator.isEmpty()) {
1711 return true;
1712 }
1713
1714 bool success = true;
1715
1716 if (definition.kateVersion < KateVersion{5, 79}) {
1717 qWarning() << definition.filename << "line" << rule.line
1718 << "additionalDeliminator and weakDeliminator are only available since version \"5.79\". Please, increase kateversion.";
1719 success = false;
1720 }
1721
1722 for (QChar c : rule.additionalDeliminator) {
1723 if (!definition.wordDelimiters.contains(c)) {
1724 return success;
1725 }
1726 }
1727
1728 for (QChar c : rule.weakDeliminator) {
1729 if (definition.wordDelimiters.contains(c)) {
1730 return success;
1731 }
1732 }
1733
1734 qWarning() << rule.filename << "line" << rule.line << "unnecessary use of additionalDeliminator and/or weakDeliminator" << rule.string;
1735 return false;
1736 }
1737
1738 //! Check that keyword rule reference an existing keyword list.
1739 bool checkKeyword(const Definition &definition, const Context::Rule &rule) const
1740 {
1741 if (rule.type == Context::Rule::Type::keyword) {
1742 auto it = definition.keywordsList.find(rule.string);
1743 if (it == definition.keywordsList.end()) {
1744 qWarning() << rule.filename << "line" << rule.line << "reference of non-existing keyword list:" << rule.string;
1745 return false;
1746 }
1747 }
1748 return true;
1749 }
1750
1751 //! Search for rules with lookAhead="true" and context="#stay".
1752 //! This would cause an infinite loop.
1753 bool checkLookAhead(const Context::Rule &rule) const
1754 {
1755 if (rule.lookAhead == XmlBool::True && rule.context.stay) {
1756 qWarning() << rule.filename << "line" << rule.line << "infinite loop: lookAhead with context #stay";
1757 }
1758 return true;
1759 }
1760
1761 //! Check that StringDetect contains a placeHolder when dynamic="1"
1762 bool checkStringDetect(const Context::Rule &rule) const
1763 {
1764 if (rule.type == Context::Rule::Type::StringDetect) {
1765 // dynamic == true and no place holder?
1766 if (rule.dynamic == XmlBool::True) {
1767 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1768 if (!rule.string.contains(placeHolder)) {
1769 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1770 return false;
1771 }
1772 }
1773 }
1774 return true;
1775 }
1776
1777 //! Check that WordDetect does not contain spaces at the beginning and end of text.
1778 bool checkWordDetect(const Context::Rule &rule) const
1779 {
1780 if (rule.type == Context::Rule::Type::WordDetect) {
1781 if (!rule.string.isEmpty() && (rule.string.front().isSpace() || rule.string.back().isSpace())) {
1782 qWarning() << rule.filename << "line" << rule.line << "contains a space at the beginning or end of the string:" << rule.string;
1783 return false;
1784 }
1785 }
1786 return true;
1787 }
1788
1789 //! Check <include> and delimiter in a keyword list
1790 bool checkKeywordsList(const Definition &definition) const
1791 {
1792 bool success = true;
1793
1794 bool includeNotSupport = (definition.kateVersion < KateVersion{5, 53});
1795 QMapIterator<QString, Keywords> keywordsIt(definition.keywordsList);
1796 while (keywordsIt.hasNext()) {
1797 keywordsIt.next();
1798
1799 for (const auto &include : keywordsIt.value().items.includes) {
1800 if (includeNotSupport) {
1801 qWarning() << definition.filename << "line" << include.line
1802 << "<include> is only available since version \"5.53\". Please, increase kateversion.";
1803 success = false;
1804 }
1805 success = checkKeywordInclude(definition, include) && success;
1806 }
1807
1808 // Check that keyword list items do not have deliminator character
1809#if 0
1810 for (const auto& keyword : keywordsIt.value().items.keywords) {
1811 for (QChar c : keyword.content) {
1812 if (definition.wordDelimiters.contains(c)) {
1813 qWarning() << definition.filename << "line" << keyword.line << "keyword with delimiter:" << c << "in" << keyword.content;
1814 success = false;
1815 }
1816 }
1817 }
1818#endif
1819 }
1820
1821 return success;
1822 }
1823
1824 //! Search for non-existing keyword include.
1825 bool checkKeywordInclude(const Definition &definition, const Keywords::Items::Item &include) const
1826 {
1827 bool containsKeywordName = true;
1828 int const idx = include.content.indexOf(u"##"_sv);
1829 if (idx == -1) {
1830 auto it = definition.keywordsList.find(include.content);
1831 containsKeywordName = (it != definition.keywordsList.end());
1832 } else {
1833 auto defName = include.content.sliced(idx + 2);
1834 auto listName = include.content.sliced(0, idx);
1835 auto it = m_definitions.find(defName);
1836 if (it == m_definitions.end()) {
1837 qWarning() << definition.filename << "line" << include.line << "unknown definition in" << include.content;
1838 return false;
1839 }
1840 containsKeywordName = it->keywordsList.contains(listName);
1841 }
1842
1843 if (!containsKeywordName) {
1844 qWarning() << definition.filename << "line" << include.line << "unknown keyword name in" << include.content;
1845 }
1846
1847 return containsKeywordName;
1848 }
1849
1850 //! Check if a rule is hidden by another
1851 //! - rule hidden by DetectChar or AnyChar
1852 //! - DetectSpaces, AnyChar, Int, Float with all their characters hidden by DetectChar or AnyChar
1853 //! - StringDetect, WordDetect, RegExpr with as prefix Detect2Chars or other strings
1854 //! - duplicate rule (Int, Float, keyword with same String, etc)
1855 //! - Rule hidden by a dot regex
1856 bool checkUreachableRules(const QString &filename,
1857 const Context &context,
1858 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1859 {
1860 if (context.isOnlyIncluded) {
1861 return true;
1862 }
1863
1864 struct Rule4 {
1865 RuleAndInclude setRule(const Context::Rule &rule, const Context::Rule *includeRules = nullptr)
1866 {
1867 auto set = [&](RuleAndInclude &ruleAndInclude) {
1868 auto old = ruleAndInclude;
1869 ruleAndInclude = {&rule, includeRules};
1870 return old;
1871 };
1872
1873 if (rule.firstNonSpace == XmlBool::True) {
1874 return set(firstNonSpace);
1875 } else if (rule.column == 0) {
1876 return set(column0);
1877 } else if (rule.column > 0) {
1878 return set(columnGreaterThan0[rule.column]);
1879 } else {
1880 return set(normal);
1881 }
1882 }
1883
1884 private:
1885 RuleAndInclude normal;
1886 RuleAndInclude column0;
1887 QMap<int, RuleAndInclude> columnGreaterThan0;
1888 RuleAndInclude firstNonSpace;
1889 };
1890
1891 // Associate QChar with RuleAndInclude
1892 struct CharTable {
1893 /// Search RuleAndInclude associated with @p c.
1894 RuleAndInclude find(QChar c) const
1895 {
1896 if (c.unicode() < 128) {
1897 return m_asciiMap[c.unicode()];
1898 }
1899 auto it = m_utf8Map.find(c);
1900 return it == m_utf8Map.end() ? RuleAndInclude{nullptr, nullptr} : it.value();
1901 }
1902
1903 /// Search RuleAndInclude associated with the characters of @p s.
1904 /// \return an empty QList when at least one character is not found.
1905 QList<RuleAndInclude> find(QStringView s) const
1906 {
1907 QList<RuleAndInclude> result;
1908
1909 for (QChar c : s) {
1910 if (!find(c)) {
1911 return result;
1912 }
1913 }
1914
1915 for (QChar c : s) {
1916 result.append(find(c));
1917 }
1918
1919 return result;
1920 }
1921
1922 /// Associates @p c with a rule.
1923 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1924 {
1925 if (c.unicode() < 128) {
1926 m_asciiMap[c.unicode()] = {&rule, includeRule};
1927 } else {
1928 m_utf8Map[c] = {&rule, includeRule};
1929 }
1930 }
1931
1932 /// Associates each character of @p s with a rule.
1933 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1934 {
1935 for (QChar c : s) {
1936 append(c, rule, includeRule);
1937 }
1938 }
1939
1940 private:
1941 RuleAndInclude m_asciiMap[127]{};
1942 QMap<QChar, RuleAndInclude> m_utf8Map;
1943 };
1944
1945 struct Char4Tables {
1946 CharTable chars;
1947 CharTable charsColumn0;
1948 QMap<int, CharTable> charsColumnGreaterThan0;
1949 CharTable charsFirstNonSpace;
1950 };
1951
1952 // View on Char4Tables members
1953 struct CharTableArray {
1954 // Append Char4Tables members that satisfies firstNonSpace and column.
1955 // Char4Tables::char is always added.
1956 CharTableArray(Char4Tables &tables, const Context::Rule &rule)
1957 {
1958 if (rule.firstNonSpace == XmlBool::True) {
1959 appendTable(tables.charsFirstNonSpace);
1960 }
1961
1962 if (rule.column == 0) {
1963 appendTable(tables.charsColumn0);
1964 } else if (rule.column > 0) {
1965 appendTable(tables.charsColumnGreaterThan0[rule.column]);
1966 }
1967
1968 appendTable(tables.chars);
1969 }
1970
1971 // Removes Char4Tables::chars when the rule contains firstNonSpace or column
1972 void removeNonSpecialWhenSpecial()
1973 {
1974 if (m_size > 1) {
1975 --m_size;
1976 }
1977 }
1978
1979 /// Search RuleAndInclude associated with @p c.
1980 RuleAndInclude find(QChar c) const
1981 {
1982 for (int i = 0; i < m_size; ++i) {
1983 if (auto ruleAndInclude = m_charTables[i]->find(c)) {
1984 return ruleAndInclude;
1985 }
1986 }
1987 return RuleAndInclude{nullptr, nullptr};
1988 }
1989
1990 /// Search RuleAndInclude associated with the characters of @p s.
1991 /// \return an empty QList when at least one character is not found.
1992 QList<RuleAndInclude> find(QStringView s) const
1993 {
1994 for (int i = 0; i < m_size; ++i) {
1995 auto result = m_charTables[i]->find(s);
1996 if (result.size()) {
1997 while (++i < m_size) {
1998 result.append(m_charTables[i]->find(s));
1999 }
2000 return result;
2001 }
2002 }
2003 return QList<RuleAndInclude>();
2004 }
2005
2006 /// Associates @p c with a rule.
2007 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
2008 {
2009 for (int i = 0; i < m_size; ++i) {
2010 m_charTables[i]->append(c, rule, includeRule);
2011 }
2012 }
2013
2014 /// Associates each character of @p s with a rule.
2015 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
2016 {
2017 for (int i = 0; i < m_size; ++i) {
2018 m_charTables[i]->append(s, rule, includeRule);
2019 }
2020 }
2021
2022 private:
2023 void appendTable(CharTable &t)
2024 {
2025 m_charTables[m_size] = &t;
2026 ++m_size;
2027 }
2028
2029 CharTable *m_charTables[3];
2030 int m_size = 0;
2031 };
2032
2033 struct ObservableRule {
2034 const Context::Rule *rule;
2035 const Context::Rule *includeRules;
2036
2037 bool hasResolvedIncludeRules() const
2038 {
2039 return rule == includeRules;
2040 }
2041 };
2042
2043 // Iterates over all the rules, including those in includedRules
2044 struct RuleIterator {
2045 RuleIterator(const QList<ObservableRule> &rules, const ObservableRule &endRule)
2046 : m_end(&endRule - rules.data())
2047 , m_rules(rules)
2048 {
2049 }
2050
2051 /// \return next rule or nullptr
2052 const Context::Rule *next()
2053 {
2054 // if in includedRules
2055 if (m_includedRules) {
2056 ++m_i2;
2057 if (m_i2 != m_includedRules->size()) {
2058 return (*m_includedRules)[m_i2];
2059 }
2060 ++m_i;
2061 m_includedRules = nullptr;
2062 }
2063
2064 // if is a includedRules
2065 while (m_i < m_end && m_rules[m_i].rule->type == Context::Rule::Type::IncludeRules) {
2066 if (!m_rules[m_i].includeRules && m_rules[m_i].rule->includedRules.size()) {
2067 m_i2 = 0;
2068 m_includedRules = &m_rules[m_i].rule->includedRules;
2069 return (*m_includedRules)[m_i2];
2070 }
2071 ++m_i;
2072 }
2073
2074 if (m_i < m_end) {
2075 ++m_i;
2076 return m_rules[m_i - 1].rule;
2077 }
2078
2079 return nullptr;
2080 }
2081
2082 /// \return current IncludeRules or nullptr
2083 const Context::Rule *currentIncludeRules() const
2084 {
2085 return m_includedRules ? m_rules[m_i].rule : m_rules[m_i].includeRules;
2086 }
2087
2088 private:
2089 int m_i = 0;
2090 int m_i2 = 0;
2091 const int m_end;
2092 const QList<ObservableRule> &m_rules;
2093 const QList<const Context::Rule *> *m_includedRules = nullptr;
2094 };
2095
2096 // Dot regex container that satisfies firstNonSpace and column.
2097 struct DotRegex {
2098 /// Append a dot regex rule.
2099 void append(const Context::Rule &rule, const Context::Rule *includedRule)
2100 {
2101 auto array = extractDotRegexes(rule);
2102 if (array[0]) {
2103 *array[0] = {&rule, includedRule};
2104 }
2105 if (array[1]) {
2106 *array[1] = {&rule, includedRule};
2107 }
2108 }
2109
2110 /// Search dot regex which hides @p rule
2111 RuleAndInclude find(const Context::Rule &rule)
2112 {
2113 auto array = extractDotRegexes(rule);
2114 if (array[0]) {
2115 return *array[0];
2116 }
2117 if (array[1]) {
2118 return *array[1];
2119 }
2120 return RuleAndInclude{};
2121 }
2122
2123 private:
2124 using Array = std::array<RuleAndInclude *, 2>;
2125
2126 Array extractDotRegexes(const Context::Rule &rule)
2127 {
2128 Array ret{};
2129
2130 if (rule.firstNonSpace != XmlBool::True && rule.column == -1) {
2131 ret[0] = &dotRegex;
2132 } else {
2133 if (rule.firstNonSpace == XmlBool::True) {
2134 ret[0] = &dotRegexFirstNonSpace;
2135 }
2136
2137 if (rule.column == 0) {
2138 ret[1] = &dotRegexColumn0;
2139 } else if (rule.column > 0) {
2140 ret[1] = &dotRegexColumnGreaterThan0[rule.column];
2141 }
2142 }
2143
2144 return ret;
2145 }
2146
2147 RuleAndInclude dotRegex{};
2148 RuleAndInclude dotRegexColumn0{};
2149 QMap<int, RuleAndInclude> dotRegexColumnGreaterThan0{};
2150 RuleAndInclude dotRegexFirstNonSpace{};
2151 };
2152
2153 bool success = true;
2154
2155 // characters of DetectChar/AnyChar
2156 Char4Tables detectChars;
2157 // characters of dynamic DetectChar
2158 Char4Tables dynamicDetectChars;
2159 // characters of LineContinue
2160 Char4Tables lineContinueChars;
2161
2162 Rule4 intRule{};
2163 Rule4 floatRule{};
2164 Rule4 hlCCharRule{};
2165 Rule4 hlCOctRule{};
2166 Rule4 hlCHexRule{};
2167 Rule4 hlCStringCharRule{};
2168 Rule4 detectIdentifierRule{};
2169
2170 // Contains includedRules and included includedRules
2172
2173 DotRegex dotRegex;
2174
2175 QList<ObservableRule> observedRules;
2176 observedRules.reserve(context.rules.size());
2177 for (const Context::Rule &rule : context.rules) {
2178 const Context::Rule *includeRule = nullptr;
2179 if (rule.type == Context::Rule::Type::IncludeRules) {
2180 auto *context = rule.context.context;
2181 if (context && context->isOnlyIncluded) {
2182 includeRule = &rule;
2183 }
2184 }
2185
2186 observedRules.push_back({&rule, includeRule});
2187 if (includeRule) {
2188 for (const Context::Rule *rule2 : rule.includedRules) {
2189 observedRules.push_back({rule2, includeRule});
2190 }
2191 }
2192 }
2193
2194 for (auto &observedRule : observedRules) {
2195 const Context::Rule &rule = *observedRule.rule;
2196 bool isUnreachable = false;
2197 QList<RuleAndInclude> unreachableBy;
2198
2199 // declare rule as unreachable if ruleAndInclude is not empty
2200 auto updateUnreachable1 = [&](RuleAndInclude ruleAndInclude) {
2201 if (ruleAndInclude) {
2202 isUnreachable = true;
2203 unreachableBy.append(ruleAndInclude);
2204 }
2205 };
2206
2207 // declare rule as unreachable if ruleAndIncludes is not empty
2208 auto updateUnreachable2 = [&](const QList<RuleAndInclude> &ruleAndIncludes) {
2209 if (!ruleAndIncludes.isEmpty()) {
2210 isUnreachable = true;
2211 unreachableBy.append(ruleAndIncludes);
2212 }
2213 };
2214
2215 // check if rule2.firstNonSpace/column is compatible with those of rule
2216 auto isCompatible = [&rule](Context::Rule const &rule2) {
2217 return (rule2.firstNonSpace != XmlBool::True && rule2.column == -1) || (rule.column == rule2.column && rule.column != -1)
2218 || (rule.firstNonSpace == rule2.firstNonSpace && rule.firstNonSpace == XmlBool::True);
2219 };
2220
2221 updateUnreachable1(dotRegex.find(rule));
2222
2223 switch (rule.type) {
2224 // checks if hidden by DetectChar/AnyChar
2225 // then add the characters to detectChars
2226 case Context::Rule::Type::AnyChar: {
2227 auto tables = CharTableArray(detectChars, rule);
2228 updateUnreachable2(tables.find(rule.string));
2229 tables.removeNonSpecialWhenSpecial();
2230 tables.append(rule.string, rule);
2231 break;
2232 }
2233
2234 // check if is hidden by DetectChar/AnyChar
2235 // then add the characters to detectChars or dynamicDetectChars
2236 case Context::Rule::Type::DetectChar: {
2237 auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2238 auto tables = CharTableArray(chars4, rule);
2239 updateUnreachable1(tables.find(rule.char0));
2240 tables.removeNonSpecialWhenSpecial();
2241 tables.append(rule.char0, rule);
2242 break;
2243 }
2244
2245 // check if hidden by DetectChar/AnyChar
2246 // then add spaces characters to detectChars
2247 case Context::Rule::Type::DetectSpaces: {
2248 auto tables = CharTableArray(detectChars, rule);
2249 updateUnreachable2(tables.find(u" \t"_sv));
2250 tables.removeNonSpecialWhenSpecial();
2251 tables.append(u' ', rule);
2252 tables.append(u'\t', rule);
2253 break;
2254 }
2255
2256 // check if hidden by DetectChar/AnyChar
2257 case Context::Rule::Type::HlCChar:
2258 updateUnreachable1(CharTableArray(detectChars, rule).find(u'\''));
2259 updateUnreachable1(hlCCharRule.setRule(rule));
2260 break;
2261
2262 // check if hidden by DetectChar/AnyChar
2263 case Context::Rule::Type::HlCHex:
2264 updateUnreachable1(CharTableArray(detectChars, rule).find(u'0'));
2265 updateUnreachable1(hlCHexRule.setRule(rule));
2266 break;
2267
2268 // check if hidden by DetectChar/AnyChar
2269 case Context::Rule::Type::HlCOct:
2270 updateUnreachable1(CharTableArray(detectChars, rule).find(u'0'));
2271 updateUnreachable1(hlCOctRule.setRule(rule));
2272 break;
2273
2274 // check if hidden by DetectChar/AnyChar
2275 case Context::Rule::Type::HlCStringChar:
2276 updateUnreachable1(CharTableArray(detectChars, rule).find(u'\\'));
2277 updateUnreachable1(hlCStringCharRule.setRule(rule));
2278 break;
2279
2280 // check if hidden by DetectChar/AnyChar
2281 case Context::Rule::Type::Int:
2282 updateUnreachable2(CharTableArray(detectChars, rule).find(u"0123456789"_sv));
2283 updateUnreachable1(intRule.setRule(rule));
2284 break;
2285
2286 // check if hidden by DetectChar/AnyChar
2287 case Context::Rule::Type::Float:
2288 updateUnreachable2(CharTableArray(detectChars, rule).find(u"0123456789."_sv));
2289 updateUnreachable1(floatRule.setRule(rule));
2290 // check that Float is before Int
2291 updateUnreachable1(Rule4(intRule).setRule(rule));
2292 break;
2293
2294 // check if hidden by another DetectIdentifier rule
2295 case Context::Rule::Type::DetectIdentifier:
2296 updateUnreachable1(detectIdentifierRule.setRule(rule));
2297 break;
2298
2299 // check if hidden by DetectChar/AnyChar or another LineContinue
2300 case Context::Rule::Type::LineContinue: {
2301 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2302
2303 auto tables = CharTableArray(lineContinueChars, rule);
2304 updateUnreachable1(tables.find(rule.char0));
2305 tables.removeNonSpecialWhenSpecial();
2306 tables.append(rule.char0, rule);
2307 break;
2308 }
2309
2310 // check if hidden by DetectChar/AnyChar or another Detect2Chars/RangeDetect
2311 case Context::Rule::Type::Detect2Chars:
2312 case Context::Rule::Type::RangeDetect:
2313 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2314 if (!isUnreachable) {
2315 RuleIterator ruleIterator(observedRules, observedRule);
2316 while (const auto *rulePtr = ruleIterator.next()) {
2317 if (isUnreachable) {
2318 break;
2319 }
2320 const auto &rule2 = *rulePtr;
2321 if (rule2.type == rule.type && isCompatible(rule2) && rule.char0 == rule2.char0 && rule.char1 == rule2.char1) {
2322 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2323 }
2324 }
2325 }
2326 break;
2327
2328 case Context::Rule::Type::RegExpr: {
2329 if (rule.isDotRegex) {
2330 dotRegex.append(rule, nullptr);
2331 break;
2332 }
2333
2334 // check that `rule` does not have another RegExpr as a prefix
2335 RuleIterator ruleIterator(observedRules, observedRule);
2336 while (const auto *rulePtr = ruleIterator.next()) {
2337 if (isUnreachable) {
2338 break;
2339 }
2340 const auto &rule2 = *rulePtr;
2341 if (rule2.type == Context::Rule::Type::RegExpr && isCompatible(rule2) && rule.insensitive == rule2.insensitive
2342 && rule.dynamic == rule2.dynamic && rule.sanitizedString.startsWith(rule2.sanitizedString)) {
2343 bool add = (rule.sanitizedString.startsWith(rule2.string) || rule.sanitizedString.size() < rule2.sanitizedString.size() + 2);
2344 if (!add) {
2345 // \s.* (sanitized = \s) is considered hiding \s*\S
2346 // we check the quantifiers to see if this is the case
2347 auto c1 = rule.sanitizedString[rule2.sanitizedString.size()].unicode();
2348 auto c2 = rule.sanitizedString[rule2.sanitizedString.size() + 1].unicode();
2349 auto c3 = rule2.sanitizedString.back().unicode();
2350 if (c3 == '*' || c3 == '?' || c3 == '+') {
2351 add = true;
2352 } else if (c1 == '*' || c1 == '?') {
2353 add = !((c2 == '?' || c2 == '+') || (rule.sanitizedString.size() >= rule2.sanitizedString.size() + 3));
2354 } else {
2355 add = true;
2356 }
2357 }
2358 if (add) {
2359 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2360 }
2361 }
2362 }
2363
2364 Q_FALLTHROUGH();
2365 }
2366 // check if a rule does not have another rule as a prefix
2367 case Context::Rule::Type::WordDetect:
2368 case Context::Rule::Type::StringDetect: {
2369 // check that dynamic `rule` does not have another dynamic StringDetect as a prefix
2370 if (rule.type == Context::Rule::Type::StringDetect && rule.dynamic == XmlBool::True) {
2371 RuleIterator ruleIterator(observedRules, observedRule);
2372 while (const auto *rulePtr = ruleIterator.next()) {
2373 if (isUnreachable) {
2374 break;
2375 }
2376
2377 const auto &rule2 = *rulePtr;
2378 if (rule2.type != Context::Rule::Type::StringDetect || rule2.dynamic != XmlBool::True || !isCompatible(rule2)) {
2379 continue;
2380 }
2381
2382 const bool isSensitive = (rule2.insensitive == XmlBool::True);
2383 const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2384 if ((isSensitive || rule.insensitive != XmlBool::True) && rule.string.startsWith(rule2.string, caseSensitivity)) {
2385 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2386 }
2387 }
2388 }
2389
2390 // string used for comparison and truncated from "dynamic" part
2391 QStringView s = rule.string;
2392
2393 // truncate to '%' with dynamic rules
2394 if (rule.dynamic == XmlBool::True) {
2395 static const QRegularExpression dynamicPosition(QStringLiteral(R"(^(?:[^%]*|%(?![1-9]))*)"));
2396 auto result = dynamicPosition.match(rule.string);
2397 s = s.sliced(0, result.capturedLength());
2398 // check if hidden by DetectChar/AnyChar
2399 if (s.size() + 2 <= rule.string.size()) {
2400 auto tables = CharTableArray(dynamicDetectChars, rule);
2401 updateUnreachable1(tables.find(s.data()[s.size() + 2]));
2402 }
2403 }
2404
2405 QString sanitizedRegex;
2406 // truncate to special character with RegExpr.
2407 // If regexp contains '|', `s` becomes empty.
2408 if (rule.type == Context::Rule::Type::RegExpr) {
2409 static const QRegularExpression regularChars(QStringLiteral(R"(^(?:[^.?*+^$[{(\\|]+|\\[-.?*+^$[\]{}()\\|]+|\[[^^\\]\])+)"));
2410 static const QRegularExpression sanitizeChars(QStringLiteral(R"(\\‍([-.?*+^$[\]{}()\\|])|\[([^^\\])\])"));
2411 const qsizetype result = regularChars.match(rule.string).capturedLength();
2412 const qsizetype pos = qMin(result, s.size());
2413 if (rule.string.indexOf(u'|', pos) < pos) {
2414 sanitizedRegex = rule.string.sliced(0, qMin(result, s.size()));
2415 sanitizedRegex.replace(sanitizeChars, QStringLiteral("\\1"));
2416 s = sanitizedRegex;
2417 } else {
2418 s = QStringView();
2419 }
2420 }
2421
2422 // check if hidden by DetectChar/AnyChar
2423 if (s.size() > 0) {
2424 auto t = CharTableArray(detectChars, rule);
2425 if (rule.insensitive != XmlBool::True) {
2426 updateUnreachable1(t.find(s[0]));
2427 } else {
2428 QChar c2[]{s[0].toLower(), s[0].toUpper()};
2429 updateUnreachable2(t.find(QStringView(c2, 2)));
2430 }
2431
2432 // StringDetect is a DetectChar
2433 if (rule.type == Context::Rule::Type::StringDetect && rule.string.size() == 1) {
2434 auto tables = CharTableArray(detectChars, rule);
2435 auto c = rule.string[0];
2436 if (rule.insensitive != XmlBool::True) {
2437 c = c.toLower();
2438 tables.removeNonSpecialWhenSpecial();
2439 tables.append(c, rule);
2440 c = c.toUpper();
2441 }
2442 tables.removeNonSpecialWhenSpecial();
2443 tables.append(c, rule);
2444 }
2445 }
2446
2447 // check if Detect2Chars, StringDetect, WordDetect is not a prefix of s
2448 if (s.size() > 0 && !isUnreachable) {
2449 // combination of uppercase and lowercase
2450 RuleAndInclude detect2CharsInsensitives[]{{}, {}, {}, {}};
2451
2452 RuleIterator ruleIterator(observedRules, observedRule);
2453 while (const auto *rulePtr = ruleIterator.next()) {
2454 if (isUnreachable) {
2455 break;
2456 }
2457 const auto &rule2 = *rulePtr;
2458 const bool isSensitive = (rule2.insensitive == XmlBool::True);
2459 const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2460
2461 switch (rule2.type) {
2462 // check that it is not a detectChars prefix
2463 case Context::Rule::Type::Detect2Chars:
2464 if (isCompatible(rule2) && s.size() >= 2) {
2465 if (rule.insensitive != XmlBool::True) {
2466 if (rule2.char0 == s[0] && rule2.char1 == s[1]) {
2467 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2468 }
2469 } else {
2470 // when the string is case insensitive,
2471 // all 4 upper/lower case combinations must be found
2472 auto set = [&](RuleAndInclude &x, QChar c1, QChar c2) {
2473 if (!x && rule2.char0 == c1 && rule2.char0 == c2) {
2474 x = {&rule2, ruleIterator.currentIncludeRules()};
2475 }
2476 };
2477 set(detect2CharsInsensitives[0], s[0].toLower(), s[1].toLower());
2478 set(detect2CharsInsensitives[1], s[0].toLower(), s[1].toUpper());
2479 set(detect2CharsInsensitives[2], s[0].toUpper(), s[1].toUpper());
2480 set(detect2CharsInsensitives[3], s[0].toUpper(), s[1].toLower());
2481
2482 if (detect2CharsInsensitives[0] && detect2CharsInsensitives[1] && detect2CharsInsensitives[2]
2483 && detect2CharsInsensitives[3]) {
2484 isUnreachable = true;
2485 unreachableBy.append(detect2CharsInsensitives[0]);
2486 unreachableBy.append(detect2CharsInsensitives[1]);
2487 unreachableBy.append(detect2CharsInsensitives[2]);
2488 unreachableBy.append(detect2CharsInsensitives[3]);
2489 }
2490 }
2491 }
2492 break;
2493
2494 // check that it is not a StringDetect prefix
2495 case Context::Rule::Type::StringDetect:
2496 if (isCompatible(rule2) && rule2.dynamic != XmlBool::True && (isSensitive || rule.insensitive != XmlBool::True)
2497 && s.startsWith(rule2.string, caseSensitivity)) {
2498 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2499 }
2500 break;
2501
2502 // check if a WordDetect is hidden by another WordDetect
2503 case Context::Rule::Type::WordDetect:
2504 if (rule.type == Context::Rule::Type::WordDetect && isCompatible(rule2) && (isSensitive || rule.insensitive != XmlBool::True)
2505 && 0 == rule.string.compare(rule2.string, caseSensitivity)) {
2506 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2507 }
2508 break;
2509
2510 default:;
2511 }
2512 }
2513 }
2514
2515 break;
2516 }
2517
2518 // check if hidden by another keyword rule
2519 case Context::Rule::Type::keyword: {
2520 RuleIterator ruleIterator(observedRules, observedRule);
2521 while (const auto *rulePtr = ruleIterator.next()) {
2522 if (isUnreachable) {
2523 break;
2524 }
2525 const auto &rule2 = *rulePtr;
2526 if (rule2.type == Context::Rule::Type::keyword && isCompatible(rule2) && rule.string == rule2.string) {
2527 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2528 }
2529 }
2530 // TODO check that all keywords are hidden by another rules
2531 break;
2532 }
2533
2534 // add characters in those used but without checking if they are already.
2535 // <DetectChar char="}" />
2536 // <includedRules .../> <- reference an another <DetectChar char="}" /> who will not be checked
2537 // <includedRules .../> <- reference a <DetectChar char="{" /> who will be added
2538 // <DetectChar char="{" /> <- hidden by previous rule
2539 case Context::Rule::Type::IncludeRules:
2540 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2541 break;
2542 }
2543
2544 if (auto &ruleAndInclude = includeContexts[rule.context.context]) {
2545 updateUnreachable1(ruleAndInclude);
2546 } else {
2547 ruleAndInclude.rule = &rule;
2548 }
2549
2550 for (const auto *rulePtr : rule.includedIncludeRules) {
2551 includeContexts.insert(rulePtr->context.context, RuleAndInclude{rulePtr, &rule});
2552 }
2553
2554 if (observedRule.includeRules) {
2555 break;
2556 }
2557
2558 for (const auto *rulePtr : rule.includedRules) {
2559 const auto &rule2 = *rulePtr;
2560 switch (rule2.type) {
2561 case Context::Rule::Type::AnyChar: {
2562 auto tables = CharTableArray(detectChars, rule2);
2563 tables.removeNonSpecialWhenSpecial();
2564 tables.append(rule2.string, rule2, &rule);
2565 break;
2566 }
2567
2568 case Context::Rule::Type::DetectChar: {
2569 auto &chars4 = (rule2.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2570 auto tables = CharTableArray(chars4, rule2);
2571 tables.removeNonSpecialWhenSpecial();
2572 tables.append(rule2.char0, rule2, &rule);
2573 break;
2574 }
2575
2576 case Context::Rule::Type::DetectSpaces: {
2577 auto tables = CharTableArray(detectChars, rule2);
2578 tables.removeNonSpecialWhenSpecial();
2579 tables.append(u' ', rule2, &rule);
2580 tables.append(u'\t', rule2, &rule);
2581 break;
2582 }
2583
2584 case Context::Rule::Type::HlCChar:
2585 hlCCharRule.setRule(rule2, &rule);
2586 break;
2587
2588 case Context::Rule::Type::HlCHex:
2589 hlCHexRule.setRule(rule2, &rule);
2590 break;
2591
2592 case Context::Rule::Type::HlCOct:
2593 hlCOctRule.setRule(rule2, &rule);
2594 break;
2595
2596 case Context::Rule::Type::HlCStringChar:
2597 hlCStringCharRule.setRule(rule2, &rule);
2598 break;
2599
2600 case Context::Rule::Type::Int:
2601 intRule.setRule(rule2, &rule);
2602 break;
2603
2604 case Context::Rule::Type::Float:
2605 floatRule.setRule(rule2, &rule);
2606 break;
2607
2608 case Context::Rule::Type::LineContinue: {
2609 auto tables = CharTableArray(lineContinueChars, rule2);
2610 tables.removeNonSpecialWhenSpecial();
2611 tables.append(rule2.char0, rule2, &rule);
2612 break;
2613 }
2614
2615 case Context::Rule::Type::RegExpr:
2616 if (rule2.isDotRegex) {
2617 dotRegex.append(rule2, &rule);
2618 }
2619 break;
2620
2621 case Context::Rule::Type::StringDetect: {
2622 // StringDetect is a DetectChar
2623 if (rule2.string.size() == 1 || (rule2.string.size() == 2 && rule2.dynamic == XmlBool::True)) {
2624 auto &chars4 = (rule2.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2625 auto tables = CharTableArray(chars4, rule2);
2626 tables.removeNonSpecialWhenSpecial();
2627 tables.append(rule2.string.back(), rule2, &rule);
2628 }
2629 break;
2630 }
2631
2632 case Context::Rule::Type::WordDetect:
2633 case Context::Rule::Type::Detect2Chars:
2634 case Context::Rule::Type::IncludeRules:
2635 case Context::Rule::Type::DetectIdentifier:
2636 case Context::Rule::Type::keyword:
2637 case Context::Rule::Type::Unknown:
2638 case Context::Rule::Type::RangeDetect:
2639 break;
2640 }
2641 }
2642 break;
2643
2644 case Context::Rule::Type::Unknown:
2645 break;
2646 }
2647
2648 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2649 auto &unreachableIncludedRule = unreachableIncludedRules[&rule];
2650 if (isUnreachable && unreachableIncludedRule.alwaysUnreachable) {
2651 unreachableIncludedRule.unreachableBy.append(unreachableBy);
2652 } else {
2653 unreachableIncludedRule.alwaysUnreachable = false;
2654 }
2655 } else if (isUnreachable) {
2656 success = false;
2657 QString message;
2658 message.reserve(128);
2659 for (auto &ruleAndInclude : std::as_const(unreachableBy)) {
2660 message += u"line "_sv;
2661 if (ruleAndInclude.includeRules) {
2662 message += QString::number(ruleAndInclude.includeRules->line);
2663 message += u" [by '"_sv;
2664 message += ruleAndInclude.includeRules->context.name;
2665 message += u"' line "_sv;
2666 message += QString::number(ruleAndInclude.rule->line);
2667 if (ruleAndInclude.includeRules->filename != ruleAndInclude.rule->filename) {
2668 message += u" ("_sv;
2669 message += ruleAndInclude.rule->filename;
2670 message += u')';
2671 }
2672 message += u']';
2673 } else {
2674 message += QString::number(ruleAndInclude.rule->line);
2675 }
2676 message += u", "_sv;
2677 }
2678 message.chop(2);
2679 qWarning() << filename << "line" << rule.line << "unreachable rule by" << message;
2680 }
2681 }
2682
2683 return success;
2684 }
2685
2686 //! Proposes to merge certain rule sequences
2687 //! - several DetectChar/AnyChar into AnyChar
2688 //! - several RegExpr into one RegExpr
2689 bool suggestRuleMerger(const QString &filename, const Context &context) const
2690 {
2691 bool success = true;
2692
2693 if (context.rules.isEmpty()) {
2694 return success;
2695 }
2696
2697 auto it = context.rules.begin();
2698 const auto end = context.rules.end() - 1;
2699
2700 for (; it < end; ++it) {
2701 const auto &rule1 = *it;
2702 const auto &rule2 = it[1];
2703
2704 auto isCommonCompatible = [&] {
2705 if (rule1.lookAhead != rule2.lookAhead) {
2706 return false;
2707 }
2708 // ignore attribute when lookAhead is true
2709 if (rule1.lookAhead != XmlBool::True && rule1.attribute != rule2.attribute) {
2710 return false;
2711 }
2712 // clang-format off
2713 return rule1.beginRegion == rule2.beginRegion
2714 && rule1.endRegion == rule2.endRegion
2715 && rule1.firstNonSpace == rule2.firstNonSpace
2716 && rule1.context.context == rule2.context.context
2717 && rule1.context.popCount == rule2.context.popCount;
2718 // clang-format on
2719 };
2720
2721 switch (rule1.type) {
2722 // request to merge StringDetect with AnyChar
2723 case Context::Rule::Type::StringDetect:
2724 if (rule1.string.size() != 1 || rule1.dynamic == XmlBool::True) {
2725 break;
2726 }
2727 Q_FALLTHROUGH();
2728 // request to merge AnyChar/DetectChar
2729 case Context::Rule::Type::AnyChar:
2730 case Context::Rule::Type::DetectChar:
2731 if ((rule2.type == Context::Rule::Type::AnyChar || rule2.type == Context::Rule::Type::DetectChar
2732 || (rule2.type == Context::Rule::Type::StringDetect && rule2.dynamic != XmlBool::True && rule2.string.size() == 1))
2733 && isCommonCompatible() && rule1.column == rule2.column) {
2734 qWarning() << filename << "line" << rule2.line << "can be merged as AnyChar with the previous rule";
2735 success = false;
2736 }
2737 break;
2738
2739 // request to merge multiple RegExpr
2740 case Context::Rule::Type::RegExpr:
2741 if (rule2.type == Context::Rule::Type::RegExpr && isCommonCompatible() && rule1.dynamic == rule2.dynamic
2742 && (rule1.column == rule2.column || (rule1.column <= 0 && rule2.column <= 0))) {
2743 qWarning() << filename << "line" << rule2.line << "can be merged with the previous rule";
2744 success = false;
2745 }
2746 break;
2747
2748 case Context::Rule::Type::DetectSpaces:
2749 case Context::Rule::Type::HlCChar:
2750 case Context::Rule::Type::HlCHex:
2751 case Context::Rule::Type::HlCOct:
2752 case Context::Rule::Type::HlCStringChar:
2753 case Context::Rule::Type::Int:
2754 case Context::Rule::Type::Float:
2755 case Context::Rule::Type::LineContinue:
2756 case Context::Rule::Type::WordDetect:
2757 case Context::Rule::Type::Detect2Chars:
2758 case Context::Rule::Type::IncludeRules:
2759 case Context::Rule::Type::DetectIdentifier:
2760 case Context::Rule::Type::keyword:
2761 case Context::Rule::Type::Unknown:
2762 case Context::Rule::Type::RangeDetect:
2763 break;
2764 }
2765 }
2766
2767 return success;
2768 }
2769
2770 //! Initialize the referenced context (ContextName::context)
2771 //! Some input / output examples are:
2772 //! - "#stay" -> ""
2773 //! - "#pop" -> ""
2774 //! - "Comment" -> "Comment"
2775 //! - "#pop!Comment" -> "Comment"
2776 //! - "##ISO C++" -> ""
2777 //! - "Comment##ISO C++"-> "Comment" in ISO C++
2778 void resolveContextName(Definition &definition, Context &context, ContextName &contextName, int line)
2779 {
2780 QStringView name = contextName.name;
2781 if (name.isEmpty()) {
2782 contextName.stay = true;
2783 } else if (name.startsWith(u"#stay"_sv)) {
2784 contextName.stay = true;
2785 if (name.size() > 5) {
2786 qWarning() << definition.filename << "line" << line << "invalid context in" << context.name;
2787 m_success = false;
2788 }
2789 } else {
2790 while (name.startsWith(u"#pop"_sv)) {
2791 name = name.sliced(4);
2792 ++contextName.popCount;
2793 }
2794
2795 if (contextName.popCount && !name.isEmpty()) {
2796 if (name.startsWith(u'!') && name.size() > 1) {
2797 name = name.sliced(1);
2798 } else {
2799 qWarning() << definition.filename << "line" << line << "'!' missing between '#pop' and context name" << context.name;
2800 m_success = false;
2801 }
2802 }
2803
2804 if (!name.isEmpty()) {
2805 const int idx = name.indexOf(u"##"_sv);
2806 if (idx == -1) {
2807 auto it = definition.contexts.find(name.toString());
2808 if (it != definition.contexts.end()) {
2809 contextName.context = &*it;
2810 }
2811 } else {
2812 auto defName = name.sliced(idx + 2);
2813 auto it = m_definitions.find(defName.toString());
2814 if (it != m_definitions.end()) {
2815 auto listName = name.sliced(0, idx).toString();
2816 definition.referencedDefinitions.insert(&*it);
2817 auto ctxIt = it->contexts.find(listName.isEmpty() ? it->firstContextName : listName);
2818 if (ctxIt != it->contexts.end()) {
2819 contextName.context = &*ctxIt;
2820 }
2821 } else {
2822 qWarning() << definition.filename << "line" << line << "unknown definition in" << context.name;
2823 m_success = false;
2824 }
2825 }
2826
2827 if (!contextName.context) {
2828 qWarning() << definition.filename << "line" << line << "unknown context" << name << "in" << context.name;
2829 m_success = false;
2830 }
2831 }
2832 }
2833 }
2834
2835 QMap<QString, Definition> m_definitions;
2837 Definition *m_currentDefinition = nullptr;
2838 Keywords *m_currentKeywords = nullptr;
2839 Context *m_currentContext = nullptr;
2840 // xml reader variable
2841 //@{
2842 QString m_textContent;
2843 bool m_inKeywordItem = false;
2844 //@}
2845 bool m_success = true;
2846};
2847
2848class HlCompressor
2849{
2850public:
2851 HlCompressor(const QString &kateVersion)
2852 : m_kateVersion(kateVersion)
2853 {
2854 m_hasElems.push_back(true);
2855 }
2856
2857 const QString &compressedXML() const
2858 {
2859 return m_data;
2860 }
2861
2862 /**
2863 * Reduce xml space by removing what is superfluous.
2864 * - transforms boolean values into 0 or 1.
2865 * - remove unused attributes.
2866 * - remove spaces and comments.
2867 * - remove context attributes referring to #stay (because this is the default).
2868 * - replace Detect2Chars with StringDetect (String="xy" is shorter than char="x" char1="y").
2869 * - sort contexts by frequency of use to accelerate their search during loading.
2870 */
2871 void processElement(const QXmlStreamReader &xml)
2872 {
2873 switch (xml.tokenType()) {
2875 closePreviousOpenTag(m_inContexts && !m_contexts.empty() ? m_contexts.back().data : m_data);
2876 m_hasElems.push_back(false);
2877
2878 const auto tagName = xml.name();
2879 if (tagName == u"contexts"_sv) {
2880 m_inContexts = true;
2881 m_data += u"<contexts"_sv;
2882 } else if (m_inContexts) {
2883 Context &ctx = (m_contexts.empty() || tagName == u"context"_sv) ? m_contexts.emplace_back() : m_contexts.back();
2884 QString &out = ctx.data;
2885 const bool isDetect2Chars = tagName == u"Detect2Chars"_sv;
2886 out += u'<' % (isDetect2Chars ? u"StringDetect"_sv : tagName);
2887
2888 auto attrs = xml.attributes();
2889 sortAttributes(attrs);
2890 for (const auto &attr : attrs) {
2891 const auto attrName = attr.name();
2892 auto value = attr.value();
2893 // transform Detect2Chars char and char1 attributes to StringDetect String attribute
2894 if (isDetect2Chars && (attrName == u"char"_sv || attrName == u"char1"_sv)) {
2895 if (attrName == u"char"_sv) {
2896 const auto ch0 = value;
2897 const auto ch1 = attrs.value(u"char1"_sv);
2898 QChar chars[]{ch0.isEmpty() ? u' ' : ch0[0], ch1.isEmpty() ? u' ' : ch1[0]};
2899 writeXmlAttribute(out, u"String"_sv, QStringView(chars, 2), tagName);
2900 }
2901 } else if (attrName == u"context"_sv || attrName == u"lineEndContext"_sv || attrName == u"fallthroughContext"_sv
2902 || attrName == u"lineEmptyContext"_sv) {
2903 // ignore #stay context because this is the default
2904 if (value != u"#stay"_sv) {
2905 writeXmlAttribute(out, attrName, value, tagName);
2906
2907 /*
2908 * Extract context name and increment context counter
2909 */
2910 bool hasPop = false;
2911 while (value.startsWith(u"#pop"_sv)) {
2912 hasPop = true;
2913 value = value.sliced(4);
2914 }
2915 if (hasPop && !value.isEmpty()) {
2916 value = value.sliced(1);
2917 }
2918 if (!value.isEmpty() && -1 == value.indexOf(u"##"_sv)) {
2919 m_contextRefs[value.toString()]++;
2920 }
2921 }
2922 } else if (tagName == u"LineContinue"_sv && attrName == u"char"_sv && value == u"\\") {
2923 // ignore char="\\" with LineContinue
2924 } else {
2925 if (attrName == u"name"_sv) {
2926 ctx.name = value.toString();
2927 }
2928 writeXmlAttribute(out, attrName, value, tagName);
2929 }
2930 }
2931 } else {
2932 m_data += u'<' % tagName;
2933 const auto attrs = xml.attributes();
2934 for (const auto &attr : attrs) {
2935 auto name = attr.name();
2936 auto value = (name == u"kateversion") ? QStringView(m_kateVersion) : attr.value();
2937 writeXmlAttribute(m_data, name, value, tagName);
2938 }
2939 }
2940 break;
2941 }
2942
2944 const auto name = xml.name();
2945 if (m_inContexts && !m_contexts.empty() && name == u"contexts"_sv) {
2946 m_inContexts = false;
2947 // sorting contexts by the most used (ignore first context)
2948 std::sort(m_contexts.begin() + 1, m_contexts.end(), [&](auto &ctx1, auto &ctx2) {
2949 auto i1 = m_contextRefs.value(ctx1.name);
2950 auto i2 = m_contextRefs.value(ctx2.name);
2951 if (i1 != i2) {
2952 return i1 > i2;
2953 }
2954 // for a reproducible build, contexts with the same number of uses are sorted by name
2955 return ctx1.name < ctx2.name;
2956 });
2957 for (const auto &ctx : m_contexts) {
2958 m_data += ctx.data;
2959 }
2960 }
2961
2962 QString &out = m_inContexts && !m_contexts.empty() ? m_contexts.back().data : m_data;
2963 if (m_hasElems.back()) {
2964 out += u"</"_sv % name % u'>';
2965 } else {
2966 out += u"/>"_sv;
2967 }
2968 m_hasElems.pop_back();
2969 break;
2970 }
2971
2974 if (!m_inContexts && !xml.isWhitespace()) {
2975 closePreviousOpenTag(m_data);
2976 writeXmlText(m_data, xml.text());
2977 }
2978 break;
2979
2980 default:;
2981 }
2982 }
2983
2984private:
2985 void closePreviousOpenTag(QString &out)
2986 {
2987 if (!m_hasElems.back()) {
2988 m_hasElems.back() = true;
2989 out += u'>';
2990 }
2991 }
2992
2993 /**
2994 * Write \p text escaping special characters.
2995 */
2996 static void writeXmlText(QString &out, QStringView text, bool escapeDQ = false)
2997 {
2998 for (const QChar &c : text) {
2999 if (c == u'<') {
3000 out += u"&lt;"_sv;
3001 } else if (c == u'&') {
3002 out += u"&amp;"_sv;
3003 } else if (escapeDQ && c == u'"') {
3004 out += u"&#34;"_sv;
3005 } else if (c == u'\t') {
3006 // non-space whitespace character in an attribute is remplaced with space...
3007 out += u"&#9;"_sv;
3008 } else {
3009 out += c;
3010 }
3011 }
3012 }
3013
3014 /**
3015 * Write attribut in \p out.
3016 * Booleans are converted to 0, 1 or ignored if this corresponds to the default value.
3017 * Values will be written with either double quotes or single quotes,
3018 * depending on which takes up the least space
3019 */
3020 static void writeXmlAttribute(QString &out, QStringView attrName, QStringView value, QStringView tagName)
3021 {
3022 enum class DefaultBool {
3023 // default value is false
3024 False,
3025 // default value is true
3026 True,
3027 // manipulate as a tribool whose attribute absence is equivalent to None
3028 None,
3029 // not used
3030 Ignored,
3031 // default value is false, but None for <keyword>
3032 FalseOrKeywordTag,
3033 // default value is true, but depends on another value for <keywords>
3034 TrueOrKeywordsTag,
3035 // default is false, but ignored in <context>
3036 DynamicAttr,
3037 };
3038 static const QHash<QStringView, DefaultBool> booleanAttrs({
3039 {u"fallthrough"_sv, DefaultBool::Ignored},
3040 {u"dynamic"_sv, DefaultBool::DynamicAttr},
3041 {u"hidden"_sv, DefaultBool::False},
3042 {u"indentationsensitive"_sv, DefaultBool::False},
3043 {u"noIndentationBasedFolding"_sv, DefaultBool::False},
3044 {u"lookAhead"_sv, DefaultBool::False},
3045 {u"firstNonSpace"_sv, DefaultBool::False},
3046 {u"insensitive"_sv, DefaultBool::FalseOrKeywordTag},
3047 {u"minimal"_sv, DefaultBool::False},
3048 {u"includeAttrib"_sv, DefaultBool::False},
3049 {u"italic"_sv, DefaultBool::None},
3050 {u"bold"_sv, DefaultBool::None},
3051 {u"underline"_sv, DefaultBool::None},
3052 {u"strikeOut"_sv, DefaultBool::None},
3053 {u"spellChecking"_sv, DefaultBool::True},
3054 {u"casesensitive"_sv, DefaultBool::TrueOrKeywordsTag},
3055 {u"ignored"_sv, DefaultBool::Ignored},
3056 });
3057
3058 auto it = booleanAttrs.find(attrName);
3059 // convert boolean value
3060 if (it != booleanAttrs.end()) {
3061 bool b = KSyntaxHighlighting::Xml::attrToBool(value);
3062 bool ignoreAttr = false;
3063 switch (*it) {
3064 case DefaultBool::Ignored:
3065 ignoreAttr = true;
3066 break;
3067 case DefaultBool::TrueOrKeywordsTag:
3068 ignoreAttr = (tagName == u"keywords"_sv) ? false : b;
3069 break;
3070 case DefaultBool::True:
3071 ignoreAttr = b;
3072 break;
3073 case DefaultBool::FalseOrKeywordTag:
3074 ignoreAttr = (tagName == u"keyword"_sv) ? false : !b;
3075 break;
3076 case DefaultBool::DynamicAttr:
3077 ignoreAttr = (tagName == u"context"_sv) || !b;
3078 break;
3079 case DefaultBool::False:
3080 ignoreAttr = !b;
3081 break;
3082 case DefaultBool::None:
3083 ignoreAttr = false;
3084 break;
3085 }
3086 if (!ignoreAttr) {
3087 out += u' ' % attrName % u"=\""_sv % (b ? u'1' : u'0') % u'"';
3088 }
3089 } else {
3090 const bool hasDQ = value.contains(u'"');
3091 // attribute in double quotes when the value does not contain " or contains " and '
3092 if (!hasDQ || value.contains(u'\'')) {
3093 out += u' ' % attrName % u"=\""_sv;
3094 writeXmlText(out, value, hasDQ);
3095 out += u'"';
3096 // attribute in single quotes because the value contains "
3097 } else {
3098 out += u' ' % attrName % u"='"_sv;
3099 writeXmlText(out, value);
3100 out += u'\'';
3101 }
3102 }
3103 }
3104
3105 /**
3106 * Sort attributes for better compression by rcc.
3107 */
3108 static void sortAttributes(QXmlStreamAttributes &attrs)
3109 {
3110 static const QHash<QStringView, int> priorityAttrs({
3111 // context and rule
3112 {u"attribute"_sv, 5},
3113
3114 // context and itemData
3115 {u"name"_sv, 4},
3116
3117 // context
3118 {u"noIndentationBasedFolding"_sv, 11},
3119 {u"lineEndContext"_sv, 9},
3120 {u"lineEmptyContext"_sv, 8},
3121 {u"fallthroughContext"_sv, 7},
3122
3123 // rule
3124 {u"lookAhead"_sv, 100},
3125 {u"firstNonSpace"_sv, 99},
3126 {u"dynamic"_sv, 98},
3127 {u"minimal"_sv, 97},
3128 {u"includeAttrib"_sv, 96},
3129 {u"insensitive"_sv, 95},
3130 {u"column"_sv, 50},
3131 {u"beginRegion"_sv, 40},
3132 {u"endRegion"_sv, 41},
3133 {u"weakDeliminator"_sv, 31},
3134 {u"additionalDeliminator"_sv, 30},
3135 {u"context"_sv, 20},
3136 {u"String"_sv, 2},
3137 {u"char"_sv, 2},
3138
3139 // itemData
3140 {u"strikeOut"_sv, 100},
3141 {u"underline"_sv, 99},
3142 {u"italic"_sv, 98},
3143 {u"bold"_sv, 97},
3144 {u"spellChecking"_sv, 96},
3145 {u"defStyleNum"_sv, 95},
3146 {u"color"_sv, 94},
3147 {u"backgroundColor"_sv, 93},
3148 {u"selBackgroundColor"_sv, 92},
3149 {u"selColor"_sv, 91},
3150 });
3151 std::sort(attrs.begin(), attrs.end(), [](auto &attr1, auto &attr2) {
3152 auto i1 = priorityAttrs.value(attr1.name());
3153 auto i2 = priorityAttrs.value(attr2.name());
3154 if (i1 != i2) {
3155 return i1 < i2;
3156 }
3157 return attr1.name() < attr2.name();
3158 });
3159 }
3160
3161 struct Context {
3162 QString name;
3163 QString data;
3164 };
3165 QString m_data = u"<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE language>"_s;
3166 std::vector<Context> m_contexts;
3167 QHash<QString, int> m_contextRefs;
3168 QVarLengthArray<bool, 8> m_hasElems;
3169 QString m_kateVersion;
3170 bool m_inContexts = false;
3171};
3172
3173void printFileError(const QFile &file)
3174{
3175 qWarning() << "Failed to open" << file.fileName() << "-" << file.errorString();
3176}
3177
3178void printXmlError(const QString &fileName, const QXmlStreamReader &xml)
3179{
3180 qWarning() << fileName << "-" << xml.errorString() << "@ offset" << xml.characterOffset();
3181};
3182
3183QStringList readListing(const QString &fileName)
3184{
3185 QFile file(fileName);
3186 if (!file.open(QIODevice::ReadOnly)) {
3187 printFileError(file);
3188 return QStringList();
3189 }
3190
3191 QXmlStreamReader xml(&file);
3192 QStringList listing;
3193 while (!xml.atEnd()) {
3194 xml.readNext();
3195
3196 // add only .xml files, no .json or stuff
3197 if (xml.isCharacters() && xml.text().contains(QLatin1String(".xml"))) {
3198 listing.append(xml.text().toString());
3199 }
3200 }
3201
3202 if (xml.hasError()) {
3203 printXmlError(fileName, xml);
3204 listing.clear();
3205 }
3206
3207 return listing;
3208}
3209
3210/**
3211 * check if the "extensions" attribute have valid wildcards
3212 * @param extensions extensions string to check
3213 * @return valid?
3214 */
3215bool checkExtensions(QStringView extensions)
3216{
3217 // get list of extensions
3218 const QList<QStringView> extensionParts = extensions.split(u';', Qt::SkipEmptyParts);
3219
3220 // ok if empty
3221 if (extensionParts.isEmpty()) {
3222 return true;
3223 }
3224
3225 // check that only valid wildcard things are inside the parts
3226 for (const auto &extension : extensionParts) {
3227 for (const auto c : extension) {
3228 // eat normal things
3229 if (c.isDigit() || c.isLetter()) {
3230 continue;
3231 }
3232
3233 // allow some special characters
3234 if (c == u'.' || c == u'-' || c == u'_' || c == u'+') {
3235 continue;
3236 }
3237
3238 // only allowed wildcard things: '?' and '*'
3239 if (c == u'?' || c == u'*') {
3240 continue;
3241 }
3242
3243 qWarning() << "invalid character" << c << "seen in extensions wildcard";
3244 return false;
3245 }
3246 }
3247
3248 // all checks passed
3249 return true;
3250}
3251
3252struct CompressedFile {
3253 QString fileName;
3254 QString xmlData;
3255};
3256
3257}
3258
3259int main(int argc, char *argv[])
3260{
3261 // get app instance
3262 QCoreApplication app(argc, argv);
3263
3264 // ensure enough arguments are passed
3265 if (app.arguments().size() < 4) {
3266 return 1;
3267 }
3268
3269#ifdef HAS_XERCESC
3270 // care for proper init and cleanup
3271 XMLPlatformUtils::Initialize();
3272 auto cleanup = qScopeGuard(XMLPlatformUtils::Terminate);
3273
3274 /*
3275 * parse XSD first time and cache it
3276 */
3277 XMLGrammarPoolImpl xsd(XMLPlatformUtils::fgMemoryManager);
3278
3279 // create parser for the XSD
3280 CustomXMLValidator parser(&xsd);
3281
3282 // load grammar into the pool, on error just abort
3283 const auto xsdFile = app.arguments().at(2);
3284 if (!parser.loadGrammar((const char16_t *)xsdFile.utf16(), Grammar::SchemaGrammarType, true) || parser.eh.failed()) {
3285 qWarning("Failed to parse XSD %s: %s", qPrintable(xsdFile), qPrintable(parser.messages));
3286 return 2;
3287 }
3288
3289 // lock the pool, no later modifications wanted!
3290 xsd.lockPool();
3291#endif
3292
3293 const QString hlFilenamesListing = app.arguments().value(3);
3294 if (hlFilenamesListing.isEmpty()) {
3295 return 1;
3296 }
3297
3298 QStringList hlFilenames = readListing(hlFilenamesListing);
3299 if (hlFilenames.isEmpty()) {
3300 qWarning("Failed to read %s", qPrintable(hlFilenamesListing));
3301 return 3;
3302 }
3303
3304 // text attributes
3305 const QStringList textAttributes = QStringList() << QStringLiteral("name") << QStringLiteral("alternativeNames") << QStringLiteral("section")
3306 << QStringLiteral("mimetype") << QStringLiteral("extensions") << QStringLiteral("style")
3307 << QStringLiteral("author") << QStringLiteral("license") << QStringLiteral("indenter");
3308
3309 // index all given highlightings
3310 HlFilesChecker filesChecker;
3311 QVariantMap hls;
3312 int anyError = 0;
3313 std::vector<CompressedFile> compressedFiles;
3314 for (const QString &hlFilename : std::as_const(hlFilenames)) {
3315 QFile hlFile(hlFilename);
3316 if (!hlFile.open(QIODevice::ReadOnly)) {
3317 printFileError(hlFile);
3318 anyError = 3;
3319 continue;
3320 }
3321
3322#ifdef HAS_XERCESC
3323 // create parser
3324 CustomXMLValidator parser(&xsd);
3325
3326 // parse the XML file
3327 parser.parse((const char16_t *)hlFile.fileName().utf16());
3328
3329 // report issues
3330 if (parser.eh.failed()) {
3331 qWarning("Failed to validate XML %s: %s", qPrintable(hlFile.fileName()), qPrintable(parser.messages));
3332 anyError = 4;
3333 continue;
3334 }
3335#endif
3336
3337 // read the needed attributes from toplevel language tag
3338 hlFile.reset();
3339 QXmlStreamReader xml(&hlFile);
3340 if (xml.readNextStartElement()) {
3341 if (xml.name() != QLatin1String("language")) {
3342 anyError = 5;
3343 continue;
3344 }
3345 } else {
3346 anyError = 6;
3347 continue;
3348 }
3349
3350 // map to store hl info
3351 QVariantMap hl;
3352
3353 // transfer text attributes
3354 for (const QString &attribute : std::as_const(textAttributes)) {
3355 hl[attribute] = xml.attributes().value(attribute).toString();
3356 }
3357
3358 // check if extensions have the right format
3359 if (!checkExtensions(hl[QStringLiteral("extensions")].toString())) {
3360 qWarning() << hlFilename << "'extensions' wildcards invalid:" << hl[QStringLiteral("extensions")].toString();
3361 anyError = 23;
3362 }
3363
3364 // numerical attributes
3365 hl[QStringLiteral("version")] = xml.attributes().value(QLatin1String("version")).toInt();
3366 hl[QStringLiteral("priority")] = xml.attributes().value(QLatin1String("priority")).toInt();
3367
3368 // add boolean one
3369 hl[QStringLiteral("hidden")] = attrToBool(xml.attributes().value(QLatin1String("hidden")));
3370
3371 // keep some strings as UTF-8 for faster translations
3372 hl[QStringLiteral("nameUtf8")] = hl[QStringLiteral("name")].toString().toUtf8();
3373 hl[QStringLiteral("sectionUtf8")] = hl[QStringLiteral("section")].toString().toUtf8();
3374
3375 // remember hl
3376 hls[QFileInfo(hlFile).fileName()] = hl;
3377
3378 const QStringView kateversion = xml.attributes().value(QStringLiteral("kateversion"));
3379 const QString hlName = hl[QStringLiteral("name")].toString();
3380 const QString hlAlternativeNames = hl[QStringLiteral("alternativeNames")].toString();
3381
3382 filesChecker.setDefinition(kateversion, hlFilename, hlName, hlAlternativeNames.split(u';', Qt::SkipEmptyParts));
3383
3384 // As the compressor removes "fallthrough" attribute which is required with
3385 // "fallthroughContext" before the 5.62 version, the minimum version is
3386 // automatically increased
3387 HlCompressor compressor((filesChecker.currentVersion() < KateVersion{5, 62}) ? u"5.62"_s : kateversion.toString());
3388 compressor.processElement(xml);
3389
3390 // scan for broken regex or keywords with spaces
3391 while (!xml.atEnd()) {
3392 xml.readNext();
3393 filesChecker.processElement(xml);
3394 compressor.processElement(xml);
3395 }
3396
3397 if (xml.hasError()) {
3398 anyError = 33;
3399 printXmlError(hlFilename, xml);
3400 }
3401
3402 compressedFiles.emplace_back(CompressedFile{
3403 QFileInfo(hlFilename).fileName(),
3404 compressor.compressedXML(),
3405 });
3406 }
3407
3408 filesChecker.resolveContexts();
3409
3410 if (!filesChecker.check()) {
3411 anyError = 7;
3412 }
3413
3414 // bail out if any problem was seen
3415 if (anyError) {
3416 return anyError;
3417 }
3418
3419 // check compressed file
3420 HlFilesChecker filesChecker2;
3421 const QString compressedDir = app.arguments().at(4) + u"/"_sv;
3422 for (const auto &compressedFile : std::as_const(compressedFiles)) {
3423 const auto outFileName = compressedDir + compressedFile.fileName;
3424 auto utf8Data = compressedFile.xmlData.toUtf8();
3425
3426#ifdef HAS_XERCESC
3427 // create parser
3428 CustomXMLValidator parser(&xsd);
3429
3430 auto utf8Filename = outFileName.toUtf8();
3431 utf8Filename.append('\0');
3432 // parse the XML file
3433 MemBufInputSource membuf(reinterpret_cast<const XMLByte *>(utf8Data.constData()), utf8Data.size(), utf8Filename.data());
3434
3435 // report issues
3436 if (parser.eh.failed()) {
3437 qWarning("Failed to validate XML %s: %s", qPrintable(outFileName), qPrintable(parser.messages));
3438 return 8;
3439 }
3440#endif
3441
3442 QBuffer buffer(&utf8Data);
3443 buffer.open(QBuffer::ReadOnly);
3444 QXmlStreamReader xml(&buffer);
3445 // scan for broken file
3446 while (!xml.atEnd()) {
3447 if (xml.readNext() == QXmlStreamReader::TokenType::StartElement && xml.name() == u"language"_sv) {
3448 const auto attrs = xml.attributes();
3449 const auto version = attrs.value(u"kateversion"_sv);
3450 const QString hlName = attrs.value(u"name"_sv).toString();
3451 const QString hlAlternativeNames = attrs.value(u"alternativeNames"_sv).toString();
3452 filesChecker2.setDefinition(version, outFileName, hlName, hlAlternativeNames.split(u';', Qt::SkipEmptyParts));
3453 }
3454 filesChecker2.processElement(xml);
3455 }
3456
3457 if (xml.hasError()) {
3458 printXmlError(outFileName, xml);
3459 return 9;
3460 }
3461
3462 // create outfile, after all has worked!
3463 QFile outFile(outFileName);
3464 if (!outFile.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
3465 return 10;
3466 }
3467 outFile.write(utf8Data);
3468 }
3469
3470 filesChecker2.resolveContexts();
3471
3472 // bail out if any problem was seen
3473 if (!filesChecker2.check()) {
3474 return 11;
3475 }
3476
3477 // create outfile, after all has worked!
3478 QFile outFile(app.arguments().at(1));
3479 if (!outFile.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
3480 return 12;
3481 }
3482
3483 // write out json
3484 outFile.write(QCborValue::fromVariant(QVariant(hls)).toCbor());
3485
3486 // be done
3487 return 0;
3488}
AKONADI_MIME_EXPORT const char Ignored[]
Type type(const QSqlDatabase &db)
char * toString(const EngineQuery &query)
QAction * end(const QObject *recvr, const char *slot, QObject *parent)
KIOCORE_EXPORT bool operator==(const UDSEntry &entry, const UDSEntry &other)
QString name(const QVariant &location)
void error(QWidget *parent, const QString &text, const QString &title, const KGuiItem &buttonOk, Options options=Notify)
KGuiItem add()
KGuiItem find()
const QList< QKeySequence > & next()
const QList< QKeySequence > & replace()
KTEXTEDITOR_EXPORT size_t qHash(KTextEditor::Cursor cursor, size_t seed=0) noexcept
bool operator<(const PosRange< Trait > &l, const PosRange< Trait > &r)
NETWORKMANAGERQT_EXPORT QString version()
QCborValue fromVariant(const QVariant &variant)
bool isDigit(char32_t ucs4)
bool isLetter(char32_t ucs4)
char32_t toLower(char32_t ucs4)
char32_t toUpper(char32_t ucs4)
char16_t & unicode()
virtual QString fileName() const const override
bool open(FILE *fh, OpenMode mode, FileHandleFlags handleFlags)
QString fileName() const const
iterator find(const Key &key)
QString errorString() const const
void append(QList< T > &&value)
iterator begin()
void clear()
iterator end()
bool isEmpty() const const
void push_back(parameter_type value)
void reserve(qsizetype size)
qsizetype size() const const
iterator end()
iterator find(const Key &key)
iterator insert(const Key &key, const T &value)
QString errorString() const const
bool isValid() const const
QString pattern() const const
qsizetype patternErrorOffset() const const
void clear()
bool contains(const QSet< T > &other) const const
iterator insert(const T &value)
qsizetype size() const const
QString & append(QChar ch)
const QChar at(qsizetype position) const const
QChar & back()
void chop(qsizetype n)
QString fromUtf16(const char16_t *unicode, qsizetype size)
qsizetype indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const const
QString & insert(qsizetype position, QChar ch)
bool isEmpty() const const
QString number(double n, char format, int precision)
QString & remove(QChar ch, Qt::CaseSensitivity cs)
QString & replace(QChar before, QChar after, Qt::CaseSensitivity cs)
void reserve(qsizetype size)
qsizetype size() const const
QString sliced(qsizetype pos) const const
QStringList split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const const
QByteArray toUtf8() const const
bool contains(QChar c, Qt::CaseSensitivity cs) const const
const_pointer data() const const
QChar first() const const
qsizetype indexOf(QChar c, qsizetype from, Qt::CaseSensitivity cs) const const
bool isNull() const const
qsizetype size() const const
QStringView sliced(qsizetype pos) const const
QList< QStringView > split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
bool startsWith(QChar ch) const const
int toInt(bool *ok, int base) const const
QString toString() const const
CaseInsensitive
SkipEmptyParts
QTextStream & endl(QTextStream &stream)
QStringView name() const const
QStringView value() const const
QStringView value(QAnyStringView namespaceUri, QAnyStringView name) const const
bool atEnd() const const
QXmlStreamAttributes attributes() const const
qint64 characterOffset() const const
QString errorString() const const
bool hasError() const const
bool isCharacters() const const
bool isWhitespace() const const
qint64 lineNumber() const const
QStringView name() const const
TokenType readNext()
bool readNextStartElement()
QStringView text() const const
TokenType tokenType() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Mar 28 2025 11:51:45 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.