KSyntaxHighlighting

katehighlightingindexer.cpp
1/*
2 SPDX-FileCopyrightText: 2014 Christoph Cullmann <cullmann@kde.org>
3 SPDX-FileCopyrightText: 2020 Jonathan Poelen <jonathan.poelen@gmail.com>
4
5 SPDX-License-Identifier: MIT
6*/
7
8#include <QBuffer>
9#include <QCborValue>
10#include <QCoreApplication>
11#include <QDebug>
12#include <QFile>
13#include <QFileInfo>
14#include <QMutableMapIterator>
15#include <QRegularExpression>
16#include <QScopeGuard>
17#include <QString>
18#include <QVariant>
19#include <QXmlStreamReader>
20
21#ifdef HAS_XERCESC
22
23#include <xercesc/framework/MemBufInputSource.hpp>
24#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
25
26#include <xercesc/parsers/SAX2XMLReaderImpl.hpp>
27
28#include <xercesc/sax/ErrorHandler.hpp>
29#include <xercesc/sax/SAXParseException.hpp>
30
31#include <xercesc/util/PlatformUtils.hpp>
32#include <xercesc/util/XMLString.hpp>
33#include <xercesc/util/XMLUni.hpp>
34
35#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
36#include <xercesc/validators/common/Grammar.hpp>
37
38using namespace xercesc;
39
40/*
41 * Ideas taken from:
42 *
43 * author : Boris Kolpackov <boris@codesynthesis.com>
44 * copyright : not copyrighted - public domain
45 *
46 * This program uses Xerces-C++ SAX2 parser to load a set of schema files
47 * and then to validate a set of XML documents against these schemas. To
48 * build this program you will need Xerces-C++ 3.0.0 or later. For more
49 * information, see:
50 *
51 * http://www.codesynthesis.com/~boris/blog/2010/03/15/validating-external-schemas-xerces-cxx/
52 */
53
54/**
55 * Error handler object used during xml schema validation.
56 */
57class CustomErrorHandler : public ErrorHandler
58{
59public:
60 /**
61 * Constructor
62 * @param messages Pointer to the error message string to fill.
63 */
64 CustomErrorHandler(QString *messages)
65 : m_messages(messages)
66 {
67 }
68
69 /**
70 * Check global success/fail state.
71 * @return True if there was a failure, false otherwise.
72 */
73 bool failed() const
74 {
75 return m_failed;
76 }
77
78private:
79 /**
80 * Severity classes for error messages.
81 */
82 enum severity { s_warning, s_error, s_fatal };
83
84 /**
85 * Wrapper for warning exceptions.
86 * @param e Exception to handle.
87 */
88 void warning(const SAXParseException &e) override
89 {
90 m_failed = true; // be strict, warnings are evil, too!
91 handle(e, s_warning);
92 }
93
94 /**
95 * Wrapper for error exceptions.
96 * @param e Exception to handle.
97 */
98 void error(const SAXParseException &e) override
99 {
100 m_failed = true;
101 handle(e, s_error);
102 }
103
104 /**
105 * Wrapper for fatal error exceptions.
106 * @param e Exception to handle.
107 */
108 void fatalError(const SAXParseException &e) override
109 {
110 m_failed = true;
111 handle(e, s_fatal);
112 }
113
114 /**
115 * Reset the error status to "no error".
116 */
117 void resetErrors() override
118 {
119 m_failed = false;
120 }
121
122 /**
123 * Generic handler for error/warning/fatal error message exceptions.
124 * @param e Exception to handle.
125 * @param s Enum value encoding the message severtity.
126 */
127 void handle(const SAXParseException &e, severity s)
128 {
129 // get id to print
130 const XMLCh *xid(e.getPublicId());
131 if (!xid)
132 xid = e.getSystemId();
133
134 m_messages << QString::fromUtf16(xid) << ":" << e.getLineNumber() << ":" << e.getColumnNumber() << " " << (s == s_warning ? "warning: " : "error: ")
135 << QString::fromUtf16(e.getMessage()) << Qt::endl;
136 }
137
138private:
139 /**
140 * Storage for created error messages in this handler.
141 */
142 QTextStream m_messages;
143
144 /**
145 * Global error state. True if there was an error, false otherwise.
146 */
147 bool m_failed = false;
148};
149
150class CustomXMLValidator : public SAX2XMLReaderImpl
151{
152public:
153 QString messages;
154 CustomErrorHandler eh{&messages};
155
156 CustomXMLValidator(XMLGrammarPool *xsd)
157 : SAX2XMLReaderImpl(XMLPlatformUtils::fgMemoryManager, xsd)
158 {
159 // Commonly useful configuration.
160 //
161 setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
162 setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);
163 setFeature(XMLUni::fgSAX2CoreValidation, true);
164
165 // Enable validation.
166 //
167 setFeature(XMLUni::fgXercesSchema, true);
168 setFeature(XMLUni::fgXercesSchemaFullChecking, true);
169 setFeature(XMLUni::fgXercesValidationErrorAsFatal, true);
170
171 // Use the loaded grammar during parsing.
172 //
173 setFeature(XMLUni::fgXercesUseCachedGrammarInParse, true);
174
175 // Don't load schemas from any other source (e.g., from XML document's
176 // xsi:schemaLocation attributes).
177 //
178 setFeature(XMLUni::fgXercesLoadSchema, false);
179
180 // Xerces-C++ 3.1.0 is the first version with working multi import
181 // support.
182 //
183 setFeature(XMLUni::fgXercesHandleMultipleImports, true);
184
185 setErrorHandler(&eh);
186 }
187};
188
189#endif
190
191#include "../lib/worddelimiters_p.h"
192#include "../lib/xml_p.h"
193
194#include <array>
195
196using KSyntaxHighlighting::WordDelimiters;
197using KSyntaxHighlighting::Xml::attrToBool;
198
199using namespace Qt::Literals::StringLiterals;
200
201#if QT_VERSION < QT_VERSION_CHECK(6, 10, 0)
202static constexpr QStringView operator""_sv(const char16_t *s, std::size_t n)
203{
204 return QStringView(s, s + n);
205}
206#endif
207
208namespace
209{
210
//! A "major.minor" kate version, ordered by major revision first, then minor.
struct KateVersion {
    int majorRevision;
    int minorRevision;

    KateVersion(int majorRevision = 0, int minorRevision = 0)
        : majorRevision(majorRevision)
        , minorRevision(minorRevision)
    {
    }

    //! \return \c true when this version is strictly older than \p version
    bool operator<(const KateVersion &version) const
    {
        if (majorRevision != version.majorRevision) {
            return majorRevision < version.majorRevision;
        }
        return minorRevision < version.minorRevision;
    }
};
226
227class HlFilesChecker
228{
229public:
230 void setDefinition(QStringView verStr, const QString &filename, const QString &name, const QStringList &alternativeNames)
231 {
232 m_currentDefinition = &*m_definitions.insert(name, Definition{});
233 m_currentDefinition->languageName = name;
234 m_currentDefinition->filename = filename;
235 m_currentDefinition->kateVersionStr = verStr.toString();
236 m_currentKeywords = nullptr;
237 m_currentContext = nullptr;
238
239 const auto idx = verStr.indexOf(u'.');
240 if (idx <= 0) {
241 qWarning() << filename << "invalid kateversion" << verStr;
242 m_success = false;
243 } else {
244 m_currentDefinition->kateVersion = {verStr.sliced(0, idx).toInt(), verStr.sliced(idx + 1).toInt()};
245 }
246
247 auto checkName = [this, &filename](char const *nameType, const QString &name) {
248 auto it = m_names.find(name);
249 if (it != m_names.end()) {
250 qWarning() << filename << "duplicate" << nameType << "with" << it.value();
251 m_success = false;
252 } else {
253 m_names.insert(name, filename);
254 }
255 };
256 checkName("name", name);
257 for (const auto &alternativeName : alternativeNames) {
258 checkName("alternative name", alternativeName);
259 }
260 }
261
262 KateVersion currentVersion() const
263 {
264 return m_currentDefinition->kateVersion;
265 }
266
267 void processElement(const QXmlStreamReader &xml)
268 {
269 switch (xml.tokenType()) {
271 if (m_currentContext) {
272 m_currentContext->rules.push_back(Context::Rule{});
273 auto &rule = m_currentContext->rules.back();
274 m_success = rule.parseElement(m_currentDefinition->filename, xml) && m_success;
275 m_currentContext->hasDynamicRule = m_currentContext->hasDynamicRule || rule.dynamic == XmlBool::True;
276 } else if (m_currentKeywords) {
277 m_inKeywordItem = true;
278 } else if (xml.name() == u"context"_sv) {
279 processContextElement(xml);
280 } else if (xml.name() == u"list"_sv) {
281 processListElement(xml);
282 } else if (xml.name() == u"keywords"_sv) {
283 m_success = m_currentDefinition->parseKeywords(xml) && m_success;
284 } else if (xml.name() == u"emptyLine"_sv) {
285 m_success = parseEmptyLine(m_currentDefinition->filename, xml) && m_success;
286 } else if (xml.name() == u"itemData"_sv) {
287 m_success = m_currentDefinition->itemDatas.parseElement(m_currentDefinition->filename, xml) && m_success;
288 }
289 break;
290
292 if (m_currentContext && xml.name() == u"context"_sv) {
293 m_currentContext = nullptr;
294 } else if (m_currentKeywords && xml.name() == u"list"_sv) {
295 m_currentKeywords = nullptr;
296 } else if (m_currentKeywords) {
297 m_success = m_currentKeywords->items.parseElement(m_currentDefinition->filename, xml, m_textContent) && m_success;
298 m_textContent.clear();
299 m_inKeywordItem = false;
300 }
301 break;
302
305 if (m_inKeywordItem) {
306 m_textContent += xml.text();
307 }
308 break;
309
310 default:;
311 }
312 }
313
314 //! Resolve context attribute and include tag
315 void resolveContexts()
316 {
317 QMutableMapIterator<QString, Definition> def(m_definitions);
318 while (def.hasNext()) {
319 def.next();
320 auto &definition = def.value();
321 auto &contexts = definition.contexts;
322
323 if (contexts.isEmpty()) {
324 qWarning() << definition.filename << "has no context";
325 m_success = false;
326 continue;
327 }
328
329 auto markAsUsedContext = [](ContextName &contextName) {
330 if (!contextName.stay && contextName.context) {
331 contextName.context->isOnlyIncluded = false;
332 }
333 };
334
335 QMutableMapIterator<QString, Context> contextIt(contexts);
336 while (contextIt.hasNext()) {
337 contextIt.next();
338 auto &context = contextIt.value();
339 resolveContextName(definition, context, context.lineEndContext, context.line);
340 resolveContextName(definition, context, context.lineEmptyContext, context.line);
341 resolveContextName(definition, context, context.fallthroughContext, context.line);
342 markAsUsedContext(context.lineEndContext);
343 markAsUsedContext(context.lineEmptyContext);
344 markAsUsedContext(context.fallthroughContext);
345 for (auto &rule : context.rules) {
346 rule.parentContext = &context;
347 resolveContextName(definition, context, rule.context, rule.line);
348 if (rule.type != Context::Rule::Type::IncludeRules) {
349 markAsUsedContext(rule.context);
350 } else if (rule.includeAttrib == XmlBool::True && rule.context.context) {
351 rule.context.context->referencedWithIncludeAttrib = true;
352 }
353 }
354 }
355
356 auto *firstContext = &*definition.contexts.find(definition.firstContextName);
357 firstContext->isOnlyIncluded = false;
358 definition.firstContext = firstContext;
359 }
360
361 resolveIncludeRules();
362 }
363
364 bool check() const
365 {
366 bool success = m_success;
367
368 const auto usedContexts = extractUsedContexts();
369
370 QMap<const Definition *, const Definition *> maxVersionByDefinitions;
371 QMap<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRules;
372
373 QMapIterator<QString, Definition> def(m_definitions);
374 while (def.hasNext()) {
375 def.next();
376 const auto &definition = def.value();
377 const auto &filename = definition.filename;
378
379 auto *maxDef = maxKateVersionDefinition(definition, maxVersionByDefinitions);
380 if (maxDef != &definition) {
381 qWarning() << definition.filename << "depends on a language" << maxDef->languageName << "in version" << maxDef->kateVersionStr
382 << ". Please, increase kateversion.";
383 success = false;
384 }
385
386 QSet<ItemDatas::Style> usedAttributeNames;
387 QSet<ItemDatas::Style> ignoredAttributeNames;
388 success = checkKeywordsList(definition) && success;
389 success = checkContexts(definition, usedAttributeNames, ignoredAttributeNames, usedContexts, unreachableIncludedRules) && success;
390
391 // search for non-existing itemDatas.
392 const auto invalidNames = usedAttributeNames - definition.itemDatas.styleNames;
393 for (const auto &styleName : invalidNames) {
394 qWarning() << filename << "line" << styleName.line << "reference of non-existing itemData attributes:" << styleName.name;
395 success = false;
396 }
397
398 // search for existing itemDatas, but unusable.
399 const auto ignoredNames = ignoredAttributeNames - usedAttributeNames;
400 for (const auto &styleName : ignoredNames) {
401 qWarning() << filename << "line" << styleName.line << "attribute" << styleName.name
402 << "is never used. All uses are with lookAhead=true or <IncludeRules/>";
403 success = false;
404 }
405
406 // search for unused itemDatas.
407 auto unusedNames = definition.itemDatas.styleNames - usedAttributeNames;
408 unusedNames -= ignoredNames;
409 for (const auto &styleName : std::as_const(unusedNames)) {
410 qWarning() << filename << "line" << styleName.line << "unused itemData:" << styleName.name;
411 success = false;
412 }
413 }
414
415 QMutableMapIterator<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRuleIt(unreachableIncludedRules);
416 while (unreachableIncludedRuleIt.hasNext()) {
417 unreachableIncludedRuleIt.next();
418 IncludedRuleUnreachableBy &unreachableRulesBy = unreachableIncludedRuleIt.value();
419 if (unreachableRulesBy.alwaysUnreachable) {
420 auto *rule = unreachableIncludedRuleIt.key();
421
422 if (!rule->parentContext->isOnlyIncluded) {
423 continue;
424 }
425
426 // remove duplicates rules
427 QSet<const Context::Rule *> rules;
428 auto &unreachableBy = unreachableRulesBy.unreachableBy;
429 unreachableBy.erase(std::remove_if(unreachableBy.begin(),
430 unreachableBy.end(),
431 [&](const RuleAndInclude &ruleAndInclude) {
432 if (rules.contains(ruleAndInclude.rule)) {
433 return true;
434 }
435 rules.insert(ruleAndInclude.rule);
436 return false;
437 }),
438 unreachableBy.end());
439
440 QString message;
441 message.reserve(128);
442 for (auto &ruleAndInclude : std::as_const(unreachableBy)) {
443 message += u"line "_sv;
444 message += QString::number(ruleAndInclude.rule->line);
445 message += u" ["_sv;
446 message += ruleAndInclude.rule->parentContext->name;
447 if (rule->filename != ruleAndInclude.rule->filename) {
448 message += u" ("_sv;
449 message += ruleAndInclude.rule->filename;
450 message += u')';
451 }
452 if (ruleAndInclude.includeRules) {
453 message += u" via line "_sv;
454 message += QString::number(ruleAndInclude.includeRules->line);
455 }
456 message += u"], "_sv;
457 }
458 message.chop(2);
459
460 qWarning() << rule->filename << "line" << rule->line << "no IncludeRule can reach this rule, hidden by" << message;
461 success = false;
462 }
463 }
464
465 return success;
466 }
467
468private:
//! Tri-state value of a boolean xml attribute.
//! \c Unspecified must stay first: members declared as \c XmlBool{} rely on
//! it being the value-initialized state.
enum class XmlBool {
    Unspecified,
    False,
    True,
};
474
475 struct Context;
476
477 struct ContextName {
478 QString name;
479 int popCount = 0;
480 bool stay = false;
481
482 Context *context = nullptr;
483 };
484
485 struct Parser {
486 const QString &filename;
487 const QXmlStreamReader &xml;
488 const QXmlStreamAttribute &attr;
489 bool success;
490
491 //! Read a string type attribute, \c success = \c false when \p str is not empty
492 //! \return \c true when attr.name() == attrName, otherwise false
493 bool extractString(QString &str, QStringView attrName)
494 {
495 if (attr.name() != attrName) {
496 return false;
497 }
498
499 str = attr.value().toString();
500 if (str.isEmpty()) {
501 qWarning() << filename << "line" << xml.lineNumber() << attrName << "attribute is empty";
502 success = false;
503 }
504
505 return true;
506 }
507
508 //! Read a bool type attribute, \c success = \c false when \p xmlBool is not \c XmlBool::Unspecified.
509 //! \return \c true when attr.name() == attrName, otherwise false
510 bool extractXmlBool(XmlBool &xmlBool, QStringView attrName)
511 {
512 if (attr.name() != attrName) {
513 return false;
514 }
515
516 xmlBool = attr.value().isNull() ? XmlBool::Unspecified : attrToBool(attr.value()) ? XmlBool::True : XmlBool::False;
517
518 return true;
519 }
520
521 //! Read a positive integer type attribute, \c success = \c false when \p positive is already greater than or equal to 0
522 //! \return \c true when attr.name() == attrName, otherwise false
523 bool extractPositive(int &positive, QStringView attrName)
524 {
525 if (attr.name() != attrName) {
526 return false;
527 }
528
529 bool ok = true;
530 positive = attr.value().toInt(&ok);
531
532 if (!ok || positive < 0) {
533 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a positive integer:" << attr.value();
534 success = false;
535 }
536
537 return true;
538 }
539
540 //! Read a color, \c success = \c false when \p color is already greater than or equal to 0
541 //! \return \c true when attr.name() == attrName, otherwise false
542 bool checkColor(QStringView attrName)
543 {
544 if (attr.name() != attrName) {
545 return false;
546 }
547
548 const auto value = attr.value();
549 if (value.isEmpty() /*|| QColor(value).isValid()*/) {
550 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a color:" << value;
551 success = false;
552 }
553
554 return true;
555 }
556
557 //! Read a QChar, \c success = \c false when \p c is not \c '\0' or does not have one char
558 //! \return \c true when attr.name() == attrName, otherwise false
559 bool extractChar(QChar &c, QStringView attrName)
560 {
561 if (attr.name() != attrName) {
562 return false;
563 }
564
565 if (attr.value().size() == 1) {
566 c = attr.value()[0];
567 } else {
568 c = u'_';
569 qWarning() << filename << "line" << xml.lineNumber() << attrName << "must contain exactly one char:" << attr.value();
570 success = false;
571 }
572
573 return true;
574 }
575
576 //! \return parsing status when \p isExtracted is \c true, otherwise \c false
577 bool checkIfExtracted(bool isExtracted)
578 {
579 if (isExtracted) {
580 return success;
581 }
582
583 qWarning() << filename << "line" << xml.lineNumber() << "unknown attribute:" << attr.name();
584 return false;
585 }
586 };
587
588 struct Keywords {
589 struct Items {
590 struct Item {
591 QString content;
592 int line;
593
594 friend size_t qHash(const Item &item, size_t seed = 0)
595 {
596 return qHash(item.content, seed);
597 }
598
599 friend bool operator==(const Item &item0, const Item &item1)
600 {
601 return item0.content == item1.content;
602 }
603 };
604
605 QList<Item> keywords;
606 QSet<Item> includes;
607
608 bool parseElement(const QString &filename, const QXmlStreamReader &xml, const QString &content)
609 {
610 bool success = true;
611
612 const int line = xml.lineNumber();
613
614 if (content.isEmpty()) {
615 qWarning() << filename << "line" << line << "is empty:" << xml.name();
616 success = false;
617 }
618
619 if (xml.name() == u"include"_sv) {
620 includes.insert({content, line});
621 } else if (xml.name() == u"item"_sv) {
622 keywords.append({content, line});
623 } else {
624 qWarning() << filename << "line" << line << "invalid element:" << xml.name();
625 success = false;
626 }
627
628 return success;
629 }
630 };
631
632 QString name;
633 Items items;
634 int line;
635
636 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
637 {
638 line = xml.lineNumber();
639
640 bool success = true;
641 const auto attrs = xml.attributes();
642 for (const auto &attr : attrs) {
643 Parser parser{filename, xml, attr, success};
644
645 const bool isExtracted = parser.extractString(name, u"name"_sv);
646
647 success = parser.checkIfExtracted(isExtracted);
648 }
649 return success;
650 }
651 };
652
653 struct Context {
654 struct Rule {
655 enum class Type {
656 Unknown,
657 AnyChar,
658 Detect2Chars,
659 DetectChar,
660 DetectIdentifier,
661 DetectSpaces,
662 Float,
663 HlCChar,
664 HlCHex,
665 HlCOct,
666 HlCStringChar,
667 IncludeRules,
668 Int,
669 LineContinue,
670 RangeDetect,
671 RegExpr,
672 StringDetect,
673 WordDetect,
674 keyword,
675 };
676
677 Type type{};
678
679 bool isDotRegex = false;
680 int line = -1;
681
682 // commonAttributes
683 QString attribute;
684 ContextName context;
685 QString beginRegion;
686 QString endRegion;
687 int column = -1;
688 XmlBool lookAhead{};
689 XmlBool firstNonSpace{};
690
691 // StringDetect, WordDetect, keyword
692 XmlBool insensitive{};
693
694 // DetectChar, StringDetect, RegExpr, keyword
695 XmlBool dynamic{};
696
697 // Regex
698 XmlBool minimal{};
699
700 // IncludeRule
701 XmlBool includeAttrib{};
702
703 // DetectChar, Detect2Chars, LineContinue, RangeDetect
704 QChar char0;
705 // Detect2Chars, RangeDetect
706 QChar char1;
707
708 // AnyChar, StringDetect, RegExpr, WordDetect, keyword
709 QString string;
710 // RegExpr without .* as suffix
711 QString sanitizedString;
712
713 // Float, HlCHex, HlCOct, Int, WordDetect, keyword
714 QString additionalDeliminator;
715 QString weakDeliminator;
716
717 // rules included by IncludeRules (without IncludeRule)
718 QList<const Rule *> includedRules;
719
720 // IncludeRules included by IncludeRules
721 QSet<const Rule *> includedIncludeRules;
722
723 Context const *parentContext = nullptr;
724
725 QString filename;
726
727 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
728 {
729 this->filename = filename;
730 line = xml.lineNumber();
731
732 using Pair = QPair<QStringView, Type>;
733 static const auto pairs = {
734 Pair{u"AnyChar"_sv, Type::AnyChar},
735 Pair{u"Detect2Chars"_sv, Type::Detect2Chars},
736 Pair{u"DetectChar"_sv, Type::DetectChar},
737 Pair{u"DetectIdentifier"_sv, Type::DetectIdentifier},
738 Pair{u"DetectSpaces"_sv, Type::DetectSpaces},
739 Pair{u"Float"_sv, Type::Float},
740 Pair{u"HlCChar"_sv, Type::HlCChar},
741 Pair{u"HlCHex"_sv, Type::HlCHex},
742 Pair{u"HlCOct"_sv, Type::HlCOct},
743 Pair{u"HlCStringChar"_sv, Type::HlCStringChar},
744 Pair{u"IncludeRules"_sv, Type::IncludeRules},
745 Pair{u"Int"_sv, Type::Int},
746 Pair{u"LineContinue"_sv, Type::LineContinue},
747 Pair{u"RangeDetect"_sv, Type::RangeDetect},
748 Pair{u"RegExpr"_sv, Type::RegExpr},
749 Pair{u"StringDetect"_sv, Type::StringDetect},
750 Pair{u"WordDetect"_sv, Type::WordDetect},
751 Pair{u"keyword", Type::keyword},
752 };
753
754 for (auto pair : pairs) {
755 if (xml.name() == pair.first) {
756 type = pair.second;
757 bool success = parseAttributes(filename, xml);
758 success = checkMandoryAttributes(filename, xml) && success;
759 if (success && type == Type::RegExpr) {
760 // ., (.) followed by *, +, {1} or nothing
761 static const QRegularExpression isDot(QStringLiteral(R"(^\‍(?\.(?:[*+][*+?]?|[*+]|\{1\})?\$?$)"));
762 // remove "(?:" and ")"
763 static const QRegularExpression removeParentheses(QStringLiteral(R"(\‍((?:\?:)?|\))"));
764 // remove parentheses on a copy of string
765 auto reg = QString(string).replace(removeParentheses, QString());
766 isDotRegex = reg.contains(isDot);
767
768 // Remove .* and .*$ suffix.
769 static const QRegularExpression allSuffix(QStringLiteral("(?<!\\\\)[.][*][?+]?[$]?$"));
770 sanitizedString = string;
771 sanitizedString.replace(allSuffix, QString());
772 // string is a catch-all, do not sanitize
773 if (sanitizedString.isEmpty() || sanitizedString == u"^"_sv) {
774 sanitizedString = string;
775 }
776 }
777 return success;
778 }
779 }
780
781 qWarning() << filename << "line" << xml.lineNumber() << "unknown element:" << xml.name();
782 return false;
783 }
784
785 private:
786 bool parseAttributes(const QString &filename, const QXmlStreamReader &xml)
787 {
788 bool success = true;
789
790 const auto attrs = xml.attributes();
791 for (const auto &attr : attrs) {
792 Parser parser{filename, xml, attr, success};
793
794 // clang-format off
795 const bool isExtracted
796 = parser.extractString(attribute, u"attribute"_sv)
797 || parser.extractString(context.name, u"context"_sv)
798 || parser.extractXmlBool(lookAhead, u"lookAhead"_sv)
799 || parser.extractXmlBool(firstNonSpace, u"firstNonSpace"_sv)
800 || parser.extractString(beginRegion, u"beginRegion"_sv)
801 || parser.extractString(endRegion, u"endRegion"_sv)
802 || parser.extractPositive(column, u"column"_sv)
803 || ((type == Type::RegExpr
804 || type == Type::StringDetect
805 || type == Type::WordDetect
806 || type == Type::keyword
807 ) && parser.extractXmlBool(insensitive, u"insensitive"_sv))
808 || ((type == Type::DetectChar
809 || type == Type::RegExpr
810 || type == Type::StringDetect
811 || type == Type::keyword
812 ) && parser.extractXmlBool(dynamic, u"dynamic"_sv))
813 || ((type == Type::RegExpr)
814 && parser.extractXmlBool(minimal, u"minimal"_sv))
815 || ((type == Type::DetectChar
816 || type == Type::Detect2Chars
817 || type == Type::LineContinue
818 || type == Type::RangeDetect
819 ) && parser.extractChar(char0, u"char"_sv))
820 || ((type == Type::Detect2Chars
821 || type == Type::RangeDetect
822 ) && parser.extractChar(char1, u"char1"_sv))
823 || ((type == Type::AnyChar
824 || type == Type::RegExpr
825 || type == Type::StringDetect
826 || type == Type::WordDetect
827 || type == Type::keyword
828 ) && parser.extractString(string, u"String"_sv))
829 || ((type == Type::IncludeRules)
830 && parser.extractXmlBool(includeAttrib, u"includeAttrib"_sv))
831 || ((type == Type::Float
832 || type == Type::HlCHex
833 || type == Type::HlCOct
834 || type == Type::Int
835 || type == Type::keyword
836 || type == Type::WordDetect
837 ) && (parser.extractString(additionalDeliminator, u"additionalDeliminator"_sv)
838 || parser.extractString(weakDeliminator, u"weakDeliminator"_sv)))
839 ;
840 // clang-format on
841
842 success = parser.checkIfExtracted(isExtracted);
843 }
844
845 if (type == Type::LineContinue && char0 == u'\0') {
846 char0 = u'\\';
847 }
848
849 return success;
850 }
851
852 bool checkMandoryAttributes(const QString &filename, const QXmlStreamReader &xml)
853 {
854 QString missingAttr;
855
856 switch (type) {
857 case Type::Unknown:
858 return false;
859
860 case Type::AnyChar:
861 case Type::RegExpr:
862 case Type::StringDetect:
863 case Type::WordDetect:
864 case Type::keyword:
865 missingAttr = string.isEmpty() ? QStringLiteral("String") : QString();
866 break;
867
868 case Type::DetectChar:
869 missingAttr = !char0.unicode() ? QStringLiteral("char") : QString();
870 break;
871
872 case Type::Detect2Chars:
873 case Type::RangeDetect:
874 missingAttr = !char0.unicode() && !char1.unicode() ? QStringLiteral("char and char1")
875 : !char0.unicode() ? QStringLiteral("char")
876 : !char1.unicode() ? QStringLiteral("char1")
877 : QString();
878 break;
879
880 case Type::IncludeRules:
881 missingAttr = context.name.isEmpty() ? QStringLiteral("context") : QString();
882 break;
883
884 case Type::DetectIdentifier:
885 case Type::DetectSpaces:
886 case Type::Float:
887 case Type::HlCChar:
888 case Type::HlCHex:
889 case Type::HlCOct:
890 case Type::HlCStringChar:
891 case Type::Int:
892 case Type::LineContinue:
893 break;
894 }
895
896 if (!missingAttr.isEmpty()) {
897 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute:" << missingAttr;
898 return false;
899 }
900
901 return true;
902 }
903 };
904
905 int line;
906 // becomes false when a context (except includeRule) refers to it
907 bool isOnlyIncluded = true;
908 // becomes true when an includedRule refers to it with includeAttrib=true
909 bool referencedWithIncludeAttrib = false;
910 bool hasDynamicRule = false;
911 QString name;
912 QString attribute;
913 ContextName lineEndContext;
914 ContextName lineEmptyContext;
915 ContextName fallthroughContext;
916 QList<Rule> rules;
917 XmlBool dynamic{};
918 XmlBool fallthrough{};
919 XmlBool stopEmptyLineContextSwitchLoop{};
920
921 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
922 {
923 line = xml.lineNumber();
924
925 bool success = true;
926
927 const auto attrs = xml.attributes();
928 for (const auto &attr : attrs) {
929 Parser parser{filename, xml, attr, success};
930 XmlBool noIndentationBasedFolding{};
931
932 // clang-format off
933 const bool isExtracted = parser.extractString(name, u"name"_sv)
934 || parser.extractString(attribute, u"attribute"_sv)
935 || parser.extractString(lineEndContext.name, u"lineEndContext"_sv)
936 || parser.extractString(lineEmptyContext.name, u"lineEmptyContext"_sv)
937 || parser.extractString(fallthroughContext.name, u"fallthroughContext"_sv)
938 || parser.extractXmlBool(dynamic, u"dynamic"_sv)
939 || parser.extractXmlBool(fallthrough, u"fallthrough"_sv)
940 || parser.extractXmlBool(stopEmptyLineContextSwitchLoop, u"stopEmptyLineContextSwitchLoop"_sv)
941 || parser.extractXmlBool(noIndentationBasedFolding, u"noIndentationBasedFolding"_sv);
942 // clang-format on
943
944 success = parser.checkIfExtracted(isExtracted);
945 }
946
947 if (name.isEmpty()) {
948 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: name";
949 success = false;
950 }
951
952 if (attribute.isEmpty()) {
953 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: attribute";
954 success = false;
955 }
956
957 return success;
958 }
959 };
960
961 struct ItemDatas {
962 struct Style {
963 QString name;
964 int line;
965
966 friend size_t qHash(const Style &style, size_t seed = 0)
967 {
968 return qHash(style.name, seed);
969 }
970
971 friend bool operator==(const Style &style0, const Style &style1)
972 {
973 return style0.name == style1.name;
974 }
975 };
976
977 QSet<Style> styleNames;
978
979 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
980 {
981 bool success = true;
982
983 QString name;
984 QString defStyleNum;
985 XmlBool boolean;
986
987 const auto attrs = xml.attributes();
988 for (const auto &attr : attrs) {
989 Parser parser{filename, xml, attr, success};
990
991 // clang-format off
992 const bool isExtracted
993 = parser.extractString(name, u"name"_sv)
994 || parser.extractString(defStyleNum, u"defStyleNum"_sv)
995 || parser.extractXmlBool(boolean, u"bold"_sv)
996 || parser.extractXmlBool(boolean, u"italic"_sv)
997 || parser.extractXmlBool(boolean, u"underline"_sv)
998 || parser.extractXmlBool(boolean, u"strikeOut"_sv)
999 || parser.extractXmlBool(boolean, u"spellChecking"_sv)
1000 || parser.checkColor(u"color"_sv)
1001 || parser.checkColor(u"selColor"_sv)
1002 || parser.checkColor(u"backgroundColor"_sv)
1003 || parser.checkColor(u"selBackgroundColor"_sv);
1004 // clang-format on
1005
1006 success = parser.checkIfExtracted(isExtracted);
1007 }
1008
1009 if (!name.isEmpty()) {
1010 const auto len = styleNames.size();
1011 styleNames.insert({name, int(xml.lineNumber())});
1012 if (len == styleNames.size()) {
1013 qWarning() << filename << "line" << xml.lineNumber() << "itemData duplicate:" << name;
1014 success = false;
1015 }
1016 }
1017
1018 return success;
1019 }
1020 };
1021
1022 struct Definition {
1023 QMap<QString, Keywords> keywordsList;
1024 QMap<QString, Context> contexts;
1025 ItemDatas itemDatas;
1026 QString firstContextName;
1027 const Context *firstContext = nullptr;
1028 QString filename;
1029 WordDelimiters wordDelimiters;
1030 KateVersion kateVersion{};
1031 QString kateVersionStr;
1032 QString languageName;
1033 QSet<const Definition *> referencedDefinitions;
1034
1035 // Parse <keywords ...>
1036 bool parseKeywords(const QXmlStreamReader &xml)
1037 {
1038 wordDelimiters.append(xml.attributes().value(u"additionalDeliminator"_sv));
1039 wordDelimiters.remove(xml.attributes().value(u"weakDeliminator"_sv));
1040 return true;
1041 }
1042 };
1043
1044 // Parse <context>
1045 void processContextElement(const QXmlStreamReader &xml)
1046 {
1047 Context context;
1048 m_success = context.parseElement(m_currentDefinition->filename, xml) && m_success;
1049 if (m_currentDefinition->firstContextName.isEmpty()) {
1050 m_currentDefinition->firstContextName = context.name;
1051 }
1052 if (m_currentDefinition->contexts.contains(context.name)) {
1053 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate context:" << context.name;
1054 m_success = false;
1055 }
1056 m_currentContext = &*m_currentDefinition->contexts.insert(context.name, context);
1057 }
1058
1059 // Parse <list name="...">
1060 void processListElement(const QXmlStreamReader &xml)
1061 {
1062 Keywords keywords;
1063 m_success = keywords.parseElement(m_currentDefinition->filename, xml) && m_success;
1064 if (m_currentDefinition->keywordsList.contains(keywords.name)) {
1065 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate list:" << keywords.name;
1066 m_success = false;
1067 }
1068 m_currentKeywords = &*m_currentDefinition->keywordsList.insert(keywords.name, keywords);
1069 }
1070
1071 const Definition *maxKateVersionDefinition(const Definition &definition, QMap<const Definition *, const Definition *> &maxVersionByDefinitions) const
1072 {
1073 auto it = maxVersionByDefinitions.find(&definition);
1074 if (it != maxVersionByDefinitions.end()) {
1075 return it.value();
1076 } else {
1077 auto it = maxVersionByDefinitions.insert(&definition, &definition);
1078 for (const auto &referencedDef : definition.referencedDefinitions) {
1079 auto *maxDef = maxKateVersionDefinition(*referencedDef, maxVersionByDefinitions);
1080 if (it.value()->kateVersion < maxDef->kateVersion) {
1081 it.value() = maxDef;
1082 }
1083 }
1084 return it.value();
1085 }
1086 }
1087
1088 // Initialize the referenced rules (Rule::includedRules)
1089 void resolveIncludeRules()
1090 {
1091 QSet<const Context *> usedContexts;
1092 QList<const Context *> contexts;
1093
1095 while (def.hasNext()) {
1096 def.next();
1097 auto &definition = def.value();
1098 QMutableMapIterator<QString, Context> contextIt(definition.contexts);
1099 while (contextIt.hasNext()) {
1100 contextIt.next();
1101 auto &currentContext = contextIt.value();
1102 for (auto &rule : currentContext.rules) {
1103 if (rule.type != Context::Rule::Type::IncludeRules) {
1104 continue;
1105 }
1106
1107 if (rule.context.stay) {
1108 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself";
1109 m_success = false;
1110 continue;
1111 }
1112
1113 if (rule.context.popCount) {
1114 qWarning() << definition.filename << "line" << rule.line << "IncludeRules with #pop prefix";
1115 m_success = false;
1116 }
1117
1118 if (!rule.context.context) {
1119 m_success = false;
1120 continue;
1121 }
1122
1123 // resolve includedRules and includedIncludeRules
1124
1125 usedContexts.clear();
1126 usedContexts.insert(rule.context.context);
1127 contexts.clear();
1128 contexts.append(rule.context.context);
1129
1130 for (int i = 0; i < contexts.size(); ++i) {
1131 currentContext.hasDynamicRule = contexts[i]->hasDynamicRule;
1132 for (const auto &includedRule : contexts[i]->rules) {
1133 if (includedRule.type != Context::Rule::Type::IncludeRules) {
1134 rule.includedRules.append(&includedRule);
1135 } else if (&rule == &includedRule) {
1136 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself by recursivity";
1137 m_success = false;
1138 } else {
1139 rule.includedIncludeRules.insert(&includedRule);
1140
1141 if (includedRule.includedRules.isEmpty()) {
1142 const auto *context = includedRule.context.context;
1143 if (context && !usedContexts.contains(context)) {
1144 contexts.append(context);
1145 usedContexts.insert(context);
1146 }
1147 } else {
1148 rule.includedRules.append(includedRule.includedRules);
1149 }
1150 }
1151 }
1152 }
1153 }
1154 }
1155 }
1156 }
1157
1158 //! Recursively extracts the contexts used from the first context of the definitions.
1159 //! This method detects groups of contexts which are only used among themselves.
1160 QSet<const Context *> extractUsedContexts() const
1161 {
1162 QSet<const Context *> usedContexts;
1163 QList<const Context *> contexts;
1164
1165 QMapIterator<QString, Definition> def(m_definitions);
1166 while (def.hasNext()) {
1167 def.next();
1168 const auto &definition = def.value();
1169
1170 if (definition.firstContext) {
1171 usedContexts.insert(definition.firstContext);
1172 contexts.clear();
1173 contexts.append(definition.firstContext);
1174
1175 for (int i = 0; i < contexts.size(); ++i) {
1176 auto appendContext = [&](const Context *context) {
1177 if (context && !usedContexts.contains(context)) {
1178 contexts.append(context);
1179 usedContexts.insert(context);
1180 }
1181 };
1182
1183 const auto *context = contexts[i];
1184 appendContext(context->lineEndContext.context);
1185 appendContext(context->lineEmptyContext.context);
1186 appendContext(context->fallthroughContext.context);
1187
1188 for (auto &rule : context->rules) {
1189 appendContext(rule.context.context);
1190 }
1191 }
1192 }
1193 }
1194
1195 return usedContexts;
1196 }
1197
1198 struct RuleAndInclude {
1199 const Context::Rule *rule;
1200 const Context::Rule *includeRules;
1201
1202 explicit operator bool() const
1203 {
1204 return rule;
1205 }
1206 };
1207
//! Entry of the unreachableIncludedRules map passed to checkContexts() and
//! checkUreachableRules(), keyed by rule.
struct IncludedRuleUnreachableBy {
    QList<RuleAndInclude> unreachableBy;
    // NOTE(review): presumably cleared once the rule is found reachable in at least one place - confirm in checkUreachableRules()
    bool alwaysUnreachable = true;
};
1212
1213 //! Check contexts and rules
1214 bool checkContexts(const Definition &definition,
1215 QSet<ItemDatas::Style> &usedAttributeNames,
1216 QSet<ItemDatas::Style> &ignoredAttributeNames,
1217 const QSet<const Context *> &usedContexts,
1218 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1219 {
1220 bool success = true;
1221
1222 QMapIterator<QString, Context> contextIt(definition.contexts);
1223 while (contextIt.hasNext()) {
1224 contextIt.next();
1225
1226 const auto &context = contextIt.value();
1227 const auto &filename = definition.filename;
1228
1229 if (!usedContexts.contains(&context)) {
1230 qWarning() << filename << "line" << context.line << "unused context:" << context.name;
1231 success = false;
1232 continue;
1233 }
1234
1235 if (context.name.startsWith(u"#pop"_sv)) {
1236 qWarning() << filename << "line" << context.line << "the context name must not start with '#pop':" << context.name;
1237 success = false;
1238 }
1239
1240 if (!context.attribute.isEmpty() && (!context.isOnlyIncluded || context.referencedWithIncludeAttrib)) {
1241 usedAttributeNames.insert({context.attribute, context.line});
1242 }
1243
1244 success = checkContextAttribute(definition, context) && success;
1245 success = checkUreachableRules(definition.filename, context, unreachableIncludedRules) && success;
1246 success = suggestRuleMerger(definition.filename, context) && success;
1247
1248 for (const auto &rule : context.rules) {
1249 if (!rule.attribute.isEmpty()) {
1250 if (rule.lookAhead != XmlBool::True) {
1251 usedAttributeNames.insert({rule.attribute, rule.line});
1252 } else {
1253 ignoredAttributeNames.insert({rule.attribute, rule.line});
1254 }
1255 }
1256 success = checkLookAhead(rule) && success;
1257 success = checkStringDetect(rule) && success;
1258 success = checkWordDetect(rule) && success;
1259 success = checkKeyword(definition, rule) && success;
1260 success = checkRegExpr(filename, rule, context) && success;
1261 success = checkDelimiters(definition, rule) && success;
1262 }
1263 }
1264
1265 return success;
1266 }
1267
1268 //! Check that a regular expression in a RegExpr rule:
1269 //! - isValid()
1270 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1271 //! - dynamic=true but no place holder used?
1272 //! - is not . with lookAhead="1"
1273 //! - is not ^... without column ou firstNonSpace attribute
1274 //! - is not equivalent to DetectSpaces, DetectChar, Detect2Chars, StringDetect, DetectIdentifier, RangeDetect, LineContinue or AnyChar
1275 //! - has no unused captures
1276 //! - has no unnecessary quantifier with lookAhead
1277 bool checkRegExpr(const QString &filename, const Context::Rule &rule, const Context &context) const
1278 {
1279 // ignore empty regex because the error is raised during xml parsing
1280 if (rule.type == Context::Rule::Type::RegExpr && !rule.string.isEmpty()) {
1281 const QRegularExpression regexp(rule.string);
1282 if (!checkRegularExpression(rule.filename, regexp, rule.line)) {
1283 return false;
1284 }
1285
1286 // dynamic == true and no place holder?
1287 if (rule.dynamic == XmlBool::True) {
1288 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1289 if (!rule.string.contains(placeHolder)) {
1290 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1291 return false;
1292 }
1293 }
1294
1295 if (rule.lookAhead == XmlBool::True && (rule.string.endsWith(u".*$"_sv) || rule.string.endsWith(u".*"_sv)) && -1 == rule.string.indexOf(u'|')) {
1296 qWarning() << rule.filename << "line" << rule.line << "RegExpr with lookAhead=1 doesn't need to end with '.*' or '.*$':" << rule.string;
1297 return false;
1298 }
1299
1300 auto reg = (rule.lookAhead == XmlBool::True) ? rule.sanitizedString : rule.string;
1301 if (rule.lookAhead == XmlBool::True) {
1302 static const QRegularExpression removeAllSuffix(QStringLiteral(
1303 R"(((?<!\\)\\‍(?:[DSWdsw]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4})|(?<!\\)[^])}\\]|(?=\\)\\\\)[*][?+]?$)"));
1304 reg.replace(removeAllSuffix, QString());
1305 }
1306
1307 reg.replace(QStringLiteral("{1}"), QString());
1308 reg.replace(QStringLiteral("{1,1}"), QString());
1309
1310 // is DetectSpaces
1311 // optional ^ then \s, [\s], [\t ], [ \t] possibly in (...) or (?:...) followed by *, +
1312 static const QRegularExpression isDetectSpaces(
1313 QStringLiteral(R"(^\^?(?:\‍((?:\?:)?)?\^?(?:\\s|\[(?:\\s| (?:\t|\\t)|(?:\t|\\t) )\])\)?(?:[*+][*+?]?|[*+])?\)?\)?$)"));
1314 if (rule.string.contains(isDetectSpaces)) {
1315 char const *extraMsg = rule.string.contains(u'^') ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1316 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectSpaces / DetectChar / AnyChar" << extraMsg << ":"
1317 << rule.string;
1318 return false;
1319 }
1320
1321#define REG_ESCAPE_CHAR R"(\\(?:[^0BDPSWbdpswoux]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4}))"
1322#define REG_CHAR "(?:" REG_ESCAPE_CHAR "|\\[(?:" REG_ESCAPE_CHAR "|.)\\]|[^[.^])"
1323
1324 // is RangeDetect
1325 static const QRegularExpression isRange(QStringLiteral("^\\^?" REG_CHAR "(?:"
1326 "\\.\\*[?+]?" REG_CHAR "|"
1327 "\\[\\^(" REG_ESCAPE_CHAR "|.)\\]\\*[?+]?\\1"
1328 ")$"));
1329 if ((rule.lookAhead == XmlBool::True || rule.minimal == XmlBool::True || rule.string.contains(u".*?"_sv) || rule.string.contains(u"[^"_sv))
1330 && reg.contains(isRange)) {
1331 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by RangeDetect:" << rule.string;
1332 return false;
1333 }
1334
1335 // is AnyChar
1336 static const QRegularExpression isAnyChar(QStringLiteral(R"(^(\^|\‍((\?:)?)*\[(?!\^)[-\]]?(\\[^0BDPSWbdpswoux]|[^-\]\\])*\]\)*$)"));
1337 if (rule.string.contains(isAnyChar)) {
1338 auto extra = (reg[0] == u'^' || reg[1] == u'^') ? "with column=\"0\"" : "";
1339 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by AnyChar:" << rule.string << extra;
1340 return false;
1341 }
1342
1343 // is LineContinue
1344 static const QRegularExpression isLineContinue(QStringLiteral("^\\^?" REG_CHAR "\\$$"));
1345 if (reg.contains(isLineContinue)) {
1346 auto extra = (reg[0] == u'^') ? "with column=\"0\"" : "";
1347 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by LineContinue:" << rule.string << extra;
1348 return false;
1349 }
1350
1351#define REG_DIGIT uR"((\[(0-9|\\d)\]|\\d))"
1352#define REG_DIGITS REG_DIGIT u"([+]|" REG_DIGIT u"[*])"
1353#define REG_DOT uR"((\\[.]|\[.\]))"
1354 // is Int, check \b[0-9]+
1355 static const QRegularExpression isInt(uR"(^(\‍((\?:)?)*\\b(\‍((\?:)?)*)" REG_DIGITS uR"(\)*$)"_s);
1356 if (reg.contains(isInt)) {
1357 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by Int:" << rule.string;
1358 return false;
1359 }
1360
1361 // is Float, check (\b[0-9]+\.[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?
1362 static const QRegularExpression isFloat(
1363 uR"(^(\\b|\‍((\?:)?)*)" REG_DIGITS REG_DOT
1364 REG_DIGIT u"[*][|]" REG_DOT REG_DIGITS uR"(\)+\‍((\?:)?\[[eE]+\]\[(\\?-\\?\+|\\?\+\\?-)\]\?)" REG_DIGITS uR"(\)\?\)*$)"_s);
1365 if (reg.contains(isFloat)) {
1366 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by Float:" << rule.string;
1367 return false;
1368 }
1369#undef REG_DOT
1370#undef REG_DIGIT
1371#undef REG_DIGITS
1372
1373 // replace \c, \xhhh, \x{hhh...}, \0dd, \o{ddd}, \uhhhh, with _
1374 static const QRegularExpression sanitize1(QStringLiteral(REG_ESCAPE_CHAR));
1375 reg.replace(sanitize1, QStringLiteral("_"));
1376
1377#undef REG_CHAR
1378#undef REG_ESCAPE_CHAR
1379
1380 // use minimal or lazy operator
1381 static const QRegularExpression isMinimal(QStringLiteral("(?![.][*+?][$]?[)]*$)[.][*+?][^?+]"));
1382 static const QRegularExpression hasNotGreedy(QStringLiteral("[*+?][?+]"));
1383
1384 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(isMinimal) && !reg.contains(hasNotGreedy)
1385 && (!rule.context.context || !rule.context.context->hasDynamicRule || regexp.captureCount() == 0)
1386 && (reg.back() != u'$' || reg.contains(u'|'))) {
1387 qWarning() << rule.filename << "line" << rule.line
1388 << "RegExpr should be have minimal=\"1\" or use lazy operator (i.g, '.*' -> '.*?'):" << rule.string;
1389 return false;
1390 }
1391
1392 // replace [:...:] with ___
1393 static const QRegularExpression sanitize2(QStringLiteral(R"(\[:\w+:\])"));
1394 reg.replace(sanitize2, QStringLiteral("___"));
1395
1396 // replace [ccc...], [special] with ...
1397 static const QRegularExpression sanitize3(QStringLiteral(R"(\[(?:\^\]?[^]]*|\]?[^]\\]*?\\.[^]]*|\][^]]{2,}|[^]]{3,})\]|(\[\]?[^]]*\]))"));
1398 reg.replace(sanitize3, QStringLiteral("...\\1"));
1399
1400 // replace [c] with _
1401 static const QRegularExpression sanitize4(QStringLiteral(R"(\[.\])"));
1402 reg.replace(sanitize4, QStringLiteral("_"));
1403
1404 const int len = reg.size();
1405 // replace [cC] with _
1406 static const QRegularExpression toInsensitive(QStringLiteral(R"(\[(?:([^]])\1)\])"));
1407 reg = reg.toUpper();
1408 reg.replace(toInsensitive, QString());
1409
1410 // is StringDetect
1411 // ignore (?:, ) and {n}
1412 static const QRegularExpression isStringDetect(QStringLiteral(R"(^\^?(?:[^|\\?*+$^[{(.]|{(?!\d+,\d*}|,\d+})|\‍(\?:)+$)"));
1413 if (reg.contains(isStringDetect)) {
1414 char const *extraMsg = rule.string.contains(u'^') ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1415 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by StringDetect / Detect2Chars / DetectChar" << extraMsg
1416 << ":" << rule.string;
1417 if (len != reg.size()) {
1418 qWarning() << rule.filename << "line" << rule.line << "insensitive=\"1\" missing:" << rule.string;
1419 }
1420 return false;
1421 }
1422
1423 // column="0"
1424 if (rule.column == -1) {
1425 // ^ without |
1426 // (^sas*) -> ok
1427 // (^sa|s*) -> ko
1428 // (^(sa|s*)) -> ok
1429 auto first = std::as_const(reg).begin();
1430 auto last = std::as_const(reg).end();
1431 int depth = 0;
1432
1433 while (u'(' == *first) {
1434 ++depth;
1435 ++first;
1436 if (u'?' == *first || u':' == first[1]) {
1437 first += 2;
1438 }
1439 }
1440
1441 if (u'^' == *first) {
1442 const int bolDepth = depth;
1443 bool replace = true;
1444
1445 while (++first != last) {
1446 if (u'(' == *first) {
1447 ++depth;
1448 } else if (u')' == *first) {
1449 --depth;
1450 if (depth < bolDepth) {
1451 // (^a)? === (^a|) -> ko
1452 if (first + 1 != last && u"*?"_sv.contains(first[1])) {
1453 replace = false;
1454 break;
1455 }
1456 }
1457 } else if (u'|' == *first) {
1458 // ignore '|' within subgroup
1459 if (depth <= bolDepth) {
1460 replace = false;
1461 break;
1462 }
1463 }
1464 }
1465
1466 if (replace) {
1467 qWarning() << rule.filename << "line" << rule.line << "column=\"0\" missing with RegExpr:" << rule.string;
1468 return false;
1469 }
1470 }
1471 }
1472
1473 // add ^ with column=0
1474 if (rule.column == 0 && !rule.isDotRegex) {
1475 bool hasStartOfLine = false;
1476 auto first = std::as_const(reg).begin();
1477 auto last = std::as_const(reg).end();
1478 for (; first != last; ++first) {
1479 if (*first == u'^') {
1480 hasStartOfLine = true;
1481 break;
1482 } else if (*first == u'(') {
1483 if (last - first >= 3 && first[1] == u'?' && first[2] == u':') {
1484 first += 2;
1485 }
1486 } else {
1487 break;
1488 }
1489 }
1490
1491 if (!hasStartOfLine) {
1492 qWarning() << rule.filename << "line" << rule.line
1493 << "start of line missing in the pattern with column=\"0\" (i.e. abc -> ^abc):" << rule.string;
1494 return false;
1495 }
1496 }
1497
1498 bool useCapture = false;
1499
1500 // detection of unnecessary capture
1501 if (regexp.captureCount()) {
1502 auto maximalCapture = [](const QStringView(&referenceNames)[9], const QString &s) {
1503 int maxCapture = 9;
1504 while (maxCapture && !s.contains(referenceNames[maxCapture - 1])) {
1505 --maxCapture;
1506 }
1507 return maxCapture;
1508 };
1509
1510 int maxCaptureUsed = 0;
1511 // maximal dynamic reference
1512 if (rule.context.context && !rule.context.stay) {
1513 for (const auto &nextRule : std::as_const(rule.context.context->rules)) {
1514 if (nextRule.dynamic == XmlBool::True) {
1515 static const QStringView cap[]{
1516 u"%1"_sv,
1517 u"%2"_sv,
1518 u"%3"_sv,
1519 u"%4"_sv,
1520 u"%5"_sv,
1521 u"%6"_sv,
1522 u"%7"_sv,
1523 u"%8"_sv,
1524 u"%9"_sv,
1525 };
1526 int maxDynamicCapture = maximalCapture(cap, nextRule.string);
1527 maxCaptureUsed = std::max(maxCaptureUsed, maxDynamicCapture);
1528 }
1529 }
1530 }
1531
1532 static const QStringView num1[]{
1533 u"\\1"_sv,
1534 u"\\2"_sv,
1535 u"\\3"_sv,
1536 u"\\4"_sv,
1537 u"\\5"_sv,
1538 u"\\6"_sv,
1539 u"\\7"_sv,
1540 u"\\8"_sv,
1541 u"\\9"_sv,
1542 };
1543 static const QStringView num2[]{
1544 u"\\g1"_sv,
1545 u"\\g2"_sv,
1546 u"\\g3"_sv,
1547 u"\\g4"_sv,
1548 u"\\g5"_sv,
1549 u"\\g6"_sv,
1550 u"\\g7"_sv,
1551 u"\\g8"_sv,
1552 u"\\g9"_sv,
1553 };
1554 const int maxBackReference = std::max(maximalCapture(num1, rule.string), maximalCapture(num2, rule.string));
1555
1556 const int maxCapture = std::max(maxCaptureUsed, maxBackReference);
1557
1558 if (maxCapture && regexp.captureCount() > maxCapture) {
1559 qWarning() << rule.filename << "line" << rule.line << "RegExpr with" << regexp.captureCount() << "captures but only" << maxCapture
1560 << "are used. Please, replace '(...)' with '(?:...)':" << rule.string;
1561 return false;
1562 }
1563
1564 useCapture = maxCapture;
1565 }
1566
1567 if (!useCapture) {
1568 // is DetectIdentifier
1569 static const QRegularExpression isDetectIdentifier(
1570 QStringLiteral(R"(^(\‍((\?:)?|\^)*\[(\\p\{L\}|_){2}\]([+][?+]?)?\[(\\p\{N\}|\\p\{L\}|_){3}\][*][?+]?\)*$)"));
1571 if (rule.string.contains(isDetectIdentifier)) {
1572 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectIdentifier:" << rule.string;
1573 return false;
1574 }
1575 }
1576
1577 if (rule.isDotRegex) {
1578 // search next rule with same column or firstNonSpace
1579 int i = &rule - context.rules.data() + 1;
1580 const bool hasColumn = (rule.column != -1);
1581 const bool hasFirstNonSpace = (rule.firstNonSpace == XmlBool::True);
1582 const bool isSpecial = (hasColumn || hasFirstNonSpace);
1583 for (; i < context.rules.size(); ++i) {
1584 auto &rule2 = context.rules[i];
1585 if (rule2.type == Context::Rule::Type::IncludeRules && isSpecial) {
1586 i = context.rules.size();
1587 break;
1588 }
1589
1590 const bool hasColumn2 = (rule2.column != -1);
1591 const bool hasFirstNonSpace2 = (rule2.firstNonSpace == XmlBool::True);
1592 if ((!isSpecial && !hasColumn2 && !hasFirstNonSpace2) || (hasColumn && rule.column == rule2.column)
1593 || (hasFirstNonSpace && hasFirstNonSpace2)) {
1594 break;
1595 }
1596 }
1597
1598 auto ruleFilename = (filename == rule.filename) ? QString() : u"in "_sv + rule.filename;
1599 if (i == context.rules.size()) {
1600 if (rule.lookAhead == XmlBool::True && rule.firstNonSpace != XmlBool::True && rule.column == -1 && rule.beginRegion.isEmpty()
1601 && rule.endRegion.isEmpty() && !useCapture) {
1602 qWarning() << filename << "context line" << context.line << ": RegExpr line" << rule.line << ruleFilename
1603 << "should be replaced by fallthroughContext:" << rule.string;
1604 }
1605 } else {
1606 auto &nextRule = context.rules[i];
1607 auto nextRuleFilename = (filename == nextRule.filename) ? QString() : u"in "_sv + nextRule.filename;
1608 qWarning() << filename << "context line" << context.line << "contains unreachable element line" << nextRule.line << nextRuleFilename
1609 << "because a dot RegExpr is used line" << rule.line << ruleFilename;
1610 }
1611
1612 // unnecessary quantifier
1613 static const QRegularExpression unnecessaryQuantifier1(QStringLiteral(R"([*+?]([.][*+?]{0,2})?$)"));
1614 static const QRegularExpression unnecessaryQuantifier2(QStringLiteral(R"([*+?]([.][*+?]{0,2})?[)]*$)"));
1615 auto &unnecessaryQuantifier = useCapture ? unnecessaryQuantifier1 : unnecessaryQuantifier2;
1616 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(unnecessaryQuantifier)) {
1617 qWarning() << rule.filename << "line" << rule.line
1618 << "Last quantifier is not necessary (i.g., 'xyz*' -> 'xy', 'xyz+.' -> 'xyz.'):" << rule.string;
1619 return false;
1620 }
1621 }
1622 }
1623
1624 return true;
1625 }
1626
1627 // Parse and check <emptyLine>
1628 bool parseEmptyLine(const QString &filename, const QXmlStreamReader &xml)
1629 {
1630 bool success = true;
1631
1632 QString pattern;
1633 XmlBool casesensitive{};
1634
1635 const auto attrs = xml.attributes();
1636 for (auto &attr : attrs) {
1637 Parser parser{filename, xml, attr, success};
1638
1639 const bool isExtracted = parser.extractString(pattern, u"regexpr"_sv) || parser.extractXmlBool(casesensitive, u"casesensitive"_sv);
1640
1641 success = parser.checkIfExtracted(isExtracted);
1642 }
1643
1644 if (pattern.isEmpty()) {
1645 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: regexpr";
1646 success = false;
1647 } else {
1648 success = checkRegularExpression(filename, QRegularExpression(pattern), xml.lineNumber());
1649 }
1650
1651 return success;
1652 }
1653
1654 //! Check that a regular expression:
1655 //! - isValid()
1656 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1657 bool checkRegularExpression(const QString &filename, const QRegularExpression &regexp, int line) const
1658 {
1659 const auto pattern = regexp.pattern();
1660
1661 // validate regexp
1662 if (!regexp.isValid()) {
1663 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem:" << regexp.errorString() << "at offset"
1664 << regexp.patternErrorOffset();
1665 return false;
1666 }
1667
1668 // catch possible case typos: [A-z] or [a-Z]
1669 const int azOffset = std::max(pattern.indexOf(u"A-z"_sv), pattern.indexOf(u"a-Z"_sv));
1670 if (azOffset >= 0) {
1671 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem: [a-Z] or [A-z] at offset" << azOffset;
1672 return false;
1673 }
1674
1675 return true;
1676 }
1677
1678 //! Check fallthrough and fallthroughContext.
1679 //! Check kateversion for stopEmptyLineContextSwitchLoop.
1680 bool checkContextAttribute(const Definition &definition, const Context &context) const
1681 {
1682 bool success = true;
1683
1684 if (!context.fallthroughContext.name.isEmpty()) {
1685 const bool mandatoryFallthroughAttribute = definition.kateVersion < KateVersion{5, 62};
1686 if (context.fallthrough == XmlBool::True && !mandatoryFallthroughAttribute) {
1687 qWarning() << definition.filename << "line" << context.line << "fallthrough attribute is unnecessary with kateversion >= 5.62 in context"
1688 << context.name;
1689 success = false;
1690 } else if (context.fallthrough != XmlBool::True && mandatoryFallthroughAttribute) {
1691 qWarning() << definition.filename << "line" << context.line
1692 << "fallthroughContext attribute without fallthrough=\"1\" attribute is only valid with kateversion >= 5.62 in context"
1693 << context.name;
1694 success = false;
1695 }
1696 }
1697
1698 if (context.stopEmptyLineContextSwitchLoop != XmlBool::Unspecified && definition.kateVersion < KateVersion{5, 103}) {
1699 qWarning() << definition.filename << "line" << context.line
1700 << "stopEmptyLineContextSwitchLoop attribute is only valid with kateversion >= 5.103 in context" << context.name;
1701 success = false;
1702 }
1703
1704 return success;
1705 }
1706
1707 //! Search for additionalDeliminator/weakDeliminator which has no effect.
1708 bool checkDelimiters(const Definition &definition, const Context::Rule &rule) const
1709 {
1710 if (rule.additionalDeliminator.isEmpty() && rule.weakDeliminator.isEmpty()) {
1711 return true;
1712 }
1713
1714 bool success = true;
1715
1716 if (definition.kateVersion < KateVersion{5, 79}) {
1717 qWarning() << definition.filename << "line" << rule.line
1718 << "additionalDeliminator and weakDeliminator are only available since version \"5.79\". Please, increase kateversion.";
1719 success = false;
1720 }
1721
1722 for (QChar c : rule.additionalDeliminator) {
1723 if (!definition.wordDelimiters.contains(c)) {
1724 return success;
1725 }
1726 }
1727
1728 for (QChar c : rule.weakDeliminator) {
1729 if (definition.wordDelimiters.contains(c)) {
1730 return success;
1731 }
1732 }
1733
1734 qWarning() << rule.filename << "line" << rule.line << "unnecessary use of additionalDeliminator and/or weakDeliminator" << rule.string;
1735 return false;
1736 }
1737
1738 //! Check that keyword rule reference an existing keyword list.
1739 bool checkKeyword(const Definition &definition, const Context::Rule &rule) const
1740 {
1741 if (rule.type == Context::Rule::Type::keyword) {
1742 auto it = definition.keywordsList.find(rule.string);
1743 if (it == definition.keywordsList.end()) {
1744 qWarning() << rule.filename << "line" << rule.line << "reference of non-existing keyword list:" << rule.string;
1745 return false;
1746 }
1747 }
1748 return true;
1749 }
1750
1751 //! Search for rules with lookAhead="true" and context="#stay".
1752 //! This would cause an infinite loop.
1753 bool checkLookAhead(const Context::Rule &rule) const
1754 {
1755 if (rule.lookAhead == XmlBool::True && rule.context.stay) {
1756 qWarning() << rule.filename << "line" << rule.line << "infinite loop: lookAhead with context #stay";
1757 }
1758 return true;
1759 }
1760
1761 //! Check that StringDetect contains a placeHolder when dynamic="1"
1762 bool checkStringDetect(const Context::Rule &rule) const
1763 {
1764 if (rule.type == Context::Rule::Type::StringDetect) {
1765 // dynamic == true and no place holder?
1766 if (rule.dynamic == XmlBool::True) {
1767 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1768 if (!rule.string.contains(placeHolder)) {
1769 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1770 return false;
1771 }
1772 }
1773 }
1774 return true;
1775 }
1776
1777 //! Check that WordDetect does not contain spaces at the beginning and end of text.
1778 bool checkWordDetect(const Context::Rule &rule) const
1779 {
1780 if (rule.type == Context::Rule::Type::WordDetect) {
1781 if (!rule.string.isEmpty() && (rule.string.front().isSpace() || rule.string.back().isSpace())) {
1782 qWarning() << rule.filename << "line" << rule.line << "contains a space at the beginning or end of the string:" << rule.string;
1783 return false;
1784 }
1785 }
1786 return true;
1787 }
1788
1789 //! Check <include> and delimiter in a keyword list
1790 bool checkKeywordsList(const Definition &definition) const
1791 {
1792 bool success = true;
1793
1794 bool includeNotSupport = (definition.kateVersion < KateVersion{5, 53});
1795 QMapIterator<QString, Keywords> keywordsIt(definition.keywordsList);
1796 while (keywordsIt.hasNext()) {
1797 keywordsIt.next();
1798
1799 for (const auto &include : keywordsIt.value().items.includes) {
1800 if (includeNotSupport) {
1801 qWarning() << definition.filename << "line" << include.line
1802 << "<include> is only available since version \"5.53\". Please, increase kateversion.";
1803 success = false;
1804 }
1805 success = checkKeywordInclude(definition, include) && success;
1806 }
1807
1808 // Check that keyword list items do not have duplicated entries
1809 QSet<QString> entries;
1810 for (const auto &keyword : keywordsIt.value().items.keywords) {
1811 if (entries.contains(keyword.content)) {
1812 qWarning() << definition.filename << "line" << keyword.line << "duplicated keyword" << keyword.content;
1813 // TODO: once all stuff is fixed success = false;
1814 }
1815 entries.insert(keyword.content);
1816 }
1817
1818 // Check that keyword list items do not have deliminator character
1819#if 0
1820 for (const auto& keyword : keywordsIt.value().items.keywords) {
1821 for (QChar c : keyword.content) {
1822 if (definition.wordDelimiters.contains(c)) {
1823 qWarning() << definition.filename << "line" << keyword.line << "keyword with delimiter:" << c << "in" << keyword.content;
1824 success = false;
1825 }
1826 }
1827 }
1828#endif
1829 }
1830
1831 return success;
1832 }
1833
1834 //! Search for non-existing keyword include.
1835 bool checkKeywordInclude(const Definition &definition, const Keywords::Items::Item &include) const
1836 {
1837 bool containsKeywordName = true;
1838 int const idx = include.content.indexOf(u"##"_sv);
1839 if (idx == -1) {
1840 auto it = definition.keywordsList.find(include.content);
1841 containsKeywordName = (it != definition.keywordsList.end());
1842 } else {
1843 auto defName = include.content.sliced(idx + 2);
1844 auto listName = include.content.sliced(0, idx);
1845 auto it = m_definitions.find(defName);
1846 if (it == m_definitions.end()) {
1847 qWarning() << definition.filename << "line" << include.line << "unknown definition in" << include.content;
1848 return false;
1849 }
1850 containsKeywordName = it->keywordsList.contains(listName);
1851 }
1852
1853 if (!containsKeywordName) {
1854 qWarning() << definition.filename << "line" << include.line << "unknown keyword name in" << include.content;
1855 }
1856
1857 return containsKeywordName;
1858 }
1859
1860 //! Check if a rule is hidden by another
1861 //! - rule hidden by DetectChar or AnyChar
1862 //! - DetectSpaces, AnyChar, Int, Float with all their characters hidden by DetectChar or AnyChar
1863 //! - StringDetect, WordDetect, RegExpr with as prefix Detect2Chars or other strings
1864 //! - duplicate rule (Int, Float, keyword with same String, etc)
1865 //! - Rule hidden by a dot regex
1866 bool checkUreachableRules(const QString &filename,
1867 const Context &context,
1868 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1869 {
1870 if (context.isOnlyIncluded) {
1871 return true;
1872 }
1873
1874 struct Rule4 {
1875 RuleAndInclude setRule(const Context::Rule &rule, const Context::Rule *includeRules = nullptr)
1876 {
1877 auto set = [&](RuleAndInclude &ruleAndInclude) {
1878 auto old = ruleAndInclude;
1879 ruleAndInclude = {&rule, includeRules};
1880 return old;
1881 };
1882
1883 if (rule.firstNonSpace == XmlBool::True) {
1884 return set(firstNonSpace);
1885 } else if (rule.column == 0) {
1886 return set(column0);
1887 } else if (rule.column > 0) {
1888 return set(columnGreaterThan0[rule.column]);
1889 } else {
1890 return set(normal);
1891 }
1892 }
1893
1894 private:
1895 RuleAndInclude normal;
1896 RuleAndInclude column0;
1897 QMap<int, RuleAndInclude> columnGreaterThan0;
1898 RuleAndInclude firstNonSpace;
1899 };
1900
1901 // Associate QChar with RuleAndInclude
1902 struct CharTable {
1903 /// Search RuleAndInclude associated with @p c.
1904 RuleAndInclude find(QChar c) const
1905 {
1906 if (c.unicode() < 128) {
1907 return m_asciiMap[c.unicode()];
1908 }
1909 auto it = m_utf8Map.find(c);
1910 return it == m_utf8Map.end() ? RuleAndInclude{nullptr, nullptr} : it.value();
1911 }
1912
1913 /// Search RuleAndInclude associated with the characters of @p s.
1914 /// \return an empty QList when at least one character is not found.
1915 QList<RuleAndInclude> find(QStringView s) const
1916 {
1917 QList<RuleAndInclude> result;
1918
1919 for (QChar c : s) {
1920 if (!find(c)) {
1921 return result;
1922 }
1923 }
1924
1925 for (QChar c : s) {
1926 result.append(find(c));
1927 }
1928
1929 return result;
1930 }
1931
1932 /// Associates @p c with a rule.
1933 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1934 {
1935 if (c.unicode() < 128) {
1936 m_asciiMap[c.unicode()] = {&rule, includeRule};
1937 } else {
1938 m_utf8Map[c] = {&rule, includeRule};
1939 }
1940 }
1941
1942 /// Associates each character of @p s with a rule.
1943 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1944 {
1945 for (QChar c : s) {
1946 append(c, rule, includeRule);
1947 }
1948 }
1949
1950 private:
1951 RuleAndInclude m_asciiMap[127]{};
1952 QMap<QChar, RuleAndInclude> m_utf8Map;
1953 };
1954
1955 struct Char4Tables {
1956 CharTable chars;
1957 CharTable charsColumn0;
1958 QMap<int, CharTable> charsColumnGreaterThan0;
1959 CharTable charsFirstNonSpace;
1960 };
1961
1962 // View on Char4Tables members
1963 struct CharTableArray {
1964 // Append Char4Tables members that satisfies firstNonSpace and column.
1965 // Char4Tables::char is always added.
1966 CharTableArray(Char4Tables &tables, const Context::Rule &rule)
1967 {
1968 if (rule.firstNonSpace == XmlBool::True) {
1969 appendTable(tables.charsFirstNonSpace);
1970 }
1971
1972 if (rule.column == 0) {
1973 appendTable(tables.charsColumn0);
1974 } else if (rule.column > 0) {
1975 appendTable(tables.charsColumnGreaterThan0[rule.column]);
1976 }
1977
1978 appendTable(tables.chars);
1979 }
1980
1981 // Removes Char4Tables::chars when the rule contains firstNonSpace or column
1982 void removeNonSpecialWhenSpecial()
1983 {
1984 if (m_size > 1) {
1985 --m_size;
1986 }
1987 }
1988
1989 /// Search RuleAndInclude associated with @p c.
1990 RuleAndInclude find(QChar c) const
1991 {
1992 for (int i = 0; i < m_size; ++i) {
1993 if (auto ruleAndInclude = m_charTables[i]->find(c)) {
1994 return ruleAndInclude;
1995 }
1996 }
1997 return RuleAndInclude{nullptr, nullptr};
1998 }
1999
2000 /// Search RuleAndInclude associated with the characters of @p s.
2001 /// \return an empty QList when at least one character is not found.
2002 QList<RuleAndInclude> find(QStringView s) const
2003 {
2004 for (int i = 0; i < m_size; ++i) {
2005 auto result = m_charTables[i]->find(s);
2006 if (result.size()) {
2007 while (++i < m_size) {
2008 result.append(m_charTables[i]->find(s));
2009 }
2010 return result;
2011 }
2012 }
2013 return QList<RuleAndInclude>();
2014 }
2015
2016 /// Associates @p c with a rule.
2017 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
2018 {
2019 for (int i = 0; i < m_size; ++i) {
2020 m_charTables[i]->append(c, rule, includeRule);
2021 }
2022 }
2023
2024 /// Associates each character of @p s with a rule.
2025 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
2026 {
2027 for (int i = 0; i < m_size; ++i) {
2028 m_charTables[i]->append(s, rule, includeRule);
2029 }
2030 }
2031
2032 private:
2033 void appendTable(CharTable &t)
2034 {
2035 m_charTables[m_size] = &t;
2036 ++m_size;
2037 }
2038
2039 CharTable *m_charTables[3];
2040 int m_size = 0;
2041 };
2042
2043 struct ObservableRule {
2044 const Context::Rule *rule;
2045 const Context::Rule *includeRules;
2046
2047 bool hasResolvedIncludeRules() const
2048 {
2049 return rule == includeRules;
2050 }
2051 };
2052
2053 // Iterates over all the rules, including those in includedRules
2054 struct RuleIterator {
2055 RuleIterator(const QList<ObservableRule> &rules, const ObservableRule &endRule)
2056 : m_end(&endRule - rules.data())
2057 , m_rules(rules)
2058 {
2059 }
2060
2061 /// \return next rule or nullptr
2062 const Context::Rule *next()
2063 {
2064 // if in includedRules
2065 if (m_includedRules) {
2066 ++m_i2;
2067 if (m_i2 != m_includedRules->size()) {
2068 return (*m_includedRules)[m_i2];
2069 }
2070 ++m_i;
2071 m_includedRules = nullptr;
2072 }
2073
2074 // if is a includedRules
2075 while (m_i < m_end && m_rules[m_i].rule->type == Context::Rule::Type::IncludeRules) {
2076 if (!m_rules[m_i].includeRules && m_rules[m_i].rule->includedRules.size()) {
2077 m_i2 = 0;
2078 m_includedRules = &m_rules[m_i].rule->includedRules;
2079 return (*m_includedRules)[m_i2];
2080 }
2081 ++m_i;
2082 }
2083
2084 if (m_i < m_end) {
2085 ++m_i;
2086 return m_rules[m_i - 1].rule;
2087 }
2088
2089 return nullptr;
2090 }
2091
2092 /// \return current IncludeRules or nullptr
2093 const Context::Rule *currentIncludeRules() const
2094 {
2095 return m_includedRules ? m_rules[m_i].rule : m_rules[m_i].includeRules;
2096 }
2097
2098 private:
2099 int m_i = 0;
2100 int m_i2 = 0;
2101 const int m_end;
2102 const QList<ObservableRule> &m_rules;
2103 const QList<const Context::Rule *> *m_includedRules = nullptr;
2104 };
2105
2106 // Dot regex container that satisfies firstNonSpace and column.
2107 struct DotRegex {
2108 /// Append a dot regex rule.
2109 void append(const Context::Rule &rule, const Context::Rule *includedRule)
2110 {
2111 auto array = extractDotRegexes(rule);
2112 if (array[0]) {
2113 *array[0] = {&rule, includedRule};
2114 }
2115 if (array[1]) {
2116 *array[1] = {&rule, includedRule};
2117 }
2118 }
2119
2120 /// Search dot regex which hides @p rule
2121 RuleAndInclude find(const Context::Rule &rule)
2122 {
2123 auto array = extractDotRegexes(rule);
2124 if (array[0]) {
2125 return *array[0];
2126 }
2127 if (array[1]) {
2128 return *array[1];
2129 }
2130 return RuleAndInclude{};
2131 }
2132
2133 private:
2134 using Array = std::array<RuleAndInclude *, 2>;
2135
2136 Array extractDotRegexes(const Context::Rule &rule)
2137 {
2138 Array ret{};
2139
2140 if (rule.firstNonSpace != XmlBool::True && rule.column == -1) {
2141 ret[0] = &dotRegex;
2142 } else {
2143 if (rule.firstNonSpace == XmlBool::True) {
2144 ret[0] = &dotRegexFirstNonSpace;
2145 }
2146
2147 if (rule.column == 0) {
2148 ret[1] = &dotRegexColumn0;
2149 } else if (rule.column > 0) {
2150 ret[1] = &dotRegexColumnGreaterThan0[rule.column];
2151 }
2152 }
2153
2154 return ret;
2155 }
2156
2157 RuleAndInclude dotRegex{};
2158 RuleAndInclude dotRegexColumn0{};
2159 QMap<int, RuleAndInclude> dotRegexColumnGreaterThan0{};
2160 RuleAndInclude dotRegexFirstNonSpace{};
2161 };
2162
2163 bool success = true;
2164
2165 // characters of DetectChar/AnyChar
2166 Char4Tables detectChars;
2167 // characters of dynamic DetectChar
2168 Char4Tables dynamicDetectChars;
2169 // characters of LineContinue
2170 Char4Tables lineContinueChars;
2171
2172 Rule4 intRule{};
2173 Rule4 floatRule{};
2174 Rule4 hlCCharRule{};
2175 Rule4 hlCOctRule{};
2176 Rule4 hlCHexRule{};
2177 Rule4 hlCStringCharRule{};
2178 Rule4 detectIdentifierRule{};
2179
2180 // Contains includedRules and included includedRules
2182
2183 DotRegex dotRegex;
2184
2185 QList<ObservableRule> observedRules;
2186 observedRules.reserve(context.rules.size());
2187 for (const Context::Rule &rule : context.rules) {
2188 const Context::Rule *includeRule = nullptr;
2189 if (rule.type == Context::Rule::Type::IncludeRules) {
2190 auto *context = rule.context.context;
2191 if (context && context->isOnlyIncluded) {
2192 includeRule = &rule;
2193 }
2194 }
2195
2196 observedRules.push_back({&rule, includeRule});
2197 if (includeRule) {
2198 for (const Context::Rule *rule2 : rule.includedRules) {
2199 observedRules.push_back({rule2, includeRule});
2200 }
2201 }
2202 }
2203
2204 for (auto &observedRule : observedRules) {
2205 const Context::Rule &rule = *observedRule.rule;
2206 bool isUnreachable = false;
2207 QList<RuleAndInclude> unreachableBy;
2208
2209 // declare rule as unreachable if ruleAndInclude is not empty
2210 auto updateUnreachable1 = [&](RuleAndInclude ruleAndInclude) {
2211 if (ruleAndInclude) {
2212 isUnreachable = true;
2213 unreachableBy.append(ruleAndInclude);
2214 }
2215 };
2216
2217 // declare rule as unreachable if ruleAndIncludes is not empty
2218 auto updateUnreachable2 = [&](const QList<RuleAndInclude> &ruleAndIncludes) {
2219 if (!ruleAndIncludes.isEmpty()) {
2220 isUnreachable = true;
2221 unreachableBy.append(ruleAndIncludes);
2222 }
2223 };
2224
2225 // check if rule2.firstNonSpace/column is compatible with those of rule
2226 auto isCompatible = [&rule](Context::Rule const &rule2) {
2227 return (rule2.firstNonSpace != XmlBool::True && rule2.column == -1) || (rule.column == rule2.column && rule.column != -1)
2228 || (rule.firstNonSpace == rule2.firstNonSpace && rule.firstNonSpace == XmlBool::True);
2229 };
2230
2231 updateUnreachable1(dotRegex.find(rule));
2232
2233 switch (rule.type) {
2234 // checks if hidden by DetectChar/AnyChar
2235 // then add the characters to detectChars
2236 case Context::Rule::Type::AnyChar: {
2237 auto tables = CharTableArray(detectChars, rule);
2238 updateUnreachable2(tables.find(rule.string));
2239 tables.removeNonSpecialWhenSpecial();
2240 tables.append(rule.string, rule);
2241 break;
2242 }
2243
2244 // check if is hidden by DetectChar/AnyChar
2245 // then add the characters to detectChars or dynamicDetectChars
2246 case Context::Rule::Type::DetectChar: {
2247 auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2248 auto tables = CharTableArray(chars4, rule);
2249 updateUnreachable1(tables.find(rule.char0));
2250 tables.removeNonSpecialWhenSpecial();
2251 tables.append(rule.char0, rule);
2252 break;
2253 }
2254
2255 // check if hidden by DetectChar/AnyChar
2256 // then add spaces characters to detectChars
2257 case Context::Rule::Type::DetectSpaces: {
2258 auto tables = CharTableArray(detectChars, rule);
2259 updateUnreachable2(tables.find(u" \t"_sv));
2260 tables.removeNonSpecialWhenSpecial();
2261 tables.append(u' ', rule);
2262 tables.append(u'\t', rule);
2263 break;
2264 }
2265
2266 // check if hidden by DetectChar/AnyChar
2267 case Context::Rule::Type::HlCChar:
2268 updateUnreachable1(CharTableArray(detectChars, rule).find(u'\''));
2269 updateUnreachable1(hlCCharRule.setRule(rule));
2270 break;
2271
2272 // check if hidden by DetectChar/AnyChar
2273 case Context::Rule::Type::HlCHex:
2274 updateUnreachable1(CharTableArray(detectChars, rule).find(u'0'));
2275 updateUnreachable1(hlCHexRule.setRule(rule));
2276 break;
2277
2278 // check if hidden by DetectChar/AnyChar
2279 case Context::Rule::Type::HlCOct:
2280 updateUnreachable1(CharTableArray(detectChars, rule).find(u'0'));
2281 updateUnreachable1(hlCOctRule.setRule(rule));
2282 break;
2283
2284 // check if hidden by DetectChar/AnyChar
2285 case Context::Rule::Type::HlCStringChar:
2286 updateUnreachable1(CharTableArray(detectChars, rule).find(u'\\'));
2287 updateUnreachable1(hlCStringCharRule.setRule(rule));
2288 break;
2289
2290 // check if hidden by DetectChar/AnyChar
2291 case Context::Rule::Type::Int:
2292 updateUnreachable2(CharTableArray(detectChars, rule).find(u"0123456789"_sv));
2293 updateUnreachable1(intRule.setRule(rule));
2294 break;
2295
2296 // check if hidden by DetectChar/AnyChar
2297 case Context::Rule::Type::Float:
2298 updateUnreachable2(CharTableArray(detectChars, rule).find(u"0123456789."_sv));
2299 updateUnreachable1(floatRule.setRule(rule));
2300 // check that Float is before Int
2301 updateUnreachable1(Rule4(intRule).setRule(rule));
2302 break;
2303
2304 // check if hidden by another DetectIdentifier rule
2305 case Context::Rule::Type::DetectIdentifier:
2306 updateUnreachable1(detectIdentifierRule.setRule(rule));
2307 break;
2308
2309 // check if hidden by DetectChar/AnyChar or another LineContinue
2310 case Context::Rule::Type::LineContinue: {
2311 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2312
2313 auto tables = CharTableArray(lineContinueChars, rule);
2314 updateUnreachable1(tables.find(rule.char0));
2315 tables.removeNonSpecialWhenSpecial();
2316 tables.append(rule.char0, rule);
2317 break;
2318 }
2319
2320 // check if hidden by DetectChar/AnyChar or another Detect2Chars/RangeDetect
2321 case Context::Rule::Type::Detect2Chars:
2322 case Context::Rule::Type::RangeDetect:
2323 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2324 if (!isUnreachable) {
2325 RuleIterator ruleIterator(observedRules, observedRule);
2326 while (const auto *rulePtr = ruleIterator.next()) {
2327 if (isUnreachable) {
2328 break;
2329 }
2330 const auto &rule2 = *rulePtr;
2331 if (rule2.type == rule.type && isCompatible(rule2) && rule.char0 == rule2.char0 && rule.char1 == rule2.char1) {
2332 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2333 }
2334 }
2335 }
2336 break;
2337
2338 case Context::Rule::Type::RegExpr: {
2339 if (rule.isDotRegex) {
2340 dotRegex.append(rule, nullptr);
2341 break;
2342 }
2343
2344 // check that `rule` does not have another RegExpr as a prefix
2345 RuleIterator ruleIterator(observedRules, observedRule);
2346 while (const auto *rulePtr = ruleIterator.next()) {
2347 if (isUnreachable) {
2348 break;
2349 }
2350 const auto &rule2 = *rulePtr;
2351 if (rule2.type == Context::Rule::Type::RegExpr && isCompatible(rule2) && rule.insensitive == rule2.insensitive
2352 && rule.dynamic == rule2.dynamic && rule.sanitizedString.startsWith(rule2.sanitizedString)) {
2353 bool add = (rule.sanitizedString.startsWith(rule2.string) || rule.sanitizedString.size() < rule2.sanitizedString.size() + 2);
2354 if (!add) {
2355 // \s.* (sanitized = \s) is considered hiding \s*\S
2356 // we check the quantifiers to see if this is the case
2357 auto c1 = rule.sanitizedString[rule2.sanitizedString.size()].unicode();
2358 auto c2 = rule.sanitizedString[rule2.sanitizedString.size() + 1].unicode();
2359 auto c3 = rule2.sanitizedString.back().unicode();
2360 if (c3 == '*' || c3 == '?' || c3 == '+') {
2361 add = true;
2362 } else if (c1 == '*' || c1 == '?') {
2363 add = !((c2 == '?' || c2 == '+') || (rule.sanitizedString.size() >= rule2.sanitizedString.size() + 3));
2364 } else {
2365 add = true;
2366 }
2367 }
2368 if (add) {
2369 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2370 }
2371 }
2372 }
2373
2374 Q_FALLTHROUGH();
2375 }
2376 // check if a rule does not have another rule as a prefix
2377 case Context::Rule::Type::WordDetect:
2378 case Context::Rule::Type::StringDetect: {
2379 // check that dynamic `rule` does not have another dynamic StringDetect as a prefix
2380 if (rule.type == Context::Rule::Type::StringDetect && rule.dynamic == XmlBool::True) {
2381 RuleIterator ruleIterator(observedRules, observedRule);
2382 while (const auto *rulePtr = ruleIterator.next()) {
2383 if (isUnreachable) {
2384 break;
2385 }
2386
2387 const auto &rule2 = *rulePtr;
2388 if (rule2.type != Context::Rule::Type::StringDetect || rule2.dynamic != XmlBool::True || !isCompatible(rule2)) {
2389 continue;
2390 }
2391
2392 const bool isSensitive = (rule2.insensitive == XmlBool::True);
2393 const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2394 if ((isSensitive || rule.insensitive != XmlBool::True) && rule.string.startsWith(rule2.string, caseSensitivity)) {
2395 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2396 }
2397 }
2398 }
2399
2400 // string used for comparison and truncated from "dynamic" part
2401 QStringView s = rule.string;
2402
2403 // truncate to '%' with dynamic rules
2404 if (rule.dynamic == XmlBool::True) {
2405 static const QRegularExpression dynamicPosition(QStringLiteral(R"(^(?:[^%]*|%(?![1-9]))*)"));
2406 auto result = dynamicPosition.match(rule.string);
2407 s = s.sliced(0, result.capturedLength());
2408 // check if hidden by DetectChar/AnyChar
2409 if (s.size() + 2 <= rule.string.size()) {
2410 auto tables = CharTableArray(dynamicDetectChars, rule);
2411 updateUnreachable1(tables.find(s.data()[s.size() + 2]));
2412 }
2413 }
2414
2415 QString sanitizedRegex;
2416 // truncate to special character with RegExpr.
2417 // If regexp contains '|', `s` becomes empty.
2418 if (rule.type == Context::Rule::Type::RegExpr) {
2419 static const QRegularExpression regularChars(QStringLiteral(R"(^(?:[^.?*+^$[{(\\|]+|\\[-.?*+^$[\]{}()\\|]+|\[[^^\\]\])+)"));
2420 static const QRegularExpression sanitizeChars(QStringLiteral(R"(\\‍([-.?*+^$[\]{}()\\|])|\[([^^\\])\])"));
2421 const qsizetype result = regularChars.match(rule.string).capturedLength();
2422 const qsizetype pos = qMin(result, s.size());
2423 if (rule.string.indexOf(u'|', pos) < pos) {
2424 sanitizedRegex = rule.string.sliced(0, qMin(result, s.size()));
2425 sanitizedRegex.replace(sanitizeChars, QStringLiteral("\\1"));
2426 s = sanitizedRegex;
2427 } else {
2428 s = QStringView();
2429 }
2430 }
2431
2432 // check if hidden by DetectChar/AnyChar
2433 if (s.size() > 0) {
2434 auto t = CharTableArray(detectChars, rule);
2435 if (rule.insensitive != XmlBool::True) {
2436 updateUnreachable1(t.find(s[0]));
2437 } else {
2438 QChar c2[]{s[0].toLower(), s[0].toUpper()};
2439 updateUnreachable2(t.find(QStringView(c2, 2)));
2440 }
2441
2442 // StringDetect is a DetectChar
2443 if (rule.type == Context::Rule::Type::StringDetect && rule.string.size() == 1) {
2444 auto tables = CharTableArray(detectChars, rule);
2445 auto c = rule.string[0];
2446 if (rule.insensitive != XmlBool::True) {
2447 c = c.toLower();
2448 tables.removeNonSpecialWhenSpecial();
2449 tables.append(c, rule);
2450 c = c.toUpper();
2451 }
2452 tables.removeNonSpecialWhenSpecial();
2453 tables.append(c, rule);
2454 }
2455 }
2456
2457 // check if Detect2Chars, StringDetect, WordDetect is not a prefix of s
2458 if (s.size() > 0 && !isUnreachable) {
2459 // combination of uppercase and lowercase
2460 RuleAndInclude detect2CharsInsensitives[]{{}, {}, {}, {}};
2461
2462 RuleIterator ruleIterator(observedRules, observedRule);
2463 while (const auto *rulePtr = ruleIterator.next()) {
2464 if (isUnreachable) {
2465 break;
2466 }
2467 const auto &rule2 = *rulePtr;
2468 const bool isSensitive = (rule2.insensitive == XmlBool::True);
2469 const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2470
2471 switch (rule2.type) {
2472 // check that it is not a detectChars prefix
2473 case Context::Rule::Type::Detect2Chars:
2474 if (isCompatible(rule2) && s.size() >= 2) {
2475 if (rule.insensitive != XmlBool::True) {
2476 if (rule2.char0 == s[0] && rule2.char1 == s[1]) {
2477 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2478 }
2479 } else {
2480 // when the string is case insensitive,
2481 // all 4 upper/lower case combinations must be found
2482 auto set = [&](RuleAndInclude &x, QChar c1, QChar c2) {
2483 if (!x && rule2.char0 == c1 && rule2.char0 == c2) {
2484 x = {&rule2, ruleIterator.currentIncludeRules()};
2485 }
2486 };
2487 set(detect2CharsInsensitives[0], s[0].toLower(), s[1].toLower());
2488 set(detect2CharsInsensitives[1], s[0].toLower(), s[1].toUpper());
2489 set(detect2CharsInsensitives[2], s[0].toUpper(), s[1].toUpper());
2490 set(detect2CharsInsensitives[3], s[0].toUpper(), s[1].toLower());
2491
2492 if (detect2CharsInsensitives[0] && detect2CharsInsensitives[1] && detect2CharsInsensitives[2]
2493 && detect2CharsInsensitives[3]) {
2494 isUnreachable = true;
2495 unreachableBy.append(detect2CharsInsensitives[0]);
2496 unreachableBy.append(detect2CharsInsensitives[1]);
2497 unreachableBy.append(detect2CharsInsensitives[2]);
2498 unreachableBy.append(detect2CharsInsensitives[3]);
2499 }
2500 }
2501 }
2502 break;
2503
2504 // check that it is not a StringDetect prefix
2505 case Context::Rule::Type::StringDetect:
2506 if (isCompatible(rule2) && rule2.dynamic != XmlBool::True && (isSensitive || rule.insensitive != XmlBool::True)
2507 && s.startsWith(rule2.string, caseSensitivity)) {
2508 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2509 }
2510 break;
2511
2512 // check if a WordDetect is hidden by another WordDetect
2513 case Context::Rule::Type::WordDetect:
2514 if (rule.type == Context::Rule::Type::WordDetect && isCompatible(rule2) && (isSensitive || rule.insensitive != XmlBool::True)
2515 && 0 == rule.string.compare(rule2.string, caseSensitivity)) {
2516 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2517 }
2518 break;
2519
2520 default:;
2521 }
2522 }
2523 }
2524
2525 break;
2526 }
2527
2528 // check if hidden by another keyword rule
2529 case Context::Rule::Type::keyword: {
2530 RuleIterator ruleIterator(observedRules, observedRule);
2531 while (const auto *rulePtr = ruleIterator.next()) {
2532 if (isUnreachable) {
2533 break;
2534 }
2535 const auto &rule2 = *rulePtr;
2536 if (rule2.type == Context::Rule::Type::keyword && isCompatible(rule2) && rule.string == rule2.string) {
2537 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2538 }
2539 }
2540 // TODO check that all keywords are hidden by another rules
2541 break;
2542 }
2543
2544 // add characters in those used but without checking if they are already.
2545 // <DetectChar char="}" />
2546 // <includedRules .../> <- reference an another <DetectChar char="}" /> who will not be checked
2547 // <includedRules .../> <- reference a <DetectChar char="{" /> who will be added
2548 // <DetectChar char="{" /> <- hidden by previous rule
2549 case Context::Rule::Type::IncludeRules:
2550 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2551 break;
2552 }
2553
2554 if (auto &ruleAndInclude = includeContexts[rule.context.context]) {
2555 updateUnreachable1(ruleAndInclude);
2556 } else {
2557 ruleAndInclude.rule = &rule;
2558 }
2559
2560 for (const auto *rulePtr : rule.includedIncludeRules) {
2561 includeContexts.insert(rulePtr->context.context, RuleAndInclude{rulePtr, &rule});
2562 }
2563
2564 if (observedRule.includeRules) {
2565 break;
2566 }
2567
2568 for (const auto *rulePtr : rule.includedRules) {
2569 const auto &rule2 = *rulePtr;
2570 switch (rule2.type) {
2571 case Context::Rule::Type::AnyChar: {
2572 auto tables = CharTableArray(detectChars, rule2);
2573 tables.removeNonSpecialWhenSpecial();
2574 tables.append(rule2.string, rule2, &rule);
2575 break;
2576 }
2577
2578 case Context::Rule::Type::DetectChar: {
2579 auto &chars4 = (rule2.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2580 auto tables = CharTableArray(chars4, rule2);
2581 tables.removeNonSpecialWhenSpecial();
2582 tables.append(rule2.char0, rule2, &rule);
2583 break;
2584 }
2585
2586 case Context::Rule::Type::DetectSpaces: {
2587 auto tables = CharTableArray(detectChars, rule2);
2588 tables.removeNonSpecialWhenSpecial();
2589 tables.append(u' ', rule2, &rule);
2590 tables.append(u'\t', rule2, &rule);
2591 break;
2592 }
2593
2594 case Context::Rule::Type::HlCChar:
2595 hlCCharRule.setRule(rule2, &rule);
2596 break;
2597
2598 case Context::Rule::Type::HlCHex:
2599 hlCHexRule.setRule(rule2, &rule);
2600 break;
2601
2602 case Context::Rule::Type::HlCOct:
2603 hlCOctRule.setRule(rule2, &rule);
2604 break;
2605
2606 case Context::Rule::Type::HlCStringChar:
2607 hlCStringCharRule.setRule(rule2, &rule);
2608 break;
2609
2610 case Context::Rule::Type::Int:
2611 intRule.setRule(rule2, &rule);
2612 break;
2613
2614 case Context::Rule::Type::Float:
2615 floatRule.setRule(rule2, &rule);
2616 break;
2617
2618 case Context::Rule::Type::LineContinue: {
2619 auto tables = CharTableArray(lineContinueChars, rule2);
2620 tables.removeNonSpecialWhenSpecial();
2621 tables.append(rule2.char0, rule2, &rule);
2622 break;
2623 }
2624
2625 case Context::Rule::Type::RegExpr:
2626 if (rule2.isDotRegex) {
2627 dotRegex.append(rule2, &rule);
2628 }
2629 break;
2630
2631 case Context::Rule::Type::StringDetect: {
2632 // StringDetect is a DetectChar
2633 if (rule2.string.size() == 1 || (rule2.string.size() == 2 && rule2.dynamic == XmlBool::True)) {
2634 auto &chars4 = (rule2.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2635 auto tables = CharTableArray(chars4, rule2);
2636 tables.removeNonSpecialWhenSpecial();
2637 tables.append(rule2.string.back(), rule2, &rule);
2638 }
2639 break;
2640 }
2641
2642 case Context::Rule::Type::WordDetect:
2643 case Context::Rule::Type::Detect2Chars:
2644 case Context::Rule::Type::IncludeRules:
2645 case Context::Rule::Type::DetectIdentifier:
2646 case Context::Rule::Type::keyword:
2647 case Context::Rule::Type::Unknown:
2648 case Context::Rule::Type::RangeDetect:
2649 break;
2650 }
2651 }
2652 break;
2653
2654 case Context::Rule::Type::Unknown:
2655 break;
2656 }
2657
2658 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2659 auto &unreachableIncludedRule = unreachableIncludedRules[&rule];
2660 if (isUnreachable && unreachableIncludedRule.alwaysUnreachable) {
2661 unreachableIncludedRule.unreachableBy.append(unreachableBy);
2662 } else {
2663 unreachableIncludedRule.alwaysUnreachable = false;
2664 }
2665 } else if (isUnreachable) {
2666 success = false;
2667 QString message;
2668 message.reserve(128);
2669 for (auto &ruleAndInclude : std::as_const(unreachableBy)) {
2670 message += u"line "_sv;
2671 if (ruleAndInclude.includeRules) {
2672 message += QString::number(ruleAndInclude.includeRules->line);
2673 message += u" [by '"_sv;
2674 message += ruleAndInclude.includeRules->context.name;
2675 message += u"' line "_sv;
2676 message += QString::number(ruleAndInclude.rule->line);
2677 if (ruleAndInclude.includeRules->filename != ruleAndInclude.rule->filename) {
2678 message += u" ("_sv;
2679 message += ruleAndInclude.rule->filename;
2680 message += u')';
2681 }
2682 message += u']';
2683 } else {
2684 message += QString::number(ruleAndInclude.rule->line);
2685 }
2686 message += u", "_sv;
2687 }
2688 message.chop(2);
2689 qWarning() << filename << "line" << rule.line << "unreachable rule by" << message;
2690 }
2691 }
2692
2693 return success;
2694 }
2695
2696 //! Proposes to merge certain rule sequences
2697 //! - several DetectChar/AnyChar into AnyChar
2698 //! - several RegExpr into one RegExpr
2699 bool suggestRuleMerger(const QString &filename, const Context &context) const
2700 {
2701 bool success = true;
2702
2703 if (context.rules.isEmpty()) {
2704 return success;
2705 }
2706
2707 auto it = context.rules.begin();
2708 const auto end = context.rules.end() - 1;
2709
2710 for (; it < end; ++it) {
2711 const auto &rule1 = *it;
2712 const auto &rule2 = it[1];
2713
2714 auto isCommonCompatible = [&] {
2715 if (rule1.lookAhead != rule2.lookAhead) {
2716 return false;
2717 }
2718 // ignore attribute when lookAhead is true
2719 if (rule1.lookAhead != XmlBool::True && rule1.attribute != rule2.attribute) {
2720 return false;
2721 }
2722 // clang-format off
2723 return rule1.beginRegion == rule2.beginRegion
2724 && rule1.endRegion == rule2.endRegion
2725 && rule1.firstNonSpace == rule2.firstNonSpace
2726 && rule1.context.context == rule2.context.context
2727 && rule1.context.popCount == rule2.context.popCount;
2728 // clang-format on
2729 };
2730
2731 switch (rule1.type) {
2732 // request to merge StringDetect with AnyChar
2733 case Context::Rule::Type::StringDetect:
2734 if (rule1.string.size() != 1 || rule1.dynamic == XmlBool::True) {
2735 break;
2736 }
2737 Q_FALLTHROUGH();
2738 // request to merge AnyChar/DetectChar
2739 case Context::Rule::Type::AnyChar:
2740 case Context::Rule::Type::DetectChar:
2741 if ((rule2.type == Context::Rule::Type::AnyChar || rule2.type == Context::Rule::Type::DetectChar
2742 || (rule2.type == Context::Rule::Type::StringDetect && rule2.dynamic != XmlBool::True && rule2.string.size() == 1))
2743 && isCommonCompatible() && rule1.column == rule2.column) {
2744 qWarning() << filename << "line" << rule2.line << "can be merged as AnyChar with the previous rule";
2745 success = false;
2746 }
2747 break;
2748
2749 // request to merge multiple RegExpr
2750 case Context::Rule::Type::RegExpr:
2751 if (rule2.type == Context::Rule::Type::RegExpr && isCommonCompatible() && rule1.dynamic == rule2.dynamic
2752 && (rule1.column == rule2.column || (rule1.column <= 0 && rule2.column <= 0))) {
2753 qWarning() << filename << "line" << rule2.line << "can be merged with the previous rule";
2754 success = false;
2755 }
2756 break;
2757
2758 case Context::Rule::Type::DetectSpaces:
2759 case Context::Rule::Type::HlCChar:
2760 case Context::Rule::Type::HlCHex:
2761 case Context::Rule::Type::HlCOct:
2762 case Context::Rule::Type::HlCStringChar:
2763 case Context::Rule::Type::Int:
2764 case Context::Rule::Type::Float:
2765 case Context::Rule::Type::LineContinue:
2766 case Context::Rule::Type::WordDetect:
2767 case Context::Rule::Type::Detect2Chars:
2768 case Context::Rule::Type::IncludeRules:
2769 case Context::Rule::Type::DetectIdentifier:
2770 case Context::Rule::Type::keyword:
2771 case Context::Rule::Type::Unknown:
2772 case Context::Rule::Type::RangeDetect:
2773 break;
2774 }
2775 }
2776
2777 return success;
2778 }
2779
2780 //! Initialize the referenced context (ContextName::context)
2781 //! Some input / output examples are:
2782 //! - "#stay" -> ""
2783 //! - "#pop" -> ""
2784 //! - "Comment" -> "Comment"
2785 //! - "#pop!Comment" -> "Comment"
2786 //! - "##ISO C++" -> ""
2787 //! - "Comment##ISO C++"-> "Comment" in ISO C++
2788 void resolveContextName(Definition &definition, Context &context, ContextName &contextName, int line)
2789 {
2790 QStringView name = contextName.name;
2791 if (name.isEmpty()) {
2792 contextName.stay = true;
2793 } else if (name.startsWith(u"#stay"_sv)) {
2794 contextName.stay = true;
2795 if (name.size() > 5) {
2796 qWarning() << definition.filename << "line" << line << "invalid context in" << context.name;
2797 m_success = false;
2798 }
2799 } else {
2800 while (name.startsWith(u"#pop"_sv)) {
2801 name = name.sliced(4);
2802 ++contextName.popCount;
2803 }
2804
2805 if (contextName.popCount && !name.isEmpty()) {
2806 if (name.startsWith(u'!') && name.size() > 1) {
2807 name = name.sliced(1);
2808 } else {
2809 qWarning() << definition.filename << "line" << line << "'!' missing between '#pop' and context name" << context.name;
2810 m_success = false;
2811 }
2812 }
2813
2814 if (!name.isEmpty()) {
2815 const int idx = name.indexOf(u"##"_sv);
2816 if (idx == -1) {
2817 auto it = definition.contexts.find(name.toString());
2818 if (it != definition.contexts.end()) {
2819 contextName.context = &*it;
2820 }
2821 } else {
2822 auto defName = name.sliced(idx + 2);
2823 auto it = m_definitions.find(defName.toString());
2824 if (it != m_definitions.end()) {
2825 auto listName = name.sliced(0, idx).toString();
2826 definition.referencedDefinitions.insert(&*it);
2827 auto ctxIt = it->contexts.find(listName.isEmpty() ? it->firstContextName : listName);
2828 if (ctxIt != it->contexts.end()) {
2829 contextName.context = &*ctxIt;
2830 }
2831 } else {
2832 qWarning() << definition.filename << "line" << line << "unknown definition in" << context.name;
2833 m_success = false;
2834 }
2835 }
2836
2837 if (!contextName.context) {
2838 qWarning() << definition.filename << "line" << line << "unknown context" << name << "in" << context.name;
2839 m_success = false;
2840 }
2841 }
2842 }
2843 }
2844
2845 QMap<QString, Definition> m_definitions;
2847 Definition *m_currentDefinition = nullptr;
2848 Keywords *m_currentKeywords = nullptr;
2849 Context *m_currentContext = nullptr;
2850 // xml reader variable
2851 //@{
2852 QString m_textContent;
2853 bool m_inKeywordItem = false;
2854 //@}
2855 bool m_success = true;
2856};
2857
2858class HlCompressor
2859{
2860public:
2861 HlCompressor(const QString &kateVersion)
2862 : m_kateVersion(kateVersion)
2863 {
2864 m_hasElems.push_back(true);
2865 }
2866
2867 const QString &compressedXML() const
2868 {
2869 return m_data;
2870 }
2871
2872 /**
2873 * Reduce xml space by removing what is superfluous.
2874 * - transforms boolean values into 0 or 1.
2875 * - remove unused attributes.
2876 * - remove spaces and comments.
2877 * - remove context attributes referring to #stay (because this is the default).
2878 * - replace Detect2Chars with StringDetect (String="xy" is shorter than char="x" char1="y").
2879 * - sort contexts by frequency of use to accelerate their search during loading.
2880 */
2881 void processElement(const QXmlStreamReader &xml)
2882 {
2883 switch (xml.tokenType()) {
2885 closePreviousOpenTag(m_inContexts && !m_contexts.empty() ? m_contexts.back().data : m_data);
2886 m_hasElems.push_back(false);
2887
2888 const auto tagName = xml.name();
2889 if (tagName == u"contexts"_sv) {
2890 m_inContexts = true;
2891 m_data += u"<contexts"_sv;
2892 } else if (m_inContexts) {
2893 Context &ctx = (m_contexts.empty() || tagName == u"context"_sv) ? m_contexts.emplace_back() : m_contexts.back();
2894 QString &out = ctx.data;
2895 const bool isDetect2Chars = tagName == u"Detect2Chars"_sv;
2896 out += u'<' % (isDetect2Chars ? u"StringDetect"_sv : tagName);
2897
2898 auto attrs = xml.attributes();
2899 sortAttributes(attrs);
2900 for (const auto &attr : attrs) {
2901 const auto attrName = attr.name();
2902 auto value = attr.value();
2903 // transform Detect2Chars char and char1 attributes to StringDetect String attribute
2904 if (isDetect2Chars && (attrName == u"char"_sv || attrName == u"char1"_sv)) {
2905 if (attrName == u"char"_sv) {
2906 const auto ch0 = value;
2907 const auto ch1 = attrs.value(u"char1"_sv);
2908 QChar chars[]{ch0.isEmpty() ? u' ' : ch0[0], ch1.isEmpty() ? u' ' : ch1[0]};
2909 writeXmlAttribute(out, u"String"_sv, QStringView(chars, 2), tagName);
2910 }
2911 } else if (attrName == u"context"_sv || attrName == u"lineEndContext"_sv || attrName == u"fallthroughContext"_sv
2912 || attrName == u"lineEmptyContext"_sv) {
2913 // ignore #stay context because this is the default
2914 if (value != u"#stay"_sv) {
2915 writeXmlAttribute(out, attrName, value, tagName);
2916
2917 /*
2918 * Extract context name and increment context counter
2919 */
2920 bool hasPop = false;
2921 while (value.startsWith(u"#pop"_sv)) {
2922 hasPop = true;
2923 value = value.sliced(4);
2924 }
2925 if (hasPop && !value.isEmpty()) {
2926 value = value.sliced(1);
2927 }
2928 if (!value.isEmpty() && -1 == value.indexOf(u"##"_sv)) {
2929 m_contextRefs[value.toString()]++;
2930 }
2931 }
2932 } else if (tagName == u"LineContinue"_sv && attrName == u"char"_sv && value == u"\\") {
2933 // ignore char="\\" with LineContinue
2934 } else {
2935 if (attrName == u"name"_sv) {
2936 ctx.name = value.toString();
2937 }
2938 writeXmlAttribute(out, attrName, value, tagName);
2939 }
2940 }
2941 } else {
2942 m_data += u'<' % tagName;
2943 const auto attrs = xml.attributes();
2944 for (const auto &attr : attrs) {
2945 auto name = attr.name();
2946 auto value = (name == u"kateversion") ? QStringView(m_kateVersion) : attr.value();
2947 writeXmlAttribute(m_data, name, value, tagName);
2948 }
2949 }
2950 break;
2951 }
2952
2954 const auto name = xml.name();
2955 if (m_inContexts && !m_contexts.empty() && name == u"contexts"_sv) {
2956 m_inContexts = false;
2957 // sorting contexts by the most used (ignore first context)
2958 std::sort(m_contexts.begin() + 1, m_contexts.end(), [&](auto &ctx1, auto &ctx2) {
2959 auto i1 = m_contextRefs.value(ctx1.name);
2960 auto i2 = m_contextRefs.value(ctx2.name);
2961 if (i1 != i2) {
2962 return i1 > i2;
2963 }
2964 // for a reproducible build, contexts with the same number of uses are sorted by name
2965 return ctx1.name < ctx2.name;
2966 });
2967 for (const auto &ctx : m_contexts) {
2968 m_data += ctx.data;
2969 }
2970 }
2971
2972 QString &out = m_inContexts && !m_contexts.empty() ? m_contexts.back().data : m_data;
2973 if (m_hasElems.back()) {
2974 out += u"</"_sv % name % u'>';
2975 } else {
2976 out += u"/>"_sv;
2977 }
2978 m_hasElems.pop_back();
2979 break;
2980 }
2981
2984 if (!m_inContexts && !xml.isWhitespace()) {
2985 closePreviousOpenTag(m_data);
2986 writeXmlText(m_data, xml.text());
2987 }
2988 break;
2989
2990 default:;
2991 }
2992 }
2993
2994private:
2995 void closePreviousOpenTag(QString &out)
2996 {
2997 if (!m_hasElems.back()) {
2998 m_hasElems.back() = true;
2999 out += u'>';
3000 }
3001 }
3002
3003 /**
3004 * Write \p text escaping special characters.
3005 */
3006 static void writeXmlText(QString &out, QStringView text, bool escapeDQ = false)
3007 {
3008 for (const QChar &c : text) {
3009 if (c == u'<') {
3010 out += u"&lt;"_sv;
3011 } else if (c == u'&') {
3012 out += u"&amp;"_sv;
3013 } else if (escapeDQ && c == u'"') {
3014 out += u"&#34;"_sv;
3015 } else if (c == u'\t') {
3016 // non-space whitespace character in an attribute is remplaced with space...
3017 out += u"&#9;"_sv;
3018 } else {
3019 out += c;
3020 }
3021 }
3022 }
3023
3024 /**
3025 * Write attribut in \p out.
3026 * Booleans are converted to 0, 1 or ignored if this corresponds to the default value.
3027 * Values will be written with either double quotes or single quotes,
3028 * depending on which takes up the least space
3029 */
3030 static void writeXmlAttribute(QString &out, QStringView attrName, QStringView value, QStringView tagName)
3031 {
3032 enum class DefaultBool {
3033 // default value is false
3034 False,
3035 // default value is true
3036 True,
3037 // manipulate as a tribool whose attribute absence is equivalent to None
3038 None,
3039 // not used
3040 Ignored,
3041 // default value is false, but None for <keyword>
3042 FalseOrKeywordTag,
3043 // default value is true, but depends on another value for <keywords>
3044 TrueOrKeywordsTag,
3045 // default is false, but ignored in <context>
3046 DynamicAttr,
3047 };
3048 static const QHash<QStringView, DefaultBool> booleanAttrs({
3049 {u"fallthrough"_sv, DefaultBool::Ignored},
3050 {u"dynamic"_sv, DefaultBool::DynamicAttr},
3051 {u"hidden"_sv, DefaultBool::False},
3052 {u"indentationsensitive"_sv, DefaultBool::False},
3053 {u"noIndentationBasedFolding"_sv, DefaultBool::False},
3054 {u"lookAhead"_sv, DefaultBool::False},
3055 {u"firstNonSpace"_sv, DefaultBool::False},
3056 {u"insensitive"_sv, DefaultBool::FalseOrKeywordTag},
3057 {u"minimal"_sv, DefaultBool::False},
3058 {u"includeAttrib"_sv, DefaultBool::False},
3059 {u"italic"_sv, DefaultBool::None},
3060 {u"bold"_sv, DefaultBool::None},
3061 {u"underline"_sv, DefaultBool::None},
3062 {u"strikeOut"_sv, DefaultBool::None},
3063 {u"spellChecking"_sv, DefaultBool::True},
3064 {u"casesensitive"_sv, DefaultBool::TrueOrKeywordsTag},
3065 {u"ignored"_sv, DefaultBool::Ignored},
3066 });
3067
3068 auto it = booleanAttrs.find(attrName);
3069 // convert boolean value
3070 if (it != booleanAttrs.end()) {
3071 bool b = KSyntaxHighlighting::Xml::attrToBool(value);
3072 bool ignoreAttr = false;
3073 switch (*it) {
3074 case DefaultBool::Ignored:
3075 ignoreAttr = true;
3076 break;
3077 case DefaultBool::TrueOrKeywordsTag:
3078 ignoreAttr = (tagName == u"keywords"_sv) ? false : b;
3079 break;
3080 case DefaultBool::True:
3081 ignoreAttr = b;
3082 break;
3083 case DefaultBool::FalseOrKeywordTag:
3084 ignoreAttr = (tagName == u"keyword"_sv) ? false : !b;
3085 break;
3086 case DefaultBool::DynamicAttr:
3087 ignoreAttr = (tagName == u"context"_sv) || !b;
3088 break;
3089 case DefaultBool::False:
3090 ignoreAttr = !b;
3091 break;
3092 case DefaultBool::None:
3093 ignoreAttr = false;
3094 break;
3095 }
3096 if (!ignoreAttr) {
3097 out += u' ' % attrName % u"=\""_sv % (b ? u'1' : u'0') % u'"';
3098 }
3099 } else {
3100 const bool hasDQ = value.contains(u'"');
3101 // attribute in double quotes when the value does not contain " or contains " and '
3102 if (!hasDQ || value.contains(u'\'')) {
3103 out += u' ' % attrName % u"=\""_sv;
3104 writeXmlText(out, value, hasDQ);
3105 out += u'"';
3106 // attribute in single quotes because the value contains "
3107 } else {
3108 out += u' ' % attrName % u"='"_sv;
3109 writeXmlText(out, value);
3110 out += u'\'';
3111 }
3112 }
3113 }
3114
3115 /**
3116 * Sort attributes for better compression by rcc.
3117 */
3118 static void sortAttributes(QXmlStreamAttributes &attrs)
3119 {
3120 static const QHash<QStringView, int> priorityAttrs({
3121 // context and rule
3122 {u"attribute"_sv, 5},
3123
3124 // context and itemData
3125 {u"name"_sv, 4},
3126
3127 // context
3128 {u"noIndentationBasedFolding"_sv, 11},
3129 {u"lineEndContext"_sv, 9},
3130 {u"lineEmptyContext"_sv, 8},
3131 {u"fallthroughContext"_sv, 7},
3132
3133 // rule
3134 {u"lookAhead"_sv, 100},
3135 {u"firstNonSpace"_sv, 99},
3136 {u"dynamic"_sv, 98},
3137 {u"minimal"_sv, 97},
3138 {u"includeAttrib"_sv, 96},
3139 {u"insensitive"_sv, 95},
3140 {u"column"_sv, 50},
3141 {u"beginRegion"_sv, 40},
3142 {u"endRegion"_sv, 41},
3143 {u"weakDeliminator"_sv, 31},
3144 {u"additionalDeliminator"_sv, 30},
3145 {u"context"_sv, 20},
3146 {u"String"_sv, 2},
3147 {u"char"_sv, 2},
3148
3149 // itemData
3150 {u"strikeOut"_sv, 100},
3151 {u"underline"_sv, 99},
3152 {u"italic"_sv, 98},
3153 {u"bold"_sv, 97},
3154 {u"spellChecking"_sv, 96},
3155 {u"defStyleNum"_sv, 95},
3156 {u"color"_sv, 94},
3157 {u"backgroundColor"_sv, 93},
3158 {u"selBackgroundColor"_sv, 92},
3159 {u"selColor"_sv, 91},
3160 });
3161 std::sort(attrs.begin(), attrs.end(), [](auto &attr1, auto &attr2) {
3162 auto i1 = priorityAttrs.value(attr1.name());
3163 auto i2 = priorityAttrs.value(attr2.name());
3164 if (i1 != i2) {
3165 return i1 < i2;
3166 }
3167 return attr1.name() < attr2.name();
3168 });
3169 }
3170
/**
 * Output buffer for one context while the <contexts> element is processed.
 */
struct Context {
    // value of the last "name" attribute written into this buffer; used to
    // look up the reference count when contexts are sorted
    QString name;
    // compressed XML written for this buffer: the tags routed to it by
    // processElement(), i.e. the <context> element and what follows it
    QString data;
};
3175 QString m_data = u"<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE language>"_s;
3176 std::vector<Context> m_contexts;
3177 QHash<QString, int> m_contextRefs;
3178 QVarLengthArray<bool, 8> m_hasElems;
3179 QString m_kateVersion;
3180 bool m_inContexts = false;
3181};
3182
3183void printFileError(const QFile &file)
3184{
3185 qWarning() << "Failed to open" << file.fileName() << "-" << file.errorString();
3186}
3187
3188void printXmlError(const QString &fileName, const QXmlStreamReader &xml)
3189{
3190 qWarning() << fileName << "-" << xml.errorString() << "@ offset" << xml.characterOffset();
3191};
3192
3193QStringList readListing(const QString &fileName)
3194{
3195 QFile file(fileName);
3196 if (!file.open(QIODevice::ReadOnly)) {
3197 printFileError(file);
3198 return QStringList();
3199 }
3200
3201 QXmlStreamReader xml(&file);
3202 QStringList listing;
3203 while (!xml.atEnd()) {
3204 xml.readNext();
3205
3206 // add only .xml files, no .json or stuff
3207 if (xml.isCharacters() && xml.text().contains(QLatin1String(".xml"))) {
3208 listing.append(xml.text().toString());
3209 }
3210 }
3211
3212 if (xml.hasError()) {
3213 printXmlError(fileName, xml);
3214 listing.clear();
3215 }
3216
3217 return listing;
3218}
3219
3220/**
3221 * check if the "extensions" attribute have valid wildcards
3222 * @param extensions extensions string to check
3223 * @return valid?
3224 */
3225bool checkExtensions(QStringView extensions)
3226{
3227 // get list of extensions
3228 const QList<QStringView> extensionParts = extensions.split(u';', Qt::SkipEmptyParts);
3229
3230 // ok if empty
3231 if (extensionParts.isEmpty()) {
3232 return true;
3233 }
3234
3235 // check that only valid wildcard things are inside the parts
3236 for (const auto &extension : extensionParts) {
3237 for (const auto c : extension) {
3238 // eat normal things
3239 if (c.isDigit() || c.isLetter()) {
3240 continue;
3241 }
3242
3243 // allow some special characters
3244 if (c == u'.' || c == u'-' || c == u'_' || c == u'+') {
3245 continue;
3246 }
3247
3248 // only allowed wildcard things: '?' and '*'
3249 if (c == u'?' || c == u'*') {
3250 continue;
3251 }
3252
3253 qWarning() << "invalid character" << c << "seen in extensions wildcard";
3254 return false;
3255 }
3256 }
3257
3258 // all checks passed
3259 return true;
3260}
3261
/**
 * Result of the compression of one highlighting file, kept in memory until
 * every file has been checked.
 */
struct CompressedFile {
    // file name without directory, appended to the output directory in main()
    QString fileName;
    // compressed XML document as produced by HlCompressor::compressedXML()
    QString xmlData;
};
3266
3267}
3268
3269int main(int argc, char *argv[])
3270{
3271 // get app instance
3272 QCoreApplication app(argc, argv);
3273
3274 // ensure enough arguments are passed
3275 if (app.arguments().size() < 4) {
3276 return 1;
3277 }
3278
3279#ifdef HAS_XERCESC
3280 // care for proper init and cleanup
3281 XMLPlatformUtils::Initialize();
3282 auto cleanup = qScopeGuard(XMLPlatformUtils::Terminate);
3283
3284 /*
3285 * parse XSD first time and cache it
3286 */
3287 XMLGrammarPoolImpl xsd(XMLPlatformUtils::fgMemoryManager);
3288
3289 // create parser for the XSD
3290 CustomXMLValidator parser(&xsd);
3291
3292 // load grammar into the pool, on error just abort
3293 const auto xsdFile = app.arguments().at(2);
3294 if (!parser.loadGrammar((const char16_t *)xsdFile.utf16(), Grammar::SchemaGrammarType, true) || parser.eh.failed()) {
3295 qWarning("Failed to parse XSD %s: %s", qPrintable(xsdFile), qPrintable(parser.messages));
3296 return 2;
3297 }
3298
3299 // lock the pool, no later modifications wanted!
3300 xsd.lockPool();
3301#endif
3302
3303 const QString hlFilenamesListing = app.arguments().value(3);
3304 if (hlFilenamesListing.isEmpty()) {
3305 return 1;
3306 }
3307
3308 QStringList hlFilenames = readListing(hlFilenamesListing);
3309 if (hlFilenames.isEmpty()) {
3310 qWarning("Failed to read %s", qPrintable(hlFilenamesListing));
3311 return 3;
3312 }
3313
3314 // text attributes
3315 const QStringList textAttributes = QStringList() << QStringLiteral("name") << QStringLiteral("alternativeNames") << QStringLiteral("section")
3316 << QStringLiteral("mimetype") << QStringLiteral("extensions") << QStringLiteral("style")
3317 << QStringLiteral("author") << QStringLiteral("license") << QStringLiteral("indenter");
3318
3319 // index all given highlightings
3320 HlFilesChecker filesChecker;
3321 QVariantMap hls;
3322 int anyError = 0;
3323 std::vector<CompressedFile> compressedFiles;
3324 for (const QString &hlFilename : std::as_const(hlFilenames)) {
3325 QFile hlFile(hlFilename);
3326 if (!hlFile.open(QIODevice::ReadOnly)) {
3327 printFileError(hlFile);
3328 anyError = 3;
3329 continue;
3330 }
3331
3332#ifdef HAS_XERCESC
3333 // create parser
3334 CustomXMLValidator parser(&xsd);
3335
3336 // parse the XML file
3337 parser.parse((const char16_t *)hlFile.fileName().utf16());
3338
3339 // report issues
3340 if (parser.eh.failed()) {
3341 qWarning("Failed to validate XML %s: %s", qPrintable(hlFile.fileName()), qPrintable(parser.messages));
3342 anyError = 4;
3343 continue;
3344 }
3345#endif
3346
3347 // read the needed attributes from toplevel language tag
3348 hlFile.reset();
3349 QXmlStreamReader xml(&hlFile);
3350 if (xml.readNextStartElement()) {
3351 if (xml.name() != QLatin1String("language")) {
3352 anyError = 5;
3353 continue;
3354 }
3355 } else {
3356 anyError = 6;
3357 continue;
3358 }
3359
3360 // map to store hl info
3361 QVariantMap hl;
3362
3363 // transfer text attributes
3364 for (const QString &attribute : std::as_const(textAttributes)) {
3365 hl[attribute] = xml.attributes().value(attribute).toString();
3366 }
3367
3368 // check if extensions have the right format
3369 if (!checkExtensions(hl[QStringLiteral("extensions")].toString())) {
3370 qWarning() << hlFilename << "'extensions' wildcards invalid:" << hl[QStringLiteral("extensions")].toString();
3371 anyError = 23;
3372 }
3373
3374 // numerical attributes
3375 hl[QStringLiteral("version")] = xml.attributes().value(QLatin1String("version")).toInt();
3376 hl[QStringLiteral("priority")] = xml.attributes().value(QLatin1String("priority")).toInt();
3377
3378 // add boolean one
3379 hl[QStringLiteral("hidden")] = attrToBool(xml.attributes().value(QLatin1String("hidden")));
3380
3381 // keep some strings as UTF-8 for faster translations
3382 hl[QStringLiteral("nameUtf8")] = hl[QStringLiteral("name")].toString().toUtf8();
3383 hl[QStringLiteral("sectionUtf8")] = hl[QStringLiteral("section")].toString().toUtf8();
3384
3385 // remember hl
3386 hls[QFileInfo(hlFile).fileName()] = hl;
3387
3388 const QStringView kateversion = xml.attributes().value(QStringLiteral("kateversion"));
3389 const QString hlName = hl[QStringLiteral("name")].toString();
3390 const QString hlAlternativeNames = hl[QStringLiteral("alternativeNames")].toString();
3391
3392 filesChecker.setDefinition(kateversion, hlFilename, hlName, hlAlternativeNames.split(u';', Qt::SkipEmptyParts));
3393
3394 // As the compressor removes "fallthrough" attribute which is required with
3395 // "fallthroughContext" before the 5.62 version, the minimum version is
3396 // automatically increased
3397 HlCompressor compressor((filesChecker.currentVersion() < KateVersion{5, 62}) ? u"5.62"_s : kateversion.toString());
3398 compressor.processElement(xml);
3399
3400 // scan for broken regex or keywords with spaces
3401 while (!xml.atEnd()) {
3402 xml.readNext();
3403 filesChecker.processElement(xml);
3404 compressor.processElement(xml);
3405 }
3406
3407 if (xml.hasError()) {
3408 anyError = 33;
3409 printXmlError(hlFilename, xml);
3410 }
3411
3412 compressedFiles.emplace_back(CompressedFile{
3413 QFileInfo(hlFilename).fileName(),
3414 compressor.compressedXML(),
3415 });
3416 }
3417
3418 filesChecker.resolveContexts();
3419
3420 if (!filesChecker.check()) {
3421 anyError = 7;
3422 }
3423
3424 // bail out if any problem was seen
3425 if (anyError) {
3426 return anyError;
3427 }
3428
3429 // check compressed file
3430 HlFilesChecker filesChecker2;
3431 const QString compressedDir = app.arguments().at(4) + u"/"_sv;
3432 for (const auto &compressedFile : std::as_const(compressedFiles)) {
3433 const auto outFileName = compressedDir + compressedFile.fileName;
3434 auto utf8Data = compressedFile.xmlData.toUtf8();
3435
3436#ifdef HAS_XERCESC
3437 // create parser
3438 CustomXMLValidator parser(&xsd);
3439
3440 auto utf8Filename = outFileName.toUtf8();
3441 utf8Filename.append('\0');
3442 // parse the XML file
3443 MemBufInputSource membuf(reinterpret_cast<const XMLByte *>(utf8Data.constData()), utf8Data.size(), utf8Filename.data());
3444
3445 // report issues
3446 if (parser.eh.failed()) {
3447 qWarning("Failed to validate XML %s: %s", qPrintable(outFileName), qPrintable(parser.messages));
3448 return 8;
3449 }
3450#endif
3451
3452 QBuffer buffer(&utf8Data);
3453 buffer.open(QBuffer::ReadOnly);
3454 QXmlStreamReader xml(&buffer);
3455 // scan for broken file
3456 while (!xml.atEnd()) {
3457 if (xml.readNext() == QXmlStreamReader::TokenType::StartElement && xml.name() == u"language"_sv) {
3458 const auto attrs = xml.attributes();
3459 const auto version = attrs.value(u"kateversion"_sv);
3460 const QString hlName = attrs.value(u"name"_sv).toString();
3461 const QString hlAlternativeNames = attrs.value(u"alternativeNames"_sv).toString();
3462 filesChecker2.setDefinition(version, outFileName, hlName, hlAlternativeNames.split(u';', Qt::SkipEmptyParts));
3463 }
3464 filesChecker2.processElement(xml);
3465 }
3466
3467 if (xml.hasError()) {
3468 printXmlError(outFileName, xml);
3469 return 9;
3470 }
3471
3472 // create outfile, after all has worked!
3473 QFile outFile(outFileName);
3474 if (!outFile.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
3475 return 10;
3476 }
3477 outFile.write(utf8Data);
3478 }
3479
3480 filesChecker2.resolveContexts();
3481
3482 // bail out if any problem was seen
3483 if (!filesChecker2.check()) {
3484 return 11;
3485 }
3486
3487 // create outfile, after all has worked!
3488 QFile outFile(app.arguments().at(1));
3489 if (!outFile.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
3490 return 12;
3491 }
3492
3493 // write out json
3494 outFile.write(QCborValue::fromVariant(QVariant(hls)).toCbor());
3495
3496 // be done
3497 return 0;
3498}
AKONADI_MIME_EXPORT const char Ignored[]
Type type(const QSqlDatabase &db)
char * toString(const EngineQuery &query)
KDB_EXPORT KDbVersionInfo version()
void error(QWidget *parent, const QString &text, const QString &title, const KGuiItem &buttonOk, Options options=Notify)
KIOCORE_EXPORT void add(const QString &fileClass, const QString &directory)
QString name(StandardAction id)
const QList< QKeySequence > & next()
const QList< QKeySequence > & find()
const QList< QKeySequence > & end()
const QList< QKeySequence > & replace()
KTEXTEDITOR_EXPORT size_t qHash(KTextEditor::Cursor cursor, size_t seed=0) noexcept
bool operator<(const PosRange< Trait > &l, const PosRange< Trait > &r)
bool operator==(const StyleDelim &l, const StyleDelim &r)
QCborValue fromVariant(const QVariant &variant)
bool isDigit(char32_t ucs4)
bool isLetter(char32_t ucs4)
char32_t toLower(char32_t ucs4)
char32_t toUpper(char32_t ucs4)
char16_t & unicode()
virtual QString fileName() const const override
bool open(FILE *fh, OpenMode mode, FileHandleFlags handleFlags)
QString fileName() const const
iterator find(const Key &key)
QString errorString() const const
void append(QList< T > &&value)
iterator begin()
void clear()
iterator end()
bool isEmpty() const const
void push_back(parameter_type value)
void reserve(qsizetype size)
qsizetype size() const const
iterator end()
iterator find(const Key &key)
iterator insert(const Key &key, const T &value)
QString errorString() const const
bool isValid() const const
QString pattern() const const
qsizetype patternErrorOffset() const const
void clear()
bool contains(const QSet< T > &other) const const
iterator insert(const T &value)
qsizetype size() const const
QString & append(QChar ch)
const QChar at(qsizetype position) const const
QChar & back()
void chop(qsizetype n)
QString fromUtf16(const char16_t *unicode, qsizetype size)
qsizetype indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const const
bool isEmpty() const const
QString number(double n, char format, int precision)
QString & remove(QChar ch, Qt::CaseSensitivity cs)
QString & replace(QChar before, QChar after, Qt::CaseSensitivity cs)
void reserve(qsizetype size)
qsizetype size() const const
QString sliced(qsizetype pos) const const
QStringList split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const const
QByteArray toUtf8() const const
bool contains(QChar c, Qt::CaseSensitivity cs) const const
const_pointer data() const const
QChar first() const const
qsizetype indexOf(QChar c, qsizetype from, Qt::CaseSensitivity cs) const const
bool isNull() const const
qsizetype size() const const
QStringView sliced(qsizetype pos) const const
QList< QStringView > split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
bool startsWith(QChar ch) const const
int toInt(bool *ok, int base) const const
QString toString() const const
CaseInsensitive
SkipEmptyParts
QTextStream & endl(QTextStream &stream)
QStringView name() const const
QStringView value() const const
QStringView value(QAnyStringView namespaceUri, QAnyStringView name) const const
bool atEnd() const const
QXmlStreamAttributes attributes() const const
qint64 characterOffset() const const
QString errorString() const const
bool hasError() const const
bool isCharacters() const const
bool isWhitespace() const const
qint64 lineNumber() const const
QStringView name() const const
TokenType readNext()
bool readNextStartElement()
QStringView text() const const
TokenType tokenType() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri May 2 2025 12:03:13 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.