10#include <QCoreApplication>
14#include <QMutableMapIterator>
15#include <QRegularExpression>
19#include <QXmlStreamReader>
23#include <xercesc/framework/MemBufInputSource.hpp>
24#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
26#include <xercesc/parsers/SAX2XMLReaderImpl.hpp>
28#include <xercesc/sax/ErrorHandler.hpp>
29#include <xercesc/sax/SAXParseException.hpp>
31#include <xercesc/util/PlatformUtils.hpp>
32#include <xercesc/util/XMLString.hpp>
33#include <xercesc/util/XMLUni.hpp>
35#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
36#include <xercesc/validators/common/Grammar.hpp>
38using namespace xercesc;
// Xerces-C ErrorHandler implementation that writes each reported
// SAXParseException as a text diagnostic into the stream built from the
// QString* given to the constructor.
// NOTE(review): the bodies of warning()/error()/fatalError()/resetErrors()
// are not visible in this excerpt, so how they forward to handle() or
// update m_failed cannot be confirmed from here.
57class CustomErrorHandler :
public ErrorHandler
64 CustomErrorHandler(QString *messages)
65 : m_messages(messages)
// Severity levels passed to handle().
82 enum severity { s_warning, s_error, s_fatal };
88 void warning(
const SAXParseException &e)
override
98 void error(
const SAXParseException &e)
override
108 void fatalError(
const SAXParseException &e)
override
117 void resetErrors()
override
// Emits one diagnostic: id, line number and column number separated by ":",
// then a space, then the label "warning: " for s_warning or "error: " for
// every other severity.
127 void handle(
const SAXParseException &e, severity s)
// Starts from the public id; the system id is assigned afterwards
// (presumably as a fallback -- the guarding condition is not visible here).
130 const XMLCh *xid(e.getPublicId());
132 xid = e.getSystemId();
134 m_messages <<
QString::fromUtf16(xid) <<
":" << e.getLineNumber() <<
":" << e.getColumnNumber() <<
" " << (s == s_warning ?
"warning: " :
"error: ")
// Text stream that receives the formatted diagnostics.
142 QTextStream m_messages;
// Starts as false; the place where it is set is not visible in this excerpt.
147 bool m_failed =
false;
// SAX2 reader preconfigured for XML Schema validation. The grammar pool is
// handed to the SAX2XMLReaderImpl base together with the default Xerces
// memory manager, and the member error handler eh is installed at the end of
// the constructor.
// NOTE(review): the declaration of the messages object that eh is bound to
// is not visible in this excerpt.
150class CustomXMLValidator :
public SAX2XMLReaderImpl
154 CustomErrorHandler eh{&messages};
156 CustomXMLValidator(XMLGrammarPool *xsd)
157 : SAX2XMLReaderImpl(XMLPlatformUtils::fgMemoryManager, xsd)
// Core SAX2 switches: namespaces, namespace prefixes and validation are on.
161 setFeature(XMLUni::fgSAX2CoreNameSpaces,
true);
162 setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes,
true);
163 setFeature(XMLUni::fgSAX2CoreValidation,
true);
// Schema processing with full checking; validation errors are flagged as fatal.
167 setFeature(XMLUni::fgXercesSchema,
true);
168 setFeature(XMLUni::fgXercesSchemaFullChecking,
true);
169 setFeature(XMLUni::fgXercesValidationErrorAsFatal,
true);
// Cached grammars are used during parsing while schema loading is switched
// off -- presumably so that only the grammar pool given to the constructor
// is consulted; confirm against the Xerces feature documentation.
173 setFeature(XMLUni::fgXercesUseCachedGrammarInParse,
true);
178 setFeature(XMLUni::fgXercesLoadSchema,
false);
183 setFeature(XMLUni::fgXercesHandleMultipleImports,
true);
// Route parser diagnostics to the member error handler.
185 setErrorHandler(&eh);
191#include "../lib/worddelimiters_p.h"
192#include "../lib/xml_p.h"
196using KSyntaxHighlighting::WordDelimiters;
197using KSyntaxHighlighting::Xml::attrToBool;
201#if QT_VERSION < QT_VERSION_CHECK(6, 10, 0)
202static constexpr QStringView operator""_sv(
const char16_t *s, std::size_t n)
// Builds a version from its major and minor components; both default to 0.
215 KateVersion(
int majorRevision = 0,
int minorRevision = 0)
216 : majorRevision(majorRevision)
217 , minorRevision(minorRevision)
// Strict ordering of versions: the major revision is compared first and the
// minor revision only decides when both major revisions are equal.
221 bool operator<(
const KateVersion &version)
const
223 return majorRevision <
version.majorRevision || (majorRevision ==
version.majorRevision && minorRevision <
version.minorRevision);
// Registers a new definition under name and makes it the current one.
//   verStr           - kateversion string; it is stored verbatim and searched
//                      for a dot separator
//   filename         - file the definition comes from, used in warnings
//   name             - language name, also the key in m_definitions
//   alternativeNames - further names checked for duplicates
// NOTE(review): the code that converts verStr into a KateVersion and the
// condition guarding the "invalid kateversion" warning are not visible here.
230 void setDefinition(QStringView verStr,
const QString &filename,
const QString &name,
const QStringList &alternativeNames)
232 m_currentDefinition = &*m_definitions.
insert(name, Definition{});
233 m_currentDefinition->languageName =
name;
234 m_currentDefinition->filename = filename;
235 m_currentDefinition->kateVersionStr = verStr.
toString();
// A new definition starts outside of any keyword list or context.
236 m_currentKeywords =
nullptr;
237 m_currentContext =
nullptr;
239 const auto idx = verStr.
indexOf(u
'.');
241 qWarning() << filename <<
"invalid kateversion" << verStr;
// Warns when a name is already recorded in m_names, reporting the value
// stored for it, and records name -> filename. Whether the insert only
// happens on the non-duplicate path is not visible in this excerpt.
247 auto checkName = [
this, &filename](
char const *nameType,
const QString &
name) {
248 auto it = m_names.find(name);
249 if (it != m_names.end()) {
250 qWarning() << filename <<
"duplicate" << nameType <<
"with" << it.value();
253 m_names.insert(name, filename);
// The main name and every alternative name go through the same check.
256 checkName(
"name", name);
257 for (
const auto &alternativeName : alternativeNames) {
258 checkName(
"alternative name", alternativeName);
// Returns the kateVersion of the definition m_currentDefinition points to.
// No null check is visible here, so a current definition must already be set.
262 KateVersion currentVersion()
const
264 return m_currentDefinition->kateVersion;
// Feeds one XML token into the parser state for the current definition.
// Three groups of statements are visible below; they presumably belong to
// the start-element, end-element and character-data cases of a switch on
// the token type, but that switch is not visible in this excerpt.
// Failures are accumulated into m_success with "result && m_success", so a
// single failure keeps the flag false.
267 void processElement(
const QXmlStreamReader &xml)
// Group 1: inside a context every element becomes a new rule; inside a
// keyword list the element is only marked as a keyword item; otherwise the
// element name selects the handler.
271 if (m_currentContext) {
272 m_currentContext->rules.push_back(Context::Rule{});
273 auto &rule = m_currentContext->rules.back();
274 m_success = rule.parseElement(m_currentDefinition->filename, xml) && m_success;
275 m_currentContext->hasDynamicRule = m_currentContext->hasDynamicRule || rule.dynamic == XmlBool::True;
276 }
else if (m_currentKeywords) {
277 m_inKeywordItem =
true;
278 }
else if (xml.
name() == u
"context"_sv) {
279 processContextElement(xml);
280 }
else if (xml.
name() == u
"list"_sv) {
281 processListElement(xml);
282 }
else if (xml.
name() == u
"keywords"_sv) {
283 m_success = m_currentDefinition->parseKeywords(xml) && m_success;
284 }
else if (xml.
name() == u
"emptyLine"_sv) {
285 m_success = parseEmptyLine(m_currentDefinition->filename, xml) && m_success;
286 }
else if (xml.
name() == u
"itemData"_sv) {
287 m_success = m_currentDefinition->itemDatas.parseElement(m_currentDefinition->filename, xml) && m_success;
// Group 2: a context or list element leaves the current context or keyword
// list; any other element seen inside a keyword list is parsed as a keyword
// item using the text collected so far, which is then cleared.
292 if (m_currentContext && xml.
name() == u
"context"_sv) {
293 m_currentContext =
nullptr;
294 }
else if (m_currentKeywords && xml.
name() == u
"list"_sv) {
295 m_currentKeywords =
nullptr;
296 }
else if (m_currentKeywords) {
297 m_success = m_currentKeywords->items.parseElement(m_currentDefinition->filename, xml, m_textContent) && m_success;
298 m_textContent.clear();
299 m_inKeywordItem =
false;
// Group 3: text is only collected while inside a keyword item.
305 if (m_inKeywordItem) {
306 m_textContent += xml.
text();
// Resolves every context name stored in the parsed definitions and records
// which contexts are real switch targets as opposed to only being pulled in
// through IncludeRules. Ends by calling resolveIncludeRules().
// NOTE(review): the iterator advance calls and the handling after the
// "has no context" warning are not visible in this excerpt.
315 void resolveContexts()
317 QMutableMapIterator<QString, Definition> def(m_definitions);
318 while (def.hasNext()) {
320 auto &definition = def.value();
321 auto &contexts = definition.contexts;
323 if (contexts.isEmpty()) {
324 qWarning() << definition.filename <<
"has no context";
// A resolved context that is switched to (not a stay) counts as used.
329 auto markAsUsedContext = [](ContextName &contextName) {
330 if (!contextName.stay && contextName.context) {
331 contextName.context->isOnlyIncluded =
false;
335 QMutableMapIterator<QString, Context> contextIt(contexts);
336 while (contextIt.hasNext()) {
338 auto &context = contextIt.value();
// The three context-level targets are resolved first, then marked as used.
339 resolveContextName(definition, context, context.lineEndContext, context.line);
340 resolveContextName(definition, context, context.lineEmptyContext, context.line);
341 resolveContextName(definition, context, context.fallthroughContext, context.line);
342 markAsUsedContext(context.lineEndContext);
343 markAsUsedContext(context.lineEmptyContext);
344 markAsUsedContext(context.fallthroughContext);
345 for (
auto &rule : context.rules) {
346 rule.parentContext = &context;
347 resolveContextName(definition, context, rule.context, rule.line);
// An IncludeRules target is not a switch target, so it is not marked as
// used; it is only flagged when the rule sets includeAttrib to true.
348 if (rule.type != Context::Rule::Type::IncludeRules) {
349 markAsUsedContext(rule.context);
350 }
else if (rule.includeAttrib == XmlBool::True && rule.context.context) {
351 rule.context.context->referencedWithIncludeAttrib =
true;
// The first context is always treated as used.
// NOTE(review): the result of find() is dereferenced without a visible
// check against end(); confirm firstContextName always exists here.
356 auto *firstContext = &*definition.contexts.find(definition.firstContextName);
357 firstContext->isOnlyIncluded =
false;
358 definition.firstContext = firstContext;
361 resolveIncludeRules();
366 bool success = m_success;
368 const auto usedContexts = extractUsedContexts();
370 QMap<const Definition *, const Definition *> maxVersionByDefinitions;
371 QMap<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRules;
373 QMapIterator<QString, Definition> def(m_definitions);
374 while (def.hasNext()) {
376 const auto &definition = def.value();
377 const auto &filename = definition.filename;
379 auto *maxDef = maxKateVersionDefinition(definition, maxVersionByDefinitions);
380 if (maxDef != &definition) {
381 qWarning() << definition.filename <<
"depends on a language" << maxDef->languageName <<
"in version" << maxDef->kateVersionStr
382 <<
". Please, increase kateversion.";
386 QSet<ItemDatas::Style> usedAttributeNames;
387 QSet<ItemDatas::Style> ignoredAttributeNames;
388 success = checkKeywordsList(definition) && success;
389 success = checkContexts(definition, usedAttributeNames, ignoredAttributeNames, usedContexts, unreachableIncludedRules) && success;
392 const auto invalidNames = usedAttributeNames - definition.itemDatas.styleNames;
393 for (
const auto &styleName : invalidNames) {
394 qWarning() << filename <<
"line" << styleName.line <<
"reference of non-existing itemData attributes:" << styleName.name;
399 const auto ignoredNames = ignoredAttributeNames - usedAttributeNames;
400 for (
const auto &styleName : ignoredNames) {
401 qWarning() << filename <<
"line" << styleName.line <<
"attribute" << styleName.name
402 <<
"is never used. All uses are with lookAhead=true or <IncludeRules/>";
407 auto unusedNames = definition.itemDatas.styleNames - usedAttributeNames;
408 unusedNames -= ignoredNames;
409 for (
const auto &styleName : std::as_const(unusedNames)) {
410 qWarning() << filename <<
"line" << styleName.line <<
"unused itemData:" << styleName.name;
415 QMutableMapIterator<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRuleIt(unreachableIncludedRules);
416 while (unreachableIncludedRuleIt.hasNext()) {
417 unreachableIncludedRuleIt.next();
418 IncludedRuleUnreachableBy &unreachableRulesBy = unreachableIncludedRuleIt.value();
419 if (unreachableRulesBy.alwaysUnreachable) {
420 auto *rule = unreachableIncludedRuleIt.key();
422 if (!rule->parentContext->isOnlyIncluded) {
427 QSet<const Context::Rule *> rules;
428 auto &unreachableBy = unreachableRulesBy.unreachableBy;
429 unreachableBy.erase(std::remove_if(unreachableBy.begin(),
431 [&](
const RuleAndInclude &ruleAndInclude) {
432 if (rules.contains(ruleAndInclude.rule)) {
435 rules.
insert(ruleAndInclude.rule);
438 unreachableBy.end());
442 for (
auto &ruleAndInclude : std::as_const(unreachableBy)) {
443 message += u
"line "_sv;
446 message += ruleAndInclude.rule->parentContext->name;
447 if (rule->filename != ruleAndInclude.rule->filename) {
449 message += ruleAndInclude.rule->filename;
452 if (ruleAndInclude.includeRules) {
453 message += u
" via line "_sv;
456 message += u
"], "_sv;
460 qWarning() << rule->filename <<
"line" << rule->line <<
"no IncludeRule can reach this rule, hidden by" << message;
482 Context *context =
nullptr;
486 const QString &filename;
487 const QXmlStreamReader &xml;
488 const QXmlStreamAttribute &attr;
// Reads the current attribute into str when its name equals attrName.
// A warning is logged for an empty attribute. The assignment to str and the
// return values are on lines not visible in this excerpt.
493 bool extractString(QString &str, QStringView attrName)
495 if (attr.
name() != attrName) {
501 qWarning() << filename <<
"line" << xml.
lineNumber() << attrName <<
"attribute is empty";
// Reads the current attribute as a tri-state boolean when its name equals
// attrName: a null value gives Unspecified, otherwise the result of
// attrToBool() selects True or False. Return values are not visible here.
510 bool extractXmlBool(XmlBool &xmlBool, QStringView attrName)
512 if (attr.
name() != attrName) {
516 xmlBool = attr.
value().
isNull() ? XmlBool::Unspecified : attrToBool(attr.
value()) ? XmlBool::True : XmlBool::False;
// Reads the current attribute as an integer when its name equals attrName
// and warns when the conversion failed or the value is negative. The
// conversion itself and the return values are not visible in this excerpt.
// NOTE(review): the message says "positive" but the check only rejects
// values below zero, so 0 is accepted.
523 bool extractPositive(
int &positive, QStringView attrName)
525 if (attr.
name() != attrName) {
532 if (!ok || positive < 0) {
533 qWarning() << filename <<
"line" << xml.
lineNumber() << attrName <<
"should be a positive integer:" << attr.
value();
// Checks the current attribute as a color when its name equals attrName.
// The only test visible here is that the value is not empty; no color
// syntax is parsed. Return values are not visible in this excerpt.
542 bool checkColor(QStringView attrName)
544 if (attr.
name() != attrName) {
548 const auto value = attr.
value();
549 if (value.isEmpty() ) {
550 qWarning() << filename <<
"line" << xml.
lineNumber() << attrName <<
"should be a color:" << value;
// Reads the current attribute into c when its name equals attrName. The
// warning text shows the value must be exactly one character; the length
// check and the assignment to c are not visible in this excerpt.
559 bool extractChar(QChar &c, QStringView attrName)
561 if (attr.
name() != attrName) {
569 qWarning() << filename <<
"line" << xml.
lineNumber() << attrName <<
"must contain exactly one char:" << attr.
value();
// Final step for one attribute: isExtracted tells whether any extractor
// accepted it. The visible code logs "unknown attribute" with the attribute
// name; the condition and the returned value are not visible in this
// excerpt (presumably the warning is for the not-extracted case).
577 bool checkIfExtracted(
bool isExtracted)
583 qWarning() << filename <<
"line" << xml.
lineNumber() <<
"unknown attribute:" << attr.
name();
// Hashes an Item by its content only, matching operator== for Item.
594 friend size_t qHash(
const Item &item,
size_t seed = 0)
596 return qHash(item.content, seed);
// Two items are equal when their content is equal; no other member is
// compared.
599 friend bool operator==(
const Item &item0,
const Item &item1)
601 return item0.content == item1.content;
605 QList<Item> keywords;
// Stores one child element of a keyword list. content is the text of the
// element: an include element is inserted into includes, an item element is
// appended to keywords, and any other element name is reported as invalid.
// A warning is also logged for empty content (its condition is not visible).
// NOTE(review): the definition of line and the return values are on lines
// not visible in this excerpt.
608 bool parseElement(
const QString &filename,
const QXmlStreamReader &xml,
const QString &content)
615 qWarning() << filename <<
"line" << line <<
"is empty:" << xml.
name();
619 if (xml.
name() == u
"include"_sv) {
620 includes.
insert({content, line});
621 }
else if (xml.
name() == u
"item"_sv) {
622 keywords.
append({content, line});
624 qWarning() << filename <<
"line" << line <<
"invalid element:" << xml.
name();
636 bool parseElement(
const QString &filename,
const QXmlStreamReader &xml)
642 for (
const auto &attr : attrs) {
643 Parser parser{filename, xml, attr, success};
645 const bool isExtracted = parser.extractString(name, u
"name"_sv);
647 success = parser.checkIfExtracted(isExtracted);
679 bool isDotRegex =
false;
689 XmlBool firstNonSpace{};
692 XmlBool insensitive{};
701 XmlBool includeAttrib{};
711 QString sanitizedString;
714 QString additionalDeliminator;
715 QString weakDeliminator;
718 QList<const Rule *> includedRules;
721 QSet<const Rule *> includedIncludeRules;
723 Context
const *parentContext =
nullptr;
// Initialises this rule from one XML element: the element name is looked up
// in a static name-to-Type table, then the attributes are parsed and the
// mandatory ones checked. For RegExpr rules two derived values are computed
// (isDotRegex and sanitizedString). An element name without a table entry is
// reported as "unknown element".
// NOTE(review): the body of the lookup loop and the return statements are
// not visible in this excerpt.
727 bool parseElement(
const QString &filename,
const QXmlStreamReader &xml)
729 this->filename = filename;
732 using Pair = QPair<QStringView, Type>;
// NOTE(review): the last entry (keyword) is written without the _sv suffix
// used by every other entry; confirm this is intentional.
733 static const auto pairs = {
734 Pair{u
"AnyChar"_sv, Type::AnyChar},
735 Pair{u
"Detect2Chars"_sv, Type::Detect2Chars},
736 Pair{u
"DetectChar"_sv, Type::DetectChar},
737 Pair{u
"DetectIdentifier"_sv, Type::DetectIdentifier},
738 Pair{u
"DetectSpaces"_sv, Type::DetectSpaces},
739 Pair{u
"Float"_sv, Type::Float},
740 Pair{u
"HlCChar"_sv, Type::HlCChar},
741 Pair{u
"HlCHex"_sv, Type::HlCHex},
742 Pair{u
"HlCOct"_sv, Type::HlCOct},
743 Pair{u
"HlCStringChar"_sv, Type::HlCStringChar},
744 Pair{u
"IncludeRules"_sv, Type::IncludeRules},
745 Pair{u
"Int"_sv, Type::Int},
746 Pair{u
"LineContinue"_sv, Type::LineContinue},
747 Pair{u
"RangeDetect"_sv, Type::RangeDetect},
748 Pair{u
"RegExpr"_sv, Type::RegExpr},
749 Pair{u
"StringDetect"_sv, Type::StringDetect},
750 Pair{u
"WordDetect"_sv, Type::WordDetect},
751 Pair{u
"keyword", Type::keyword},
754 for (
auto pair : pairs) {
// Both steps always run; a failure in either one makes success false.
757 bool success = parseAttributes(filename, xml);
758 success = checkMandoryAttributes(filename, xml) && success;
759 if (success && type == Type::RegExpr) {
// isDotRegex: with every group parenthesis removed, the pattern is just a
// dot with an optional quantifier and an optional trailing dollar sign.
761 static const QRegularExpression isDot(QStringLiteral(R
"(^\(?\.(?:[*+][*+?]?|[*+]|\{1\})?\$?$)"));
763 static const QRegularExpression removeParentheses(QStringLiteral(R
"(\((?:\?:)?|\))"));
765 auto reg = QString(
string).replace(removeParentheses, QString());
766 isDotRegex = reg.contains(isDot);
// sanitizedString: the pattern without a trailing unescaped dot-star suffix.
769 static const QRegularExpression allSuffix(QStringLiteral(
"(?<!\\\\)[.][*][?+]?[$]?$"));
770 sanitizedString = string;
771 sanitizedString.
replace(allSuffix, QString());
// When nothing but an optional caret is left, keep the original pattern.
773 if (sanitizedString.
isEmpty() || sanitizedString == u
"^"_sv) {
774 sanitizedString = string;
781 qWarning() << filename <<
"line" << xml.
lineNumber() <<
"unknown element:" << xml.
name();
// Reads every attribute of the element into the matching member of this
// rule. The extractors are chained with || so the first one that accepts
// the attribute stops the chain. Attributes available for every rule type
// come first; the remaining ones are only tried when the rule type is in
// the list guarding them, so an attribute that does not belong to the rule
// type is passed to checkIfExtracted() as not extracted.
// NOTE(review): the definition of attrs and success, the return statement
// and the body of the final LineContinue check are not visible in this
// excerpt.
786 bool parseAttributes(
const QString &filename,
const QXmlStreamReader &xml)
791 for (
const auto &attr : attrs) {
792 Parser parser{filename, xml, attr, success};
795 const bool isExtracted
796 = parser.extractString(attribute, u
"attribute"_sv)
797 || parser.extractString(context.name, u
"context"_sv)
798 || parser.extractXmlBool(lookAhead, u
"lookAhead"_sv)
799 || parser.extractXmlBool(firstNonSpace, u
"firstNonSpace"_sv)
800 || parser.extractString(beginRegion, u
"beginRegion"_sv)
801 || parser.extractString(endRegion, u
"endRegion"_sv)
802 || parser.extractPositive(column, u
"column"_sv)
803 || ((
type == Type::RegExpr
804 ||
type == Type::StringDetect
805 ||
type == Type::WordDetect
806 ||
type == Type::keyword
807 ) && parser.extractXmlBool(insensitive, u
"insensitive"_sv))
808 || ((
type == Type::DetectChar
809 ||
type == Type::RegExpr
810 ||
type == Type::StringDetect
811 ||
type == Type::keyword
812 ) && parser.extractXmlBool(dynamic, u
"dynamic"_sv))
813 || ((
type == Type::RegExpr)
814 && parser.extractXmlBool(minimal, u
"minimal"_sv))
815 || ((
type == Type::DetectChar
816 ||
type == Type::Detect2Chars
817 ||
type == Type::LineContinue
818 ||
type == Type::RangeDetect
819 ) && parser.extractChar(char0, u
"char"_sv))
820 || ((
type == Type::Detect2Chars
821 ||
type == Type::RangeDetect
822 ) && parser.extractChar(char1, u
"char1"_sv))
823 || ((
type == Type::AnyChar
824 ||
type == Type::RegExpr
825 ||
type == Type::StringDetect
826 ||
type == Type::WordDetect
827 ||
type == Type::keyword
828 ) && parser.extractString(
string, u
"String"_sv))
829 || ((
type == Type::IncludeRules)
830 && parser.extractXmlBool(includeAttrib, u
"includeAttrib"_sv))
831 || ((
type == Type::Float
832 ||
type == Type::HlCHex
833 ||
type == Type::HlCOct
835 ||
type == Type::keyword
836 ||
type == Type::WordDetect
837 ) && (parser.extractString(additionalDeliminator, u
"additionalDeliminator"_sv)
838 || parser.extractString(weakDeliminator, u
"weakDeliminator"_sv)))
842 success = parser.checkIfExtracted(isExtracted);
// A LineContinue rule without a char attribute still has a null char0 here;
// the statement that handles this case is not visible in this excerpt.
845 if (type == Type::LineContinue && char0 == u
'\0') {
// Verifies that the attributes required by the rule type were provided.
// Each visible case sets missingAttr to the name of the absent attribute
// or to an empty QString when nothing is missing; the name is then reported
// with the "missing attribute" warning.
// NOTE(review): the switch header, some case labels, the break statements,
// the condition guarding the warning and the return values are not visible
// in this excerpt.
852 bool checkMandoryAttributes(
const QString &filename,
const QXmlStreamReader &xml)
// These rule types need a non-empty String attribute.
862 case Type::StringDetect:
863 case Type::WordDetect:
865 missingAttr =
string.
isEmpty() ? QStringLiteral(
"String") : QString();
// DetectChar needs char; a null char0 counts as missing.
868 case Type::DetectChar:
869 missingAttr = !char0.
unicode() ? QStringLiteral(
"char") : QString();
// Two-character rules need both char and char1; the message names whichever
// of them is absent.
872 case Type::Detect2Chars:
873 case Type::RangeDetect:
874 missingAttr = !char0.
unicode() && !char1.
unicode() ? QStringLiteral(
"char and char1")
875 : !char0.
unicode() ? QStringLiteral(
"char")
876 : !char1.
unicode() ? QStringLiteral(
"char1")
// IncludeRules needs a context name.
880 case Type::IncludeRules:
881 missingAttr = context.name.isEmpty() ? QStringLiteral(
"context") : QString();
// No assignment to missingAttr is visible for the remaining rule types.
884 case Type::DetectIdentifier:
885 case Type::DetectSpaces:
890 case Type::HlCStringChar:
892 case Type::LineContinue:
897 qWarning() << filename <<
"line" << xml.
lineNumber() <<
"missing attribute:" << missingAttr;
907 bool isOnlyIncluded =
true;
909 bool referencedWithIncludeAttrib =
false;
910 bool hasDynamicRule =
false;
913 ContextName lineEndContext;
914 ContextName lineEmptyContext;
915 ContextName fallthroughContext;
918 XmlBool fallthrough{};
919 XmlBool stopEmptyLineContextSwitchLoop{};
// Initialises this context from the attributes of a context element. Every
// attribute is offered to the extractor chain and the first extractor that
// accepts it wins. Warnings are logged for a missing name and a missing
// attribute attribute.
// NOTE(review): the conditions guarding the two "missing attribute"
// warnings, the definition of attrs and success, and the return value are
// not visible in this excerpt.
921 bool parseElement(
const QString &filename,
const QXmlStreamReader &xml)
928 for (
const auto &attr : attrs) {
929 Parser parser{filename, xml, attr, success};
// Local target only: noIndentationBasedFolding is accepted as a known
// attribute but its value is not stored in the context.
930 XmlBool noIndentationBasedFolding{};
933 const bool isExtracted = parser.extractString(name, u
"name"_sv)
934 || parser.extractString(attribute, u
"attribute"_sv)
935 || parser.extractString(lineEndContext.name, u
"lineEndContext"_sv)
936 || parser.extractString(lineEmptyContext.name, u
"lineEmptyContext"_sv)
937 || parser.extractString(fallthroughContext.name, u
"fallthroughContext"_sv)
938 || parser.extractXmlBool(dynamic, u
"dynamic"_sv)
939 || parser.extractXmlBool(fallthrough, u
"fallthrough"_sv)
940 || parser.extractXmlBool(stopEmptyLineContextSwitchLoop, u
"stopEmptyLineContextSwitchLoop"_sv)
941 || parser.extractXmlBool(noIndentationBasedFolding, u
"noIndentationBasedFolding"_sv);
944 success = parser.checkIfExtracted(isExtracted);
948 qWarning() << filename <<
"line" << xml.
lineNumber() <<
"missing attribute: name";
953 qWarning() << filename <<
"line" << xml.
lineNumber() <<
"missing attribute: attribute";
966 friend size_t qHash(
const Style &style,
size_t seed = 0)
968 return qHash(style.name, seed);
971 friend bool operator==(
const Style &style0,
const Style &style1)
973 return style0.name == style1.name;
977 QSet<Style> styleNames;
// Validates the attributes of one itemData element and registers its name
// in styleNames. The boolean style attributes all write into the same
// scratch variable and the color attributes are only checked, so apart
// from name and defStyleNum no value is kept by the visible code.
// A duplicate is detected by comparing the size of styleNames before and
// after the insertion, which itself is on a line not visible here.
// NOTE(review): the declarations of attrs, success, name, defStyleNum and
// boolean, and the return value, are not visible in this excerpt.
979 bool parseElement(
const QString &filename,
const QXmlStreamReader &xml)
988 for (
const auto &attr : attrs) {
989 Parser parser{filename, xml, attr, success};
992 const bool isExtracted
993 = parser.extractString(name, u
"name"_sv)
994 || parser.extractString(defStyleNum, u
"defStyleNum"_sv)
995 || parser.extractXmlBool(
boolean, u
"bold"_sv)
996 || parser.extractXmlBool(
boolean, u
"italic"_sv)
997 || parser.extractXmlBool(
boolean, u
"underline"_sv)
998 || parser.extractXmlBool(
boolean, u
"strikeOut"_sv)
999 || parser.extractXmlBool(
boolean, u
"spellChecking"_sv)
1000 || parser.checkColor(u
"color"_sv)
1001 || parser.checkColor(u
"selColor"_sv)
1002 || parser.checkColor(u
"backgroundColor"_sv)
1003 || parser.checkColor(u
"selBackgroundColor"_sv);
1006 success = parser.checkIfExtracted(isExtracted);
// An unchanged size after the insertion means the name was already present.
1010 const auto len = styleNames.
size();
1012 if (len == styleNames.
size()) {
1013 qWarning() << filename <<
"line" << xml.
lineNumber() <<
"itemData duplicate:" <<
name;
1023 QMap<QString, Keywords> keywordsList;
1024 QMap<QString, Context> contexts;
1025 ItemDatas itemDatas;
1026 QString firstContextName;
1027 const Context *firstContext =
nullptr;
1029 WordDelimiters wordDelimiters;
1030 KateVersion kateVersion{};
1031 QString kateVersionStr;
1032 QString languageName;
1033 QSet<const Definition *> referencedDefinitions;
1036 bool parseKeywords(
const QXmlStreamReader &xml)
1048 m_success = context.parseElement(m_currentDefinition->filename, xml) && m_success;
1049 if (m_currentDefinition->firstContextName.isEmpty()) {
1050 m_currentDefinition->firstContextName = context.name;
1052 if (m_currentDefinition->contexts.contains(context.name)) {
1053 qWarning() << m_currentDefinition->filename <<
"line" << xml.
lineNumber() <<
"duplicate context:" << context.name;
1056 m_currentContext = &*m_currentDefinition->contexts.insert(context.name, context);
1063 m_success = keywords.parseElement(m_currentDefinition->filename, xml) && m_success;
1064 if (m_currentDefinition->keywordsList.contains(keywords.name)) {
1065 qWarning() << m_currentDefinition->filename <<
"line" << xml.
lineNumber() <<
"duplicate list:" << keywords.name;
1068 m_currentKeywords = &*m_currentDefinition->keywordsList.insert(keywords.name, keywords);
1073 auto it = maxVersionByDefinitions.
find(&definition);
1074 if (it != maxVersionByDefinitions.
end()) {
1077 auto it = maxVersionByDefinitions.
insert(&definition, &definition);
1078 for (
const auto &referencedDef : definition.referencedDefinitions) {
1079 auto *maxDef = maxKateVersionDefinition(*referencedDef, maxVersionByDefinitions);
1080 if (it.value()->kateVersion < maxDef->kateVersion) {
1081 it.value() = maxDef;
// Expands every IncludeRules rule: starting from the context the rule
// points to, a worklist of contexts is walked and the rules found there are
// collected into rule.includedRules, while nested IncludeRules are recorded
// in rule.includedIncludeRules. usedContexts keeps each context from being
// queued more than once.
// NOTE(review): the declarations of def, contextIt, usedContexts and
// contexts, the iterator advance calls and the continue statements after
// the warnings are not visible in this excerpt.
1089 void resolveIncludeRules()
1095 while (def.hasNext()) {
1097 auto &definition = def.value();
1099 while (contextIt.hasNext()) {
1101 auto ¤tContext = contextIt.value();
1102 for (
auto &rule : currentContext.rules) {
1103 if (rule.type != Context::Rule::Type::IncludeRules) {
// An IncludeRules target must be a real context: a stay target and a target
// with a pop prefix are both reported.
1107 if (rule.context.stay) {
1108 qWarning() << definition.filename <<
"line" << rule.line <<
"IncludeRules refers to himself";
1113 if (rule.context.popCount) {
1114 qWarning() << definition.filename <<
"line" << rule.line <<
"IncludeRules with #pop prefix";
1118 if (!rule.context.context) {
// Seed the worklist with the directly included context.
1125 usedContexts.
clear();
1126 usedContexts.
insert(rule.context.context);
1128 contexts.
append(rule.context.context);
// contexts may grow while it is iterated, hence the index-based loop.
1130 for (
int i = 0; i < contexts.
size(); ++i) {
// NOTE(review): this assigns rather than ORs, so the flag computed for the
// own rules of currentContext is overwritten and, with several included
// contexts, only the last visited one decides the value. Confirm intended.
1131 currentContext.hasDynamicRule = contexts[i]->hasDynamicRule;
1132 for (
const auto &includedRule : contexts[i]->rules) {
1133 if (includedRule.type != Context::Rule::Type::IncludeRules) {
1134 rule.includedRules.append(&includedRule);
1135 }
else if (&rule == &includedRule) {
1136 qWarning() << definition.filename <<
"line" << rule.line <<
"IncludeRules refers to himself by recursivity";
1139 rule.includedIncludeRules.insert(&includedRule);
// A nested IncludeRules with no collected rules yet has its target context
// queued; otherwise its already collected rules are appended directly.
1141 if (includedRule.includedRules.isEmpty()) {
1142 const auto *context = includedRule.context.context;
1143 if (context && !usedContexts.
contains(context)) {
1144 contexts.
append(context);
1145 usedContexts.
insert(context);
1148 rule.includedRules.append(includedRule.includedRules);
1166 while (def.hasNext()) {
1168 const auto &definition = def.value();
1170 if (definition.firstContext) {
1171 usedContexts.
insert(definition.firstContext);
1173 contexts.
append(definition.firstContext);
1175 for (
int i = 0; i < contexts.
size(); ++i) {
1176 auto appendContext = [&](
const Context *context) {
1177 if (context && !usedContexts.
contains(context)) {
1178 contexts.
append(context);
1179 usedContexts.
insert(context);
1183 const auto *context = contexts[i];
1184 appendContext(context->lineEndContext.context);
1185 appendContext(context->lineEmptyContext.context);
1186 appendContext(context->fallthroughContext.context);
1188 for (
auto &rule : context->rules) {
1189 appendContext(rule.context.context);
1195 return usedContexts;
1198 struct RuleAndInclude {
1199 const Context::Rule *rule;
1200 const Context::Rule *includeRules;
1202 explicit operator bool()
const
1208 struct IncludedRuleUnreachableBy {
1209 QList<RuleAndInclude> unreachableBy;
1210 bool alwaysUnreachable =
true;
1214 bool checkContexts(
const Definition &definition,
1220 bool success =
true;
1223 while (contextIt.hasNext()) {
1226 const auto &context = contextIt.value();
1227 const auto &filename = definition.filename;
1229 if (!usedContexts.
contains(&context)) {
1230 qWarning() << filename <<
"line" << context.line <<
"unused context:" << context.name;
1235 if (context.name.startsWith(u
"#pop"_sv)) {
1236 qWarning() << filename <<
"line" << context.line <<
"the context name must not start with '#pop':" << context.name;
1240 if (!context.attribute.isEmpty() && (!context.isOnlyIncluded || context.referencedWithIncludeAttrib)) {
1241 usedAttributeNames.
insert({context.attribute, context.line});
1244 success = checkContextAttribute(definition, context) && success;
1245 success = checkUreachableRules(definition.filename, context, unreachableIncludedRules) && success;
1246 success = suggestRuleMerger(definition.filename, context) && success;
1248 for (
const auto &rule : context.rules) {
1249 if (!rule.attribute.isEmpty()) {
1250 if (rule.lookAhead != XmlBool::True) {
1251 usedAttributeNames.
insert({rule.attribute, rule.line});
1253 ignoredAttributeNames.
insert({rule.attribute, rule.line});
1256 success = checkLookAhead(rule) && success;
1257 success = checkStringDetect(rule) && success;
1258 success = checkWordDetect(rule) && success;
1259 success = checkKeyword(definition, rule) && success;
1260 success = checkRegExpr(filename, rule, context) && success;
1261 success = checkDelimiters(definition, rule) && success;
1277 bool checkRegExpr(
const QString &filename,
const Context::Rule &rule,
const Context &context)
const
1280 if (rule.type == Context::Rule::Type::RegExpr && !rule.string.isEmpty()) {
1282 if (!checkRegularExpression(rule.filename, regexp, rule.line)) {
1287 if (rule.dynamic == XmlBool::True) {
1289 if (!rule.string.contains(placeHolder)) {
1290 qWarning() << rule.filename <<
"line" << rule.line <<
"broken regex:" << rule.string <<
"problem: dynamic=true but no %\\d+ placeholder";
1295 if (rule.lookAhead == XmlBool::True && (rule.string.endsWith(u
".*$"_sv) || rule.string.endsWith(u
".*"_sv)) && -1 == rule.string.indexOf(u
'|')) {
1296 qWarning() << rule.filename <<
"line" << rule.line <<
"RegExpr with lookAhead=1 doesn't need to end with '.*' or '.*$':" << rule.string;
1300 auto reg = (rule.lookAhead == XmlBool::True) ? rule.sanitizedString : rule.string;
1301 if (rule.lookAhead == XmlBool::True) {
1303 R
"(((?<!\\)\\(?:[DSWdsw]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4})|(?<!\\)[^])}\\]|(?=\\)\\\\)[*][?+]?$)"));
1304 reg.replace(removeAllSuffix, QString());
1313 QStringLiteral(R
"(^\^?(?:\((?:\?:)?)?\^?(?:\\s|\[(?:\\s| (?:\t|\\t)|(?:\t|\\t) )\])\)?(?:[*+][*+?]?|[*+])?\)?\)?$)"));
1314 if (rule.string.contains(isDetectSpaces)) {
1315 char const *extraMsg = rule.string.contains(u
'^') ?
"+ column=\"0\" or firstNonSpace=\"1\"" :
"";
1316 qWarning() << rule.filename <<
"line" << rule.line <<
"RegExpr should be replaced by DetectSpaces / DetectChar / AnyChar" << extraMsg <<
":"
1321#define REG_ESCAPE_CHAR R"(\\(?:[^0BDPSWbdpswoux]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4}))"
1322#define REG_CHAR "(?:" REG_ESCAPE_CHAR "|\\[(?:" REG_ESCAPE_CHAR "|.)\\]|[^[.^])"
1326 "\\.\\*[?+]?" REG_CHAR
"|"
1327 "\\[\\^(" REG_ESCAPE_CHAR
"|.)\\]\\*[?+]?\\1"
1329 if ((rule.lookAhead == XmlBool::True || rule.minimal == XmlBool::True || rule.string.contains(u
".*?"_sv) || rule.string.contains(u
"[^"_sv))
1330 && reg.contains(isRange)) {
1331 qWarning() << rule.filename <<
"line" << rule.line <<
"RegExpr should be replaced by RangeDetect:" << rule.string;
1336 static const QRegularExpression isAnyChar(QStringLiteral(R
"(^(\^|\((\?:)?)*\[(?!\^)[-\]]?(\\[^0BDPSWbdpswoux]|[^-\]\\])*\]\)*$)"));
1337 if (rule.string.contains(isAnyChar)) {
1338 auto extra = (reg[0] == u
'^' || reg[1] == u
'^') ?
"with column=\"0\"" :
"";
1339 qWarning() << rule.filename <<
"line" << rule.line <<
"RegExpr should be replaced by AnyChar:" << rule.string << extra;
1344 static const QRegularExpression isLineContinue(QStringLiteral(
"^\\^?" REG_CHAR
"\\$$"));
1345 if (reg.contains(isLineContinue)) {
1346 auto extra = (reg[0] == u
'^') ?
"with column=\"0\"" :
"";
1347 qWarning() << rule.filename <<
"line" << rule.line <<
"RegExpr should be replaced by LineContinue:" << rule.string << extra;
1351#define REG_DIGIT uR"((\[(0-9|\\d)\]|\\d))"
1352#define REG_DIGITS REG_DIGIT u"([+]|" REG_DIGIT u"[*])"
1353#define REG_DOT uR"((\\[.]|\[.\]))"
1355 static const QRegularExpression isInt(uR
"(^(\((\?:)?)*\\b(\((\?:)?)*)" REG_DIGITS uR"(\)*$)"_s);
1356 if (reg.contains(isInt)) {
1357 qWarning() << rule.filename <<
"line" << rule.line <<
"RegExpr should be replaced by Int:" << rule.string;
1363 uR
"(^(\\b|\((\?:)?)*)" REG_DIGITS REG_DOT
1364 REG_DIGIT u"[*][|]" REG_DOT REG_DIGITS uR
"(\)+\((\?:)?\[[eE]+\]\[(\\?-\\?\+|\\?\+\\?-)\]\?)" REG_DIGITS uR"(\)\?\)*$)"_s);
1365 if (reg.contains(isFloat)) {
1366 qWarning() << rule.filename <<
"line" << rule.line <<
"RegExpr should be replaced by Float:" << rule.string;
1375 reg.replace(sanitize1, QStringLiteral(
"_"));
1378#undef REG_ESCAPE_CHAR
1381 static const QRegularExpression isMinimal(QStringLiteral(
"(?![.][*+?][$]?[)]*$)[.][*+?][^?+]"));
1384 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(isMinimal) && !reg.contains(hasNotGreedy)
1385 && (!rule.context.context || !rule.context.context->hasDynamicRule || regexp.captureCount() == 0)
1386 && (reg.back() != u
'$' || reg.contains(u
'|'))) {
1387 qWarning() << rule.filename <<
"line" << rule.line
1388 <<
"RegExpr should be have minimal=\"1\" or use lazy operator (i.g, '.*' -> '.*?'):" << rule.string;
1394 reg.replace(sanitize2, QStringLiteral("___"));
1397 static const QRegularExpression sanitize3(QStringLiteral(R
"(\[(?:\^\]?[^]]*|\]?[^]\\]*?\\.[^]]*|\][^]]{2,}|[^]]{3,})\]|(\[\]?[^]]*\]))"));
1398 reg.replace(sanitize3, QStringLiteral("...\\1"));
1402 reg.replace(sanitize4, QStringLiteral("_"));
1404 const int len = reg.size();
1406 static const QRegularExpression toInsensitive(QStringLiteral(R
"(\[(?:([^]])\1)\])"));
1407 reg = reg.toUpper();
1408 reg.replace(toInsensitive, QString());
1412 static const QRegularExpression isStringDetect(QStringLiteral(R
"(^\^?(?:[^|\\?*+$^[{(.]|{(?!\d+,\d*}|,\d+})|\(\?:)+$)"));
1413 if (reg.contains(isStringDetect)) {
1414 char const *extraMsg = rule.string.contains(u
'^') ?
"+ column=\"0\" or firstNonSpace=\"1\"" :
"";
1415 qWarning() << rule.filename <<
"line" << rule.line <<
"RegExpr should be replaced by StringDetect / Detect2Chars / DetectChar" << extraMsg
1416 <<
":" << rule.string;
1417 if (len != reg.size()) {
1418 qWarning() << rule.filename <<
"line" << rule.line <<
"insensitive=\"1\" missing:" << rule.string;
1424 if (rule.column == -1) {
1429 auto first = std::as_const(reg).begin();
1430 auto last = std::as_const(reg).end();
1433 while (u
'(' == *first) {
1436 if (u
'?' == *first || u
':' == first[1]) {
1441 if (u
'^' == *first) {
1442 const int bolDepth = depth;
1445 while (++first != last) {
1446 if (u
'(' == *first) {
1448 }
else if (u
')' == *first) {
1450 if (depth < bolDepth) {
1452 if (first + 1 != last && u
"*?"_sv.contains(first[1])) {
1457 }
else if (u
'|' == *first) {
1459 if (depth <= bolDepth) {
1467 qWarning() << rule.filename <<
"line" << rule.line <<
"column=\"0\" missing with RegExpr:" << rule.string;
1474 if (rule.column == 0 && !rule.isDotRegex) {
1475 bool hasStartOfLine =
false;
1476 auto first = std::as_const(reg).begin();
1477 auto last = std::as_const(reg).end();
1478 for (; first != last; ++first) {
1479 if (*first == u
'^') {
1480 hasStartOfLine =
true;
1482 }
else if (*first == u
'(') {
1483 if (last - first >= 3 && first[1] == u
'?' && first[2] == u
':') {
1491 if (!hasStartOfLine) {
1492 qWarning() << rule.filename <<
"line" << rule.line
1493 <<
"start of line missing in the pattern with column=\"0\" (i.e. abc -> ^abc):" << rule.string;
1498 bool useCapture =
false;
1501 if (regexp.captureCount()) {
1504 while (maxCapture && !s.contains(referenceNames[maxCapture - 1])) {
1510 int maxCaptureUsed = 0;
1512 if (rule.context.context && !rule.context.stay) {
1513 for (
const auto &nextRule : std::as_const(rule.context.context->rules)) {
1514 if (nextRule.dynamic == XmlBool::True) {
1526 int maxDynamicCapture = maximalCapture(cap, nextRule.string);
1527 maxCaptureUsed = std::max(maxCaptureUsed, maxDynamicCapture);
1554 const int maxBackReference = std::max(maximalCapture(num1, rule.string), maximalCapture(num2, rule.string));
1556 const int maxCapture = std::max(maxCaptureUsed, maxBackReference);
1558 if (maxCapture && regexp.captureCount() > maxCapture) {
1559 qWarning() << rule.filename <<
"line" << rule.line <<
"RegExpr with" << regexp.captureCount() <<
"captures but only" << maxCapture
1560 <<
"are used. Please, replace '(...)' with '(?:...)':" << rule.string;
1564 useCapture = maxCapture;
1570 QStringLiteral(R
"(^(\((\?:)?|\^)*\[(\\p\{L\}|_){2}\]([+][?+]?)?\[(\\p\{N\}|\\p\{L\}|_){3}\][*][?+]?\)*$)"));
1571 if (rule.string.contains(isDetectIdentifier)) {
1572 qWarning() << rule.filename <<
"line" << rule.line <<
"RegExpr should be replaced by DetectIdentifier:" << rule.string;
1577 if (rule.isDotRegex) {
1579 int i = &rule - context.rules.data() + 1;
1580 const bool hasColumn = (rule.column != -1);
1581 const bool hasFirstNonSpace = (rule.firstNonSpace == XmlBool::True);
1582 const bool isSpecial = (hasColumn || hasFirstNonSpace);
1583 for (; i < context.rules.size(); ++i) {
1584 auto &rule2 = context.rules[i];
1585 if (rule2.type == Context::Rule::Type::IncludeRules && isSpecial) {
1586 i = context.rules.size();
1590 const bool hasColumn2 = (rule2.column != -1);
1591 const bool hasFirstNonSpace2 = (rule2.firstNonSpace == XmlBool::True);
1592 if ((!isSpecial && !hasColumn2 && !hasFirstNonSpace2) || (hasColumn && rule.column == rule2.column)
1593 || (hasFirstNonSpace && hasFirstNonSpace2)) {
1598 auto ruleFilename = (filename == rule.filename) ?
QString() : u
"in "_sv + rule.filename;
1599 if (i == context.rules.size()) {
1600 if (rule.lookAhead == XmlBool::True && rule.firstNonSpace != XmlBool::True && rule.column == -1 && rule.beginRegion.isEmpty()
1601 && rule.endRegion.isEmpty() && !useCapture) {
1602 qWarning() << filename <<
"context line" << context.line <<
": RegExpr line" << rule.line << ruleFilename
1603 <<
"should be replaced by fallthroughContext:" << rule.string;
1606 auto &nextRule = context.rules[i];
1607 auto nextRuleFilename = (filename == nextRule.filename) ?
QString() : u
"in "_sv + nextRule.filename;
1608 qWarning() << filename <<
"context line" << context.line <<
"contains unreachable element line" << nextRule.line << nextRuleFilename
1609 <<
"because a dot RegExpr is used line" << rule.line << ruleFilename;
1613 static const QRegularExpression unnecessaryQuantifier1(QStringLiteral(R
"([*+?]([.][*+?]{0,2})?$)"));
1614 static const QRegularExpression unnecessaryQuantifier2(QStringLiteral(R
"([*+?]([.][*+?]{0,2})?[)]*$)"));
1615 auto &unnecessaryQuantifier = useCapture ? unnecessaryQuantifier1 : unnecessaryQuantifier2;
1616 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(unnecessaryQuantifier)) {
1617 qWarning() << rule.filename <<
"line" << rule.line
1618 <<
"Last quantifier is not necessary (i.g., 'xyz*' -> 'xy', 'xyz+.' -> 'xyz.'):" << rule.string;
1630 bool success =
true;
1633 XmlBool casesensitive{};
1636 for (
auto &attr : attrs) {
1637 Parser parser{filename, xml, attr, success};
1639 const bool isExtracted = parser.extractString(pattern, u
"regexpr"_sv) || parser.extractXmlBool(casesensitive, u
"casesensitive"_sv);
1641 success = parser.checkIfExtracted(isExtracted);
1645 qWarning() << filename <<
"line" << xml.
lineNumber() <<
"missing attribute: regexpr";
1659 const auto pattern = regexp.
pattern();
1663 qWarning() << filename <<
"line" << line <<
"broken regex:" << pattern <<
"problem:" << regexp.
errorString() <<
"at offset"
1669 const int azOffset = std::max(pattern.
indexOf(u
"A-z"_sv), pattern.
indexOf(u
"a-Z"_sv));
1670 if (azOffset >= 0) {
1671 qWarning() << filename <<
"line" << line <<
"broken regex:" << pattern <<
"problem: [a-Z] or [A-z] at offset" << azOffset;
// Checks version-dependent attributes of a context and emits a qWarning for each mismatch:
//  - fallthroughContext is set, fallthrough is True and kateversion >= 5.62
//    (the fallthrough attribute is reported as unnecessary);
//  - fallthroughContext is set, fallthrough is not True and kateversion < 5.62;
//  - stopEmptyLineContextSwitchLoop is specified and kateversion < 5.103.
// NOTE(review): the lines that update and return `success` are not visible in this
// excerpt -- confirm the return contract against the full file.
1680 bool checkContextAttribute(
const Definition &definition,
const Context &context)
const
1682 bool success =
true;
// The fallthrough checks only apply when a fallthroughContext name is present.
1684 if (!context.fallthroughContext.name.isEmpty()) {
1685 const bool mandatoryFallthroughAttribute = definition.kateVersion < KateVersion{5, 62};
1686 if (context.fallthrough == XmlBool::True && !mandatoryFallthroughAttribute) {
1687 qWarning() << definition.filename <<
"line" << context.line <<
"fallthrough attribute is unnecessary with kateversion >= 5.62 in context"
1690 }
else if (context.fallthrough != XmlBool::True && mandatoryFallthroughAttribute) {
1691 qWarning() << definition.filename <<
"line" << context.line
1692 <<
"fallthroughContext attribute without fallthrough=\"1\" attribute is only valid with kateversion >= 5.62 in context"
// Any explicit value (not XmlBool::Unspecified) requires kateversion >= 5.103.
1698 if (context.stopEmptyLineContextSwitchLoop != XmlBool::Unspecified && definition.kateVersion < KateVersion{5, 103}) {
1699 qWarning() << definition.filename <<
"line" << context.line
1700 <<
"stopEmptyLineContextSwitchLoop attribute is only valid with kateversion >= 5.103 in context" << context.name;
// Checks the additionalDeliminator / weakDeliminator attributes of a rule.
// Rules with both attributes empty take the first branch (its body is not visible here).
// Otherwise the function warns when kateversion < 5.79, then looks for an
// additionalDeliminator character that is not already in definition.wordDelimiters and
// for a weakDeliminator character that is in definition.wordDelimiters.
// The final warning reports an unnecessary use of these attributes.
// NOTE(review): what the two loops do on a hit, and the condition guarding the final
// warning, are on lines missing from this excerpt -- verify against the full file.
1708 bool checkDelimiters(
const Definition &definition,
const Context::Rule &rule)
const
1710 if (rule.additionalDeliminator.isEmpty() && rule.weakDeliminator.isEmpty()) {
1714 bool success =
true;
1716 if (definition.kateVersion < KateVersion{5, 79}) {
1717 qWarning() << definition.filename <<
"line" << rule.line
1718 <<
"additionalDeliminator and weakDeliminator are only available since version \"5.79\". Please, increase kateversion.";
1722 for (
QChar c : rule.additionalDeliminator) {
1723 if (!definition.wordDelimiters.contains(c)) {
1728 for (
QChar c : rule.weakDeliminator) {
1729 if (definition.wordDelimiters.contains(c)) {
1734 qWarning() << rule.filename <<
"line" << rule.line <<
"unnecessary use of additionalDeliminator and/or weakDeliminator" << rule.string;
// For a rule of type `keyword`, looks up rule.string in definition.keywordsList and
// warns when no keyword list with that name exists. Other rule types skip the lookup.
// NOTE(review): the return statements are not visible in this excerpt.
1739 bool checkKeyword(
const Definition &definition,
const Context::Rule &rule)
const
1741 if (rule.type == Context::Rule::Type::keyword) {
1742 auto it = definition.keywordsList.find(rule.string);
1743 if (it == definition.keywordsList.end()) {
1744 qWarning() << rule.filename <<
"line" << rule.line <<
"reference of non-existing keyword list:" << rule.string;
// Warns about a rule that combines lookAhead with a #stay context: the warning text
// describes this combination as an infinite loop.
// NOTE(review): the value returned in each case is not visible in this excerpt.
1753 bool checkLookAhead(
const Context::Rule &rule)
const
1755 if (rule.lookAhead == XmlBool::True && rule.context.stay) {
1756 qWarning() << rule.filename <<
"line" << rule.line <<
"infinite loop: lookAhead with context #stay";
// For a StringDetect rule marked dynamic, warns when rule.string does not contain
// `placeHolder`; per the warning text this is the %\d+ placeholder.
// NOTE(review): the definition of `placeHolder` and the return statements are on lines
// missing from this excerpt. The message says "broken regex" although the rule checked
// here is a StringDetect -- confirm the wording is intended.
1762 bool checkStringDetect(
const Context::Rule &rule)
const
1764 if (rule.type == Context::Rule::Type::StringDetect) {
1766 if (rule.dynamic == XmlBool::True) {
1768 if (!rule.string.contains(placeHolder)) {
1769 qWarning() << rule.filename <<
"line" << rule.line <<
"broken regex:" << rule.string <<
"problem: dynamic=true but no %\\d+ placeholder";
// For a WordDetect rule, warns when the non-empty rule.string starts or ends with a
// whitespace character (QChar::isSpace on the first / last character).
// The isEmpty() guard keeps front()/back() from being called on an empty string.
// NOTE(review): the return statements are not visible in this excerpt.
1778 bool checkWordDetect(
const Context::Rule &rule)
const
1780 if (rule.type == Context::Rule::Type::WordDetect) {
1781 if (!rule.string.isEmpty() && (rule.string.front().isSpace() || rule.string.back().isSpace())) {
1782 qWarning() << rule.filename <<
"line" << rule.line <<
"contains a space at the beginning or end of the string:" << rule.string;
// Walks the keyword lists through the iterator `keywordsIt` and, for each list:
//  - warns about every <include> entry when kateversion < 5.53, and validates each
//    include with checkKeywordInclude(), folding the result into `success`;
//  - warns about every keyword that contains one of definition.wordDelimiters.
// NOTE(review): the declaration of `keywordsIt`, any update of `success` next to the
// warnings, and the final return are on lines missing from this excerpt.
1790 bool checkKeywordsList(
const Definition &definition)
const
1792 bool success =
true;
// True when the definition targets a version older than 5.53.
1794 bool includeNotSupport = (definition.kateVersion < KateVersion{5, 53});
1796 while (keywordsIt.hasNext()) {
1799 for (
const auto &include : keywordsIt.value().items.includes) {
1800 if (includeNotSupport) {
1801 qWarning() << definition.filename <<
"line" << include.line
1802 <<
"<include> is only available since version \"5.53\". Please, increase kateversion.";
// checkKeywordInclude() is evaluated first, so it runs even when `success` is already false.
1805 success = checkKeywordInclude(definition, include) && success;
1810 for (
const auto& keyword : keywordsIt.value().items.keywords) {
1811 for (
QChar c : keyword.content) {
1812 if (definition.wordDelimiters.contains(c)) {
1813 qWarning() << definition.filename <<
"line" << keyword.line <<
"keyword with delimiter:" << c <<
"in" << keyword.content;
// Validates one keyword <include> entry and returns whether the referenced keyword
// list was found.
// include.content is searched for "##". The visible code has two lookups:
//  - a lookup of the whole content in definition.keywordsList;
//  - a split into listName (before "##") and defName (after "##"), where defName is
//    looked up in m_definitions and listName in that definition's keywordsList.
// A warning is emitted for an unknown definition and for an unknown keyword name.
// NOTE(review): the branch on `idx` that selects between the two lookups, and what
// happens after the "unknown definition" warning, are on lines missing from this excerpt.
1825 bool checkKeywordInclude(
const Definition &definition,
const Keywords::Items::Item &include)
const
1827 bool containsKeywordName =
true;
1828 int const idx = include.content.indexOf(u
"##"_sv);
1830 auto it = definition.keywordsList.find(include.content);
1831 containsKeywordName = (it != definition.keywordsList.end());
1833 auto defName = include.content.sliced(idx + 2);
1834 auto listName = include.content.sliced(0, idx);
1835 auto it = m_definitions.find(defName);
1836 if (it == m_definitions.end()) {
1837 qWarning() << definition.filename <<
"line" << include.line <<
"unknown definition in" << include.content;
1840 containsKeywordName = it->keywordsList.contains(listName);
1843 if (!containsKeywordName) {
1844 qWarning() << definition.filename <<
"line" << include.line <<
"unknown keyword name in" << include.content;
1847 return containsKeywordName;
1856 bool checkUreachableRules(
const QString &filename,
1857 const Context &context,
1860 if (context.isOnlyIncluded) {
// Records {&rule, includeRules} in the slot matching the rule's position constraint:
//  - firstNonSpace == True -> `firstNonSpace`
//  - column == 0           -> `column0`
//  - column > 0            -> `columnGreaterThan0[column]` (operator[] creates the entry)
// The local lambda `set` copies the previous slot content into `old` before overwriting.
// NOTE(review): the lambda's return statement and the fallback for rules without a
// position constraint (presumably the `normal` slot) are on lines missing from this
// excerpt -- confirm. Call sites feed the returned value to updateUnreachable1().
1865 RuleAndInclude setRule(
const Context::Rule &rule,
const Context::Rule *includeRules =
nullptr)
1867 auto set = [&](RuleAndInclude &ruleAndInclude) {
1868 auto old = ruleAndInclude;
1869 ruleAndInclude = {&rule, includeRules};
1873 if (rule.firstNonSpace == XmlBool::True) {
1874 return set(firstNonSpace);
1875 }
else if (rule.column == 0) {
1876 return set(column0);
1877 }
else if (rule.column > 0) {
1878 return set(columnGreaterThan0[rule.column]);
// One slot per position constraint; columns greater than 0 are keyed by column number.
1885 RuleAndInclude normal;
1886 RuleAndInclude column0;
1887 QMap<int, RuleAndInclude> columnGreaterThan0;
1888 RuleAndInclude firstNonSpace;
// Character -> RuleAndInclude table with two storages: the fixed array m_asciiMap
// indexed by QChar::unicode(), and the QMap m_utf8Map for other characters.
// NOTE(review): the condition choosing between the two storages is on a line missing
// from this excerpt (in both find() and append()).

// Returns the entry stored for `c`, or RuleAndInclude{nullptr, nullptr} when the map
// lookup finds nothing.
1894 RuleAndInclude
find(QChar c)
const
1897 return m_asciiMap[c.
unicode()];
1899 auto it = m_utf8Map.find(c);
1900 return it == m_utf8Map.end() ? RuleAndInclude{
nullptr,
nullptr} : it.value();
// Multi-character lookup returning a list of entries.
// NOTE(review): the loop that fills `result` is not visible in this excerpt.
1905 QList<RuleAndInclude>
find(QStringView s)
const
1907 QList<RuleAndInclude> result;
// Stores {&rule, includeRule} for `c`, overwriting any previous entry for that character.
1923 void append(QChar c,
const Context::Rule &rule,
const Context::Rule *includeRule =
nullptr)
1926 m_asciiMap[c.
unicode()] = {&rule, includeRule};
1928 m_utf8Map[c] = {&rule, includeRule};
// Registers the same rule for characters of `s` through the single-character overload.
1933 void append(QStringView s,
const Context::Rule &rule,
const Context::Rule *includeRule =
nullptr)
1936 append(c, rule, includeRule);
// NOTE(review): 127 slots give valid indices 0..126. Confirm that the hidden range
// check in find()/append() excludes code point 127; a `< 128` test would index one
// element past the end of this array.
1941 RuleAndInclude m_asciiMap[127]{};
1942 QMap<QChar, RuleAndInclude> m_utf8Map;
// Groups one CharTable per position constraint: column 0, each column greater than 0
// (keyed by column number) and firstNonSpace.
// NOTE(review): CharTableArray also reads a member named `chars` from this struct;
// its declaration is on a line missing from this excerpt.
1945 struct Char4Tables {
1947 CharTable charsColumn0;
1948 QMap<int, CharTable> charsColumnGreaterThan0;
1949 CharTable charsFirstNonSpace;
// View over the CharTables of a Char4Tables that apply to one rule. It holds at most
// three table pointers (m_charTables[3]) and m_size counts how many are in use.
// NOTE(review): the declaration of m_size and several bodies are on lines missing from
// this excerpt.
1953 struct CharTableArray {
// Selects the tables for `rule`: charsFirstNonSpace when firstNonSpace is True, then
// charsColumn0 or charsColumnGreaterThan0[column] depending on rule.column, and
// tables.chars. That is at most three tables, matching the array size.
1956 CharTableArray(Char4Tables &tables,
const Context::Rule &rule)
1958 if (rule.firstNonSpace == XmlBool::True) {
1959 appendTable(tables.charsFirstNonSpace);
1962 if (rule.column == 0) {
1963 appendTable(tables.charsColumn0);
1964 }
else if (rule.column > 0) {
1965 appendTable(tables.charsColumnGreaterThan0[rule.column]);
1968 appendTable(tables.chars);
// NOTE(review): body not visible here. Call sites invoke it between find() and append().
1972 void removeNonSpecialWhenSpecial()
// Returns the first truthy entry for `c` among the selected tables, in selection order,
// or RuleAndInclude{nullptr, nullptr} when no table has one.
1980 RuleAndInclude
find(QChar c)
const
1982 for (
int i = 0; i < m_size; ++i) {
1983 if (
auto ruleAndInclude = m_charTables[i]->
find(c)) {
1984 return ruleAndInclude;
1987 return RuleAndInclude{
nullptr,
nullptr};
// Multi-character lookup. After the first table with a non-empty result, an inner
// loop continues over the remaining tables (its body is not visible here). Returns an
// empty list when every table yields nothing.
1992 QList<RuleAndInclude>
find(QStringView s)
const
1994 for (
int i = 0; i < m_size; ++i) {
1995 auto result = m_charTables[i]->find(s);
1996 if (result.
size()) {
1997 while (++i < m_size) {
2003 return QList<RuleAndInclude>();
// Registers `rule` for `c` in every selected table.
2007 void append(QChar c,
const Context::Rule &rule,
const Context::Rule *includeRule =
nullptr)
2009 for (
int i = 0; i < m_size; ++i) {
2010 m_charTables[i]->append(c, rule, includeRule);
// Registers `rule` for the characters of `s` in every selected table.
2015 void append(QStringView s,
const Context::Rule &rule,
const Context::Rule *includeRule =
nullptr)
2017 for (
int i = 0; i < m_size; ++i) {
2018 m_charTables[i]->append(s, rule, includeRule);
// Stores `t` at index m_size.
// NOTE(review): the increment of m_size is presumably on the next, missing line.
2023 void appendTable(CharTable &t)
2025 m_charTables[m_size] = &t;
2029 CharTable *m_charTables[3];
// Pairs a rule with the IncludeRules rule attached to it when the observed list is
// built. `includeRules` is nullptr when none was attached.
2033 struct ObservableRule {
2034 const Context::Rule *rule;
2035 const Context::Rule *includeRules;
// True when the entry is itself the IncludeRules rule (both pointers are the same).
2037 bool hasResolvedIncludeRules()
const
2039 return rule == includeRules;
// Iterates over the observed rules that precede `endRule`. When it meets an
// IncludeRules entry that has no attached includeRules and a non-empty includedRules
// list, it walks that list through m_includedRules / m_i2.
// NOTE(review): the declarations of m_i, m_i2 and m_end, the increments, and the
// end-of-iteration return are on lines missing from this excerpt.
2044 struct RuleIterator {
// m_end is the index of `endRule` inside `rules`, computed by pointer subtraction, so
// `endRule` must be an element of `rules`.
2045 RuleIterator(
const QList<ObservableRule> &rules,
const ObservableRule &endRule)
2046 : m_end(&endRule - rules.data())
// Returns the next rule to compare against. The call sites loop while the result is
// non-null.
2052 const Context::Rule *
next()
2055 if (m_includedRules) {
2057 if (m_i2 != m_includedRules->size()) {
2058 return (*m_includedRules)[m_i2];
2061 m_includedRules =
nullptr;
2065 while (m_i < m_end && m_rules[m_i].rule->type == Context::Rule::Type::IncludeRules) {
2066 if (!m_rules[m_i].includeRules && m_rules[m_i].rule->includedRules.size()) {
2068 m_includedRules = &m_rules[m_i].rule->includedRules;
2069 return (*m_includedRules)[m_i2];
2076 return m_rules[m_i - 1].rule;
// IncludeRules rule associated with the current position: the IncludeRules entry
// itself while inside an included list, otherwise the entry's attached includeRules.
2083 const Context::Rule *currentIncludeRules()
const
2085 return m_includedRules ? m_rules[m_i].rule : m_rules[m_i].includeRules;
2092 const QList<ObservableRule> &m_rules;
2093 const QList<const Context::Rule *> *m_includedRules =
nullptr;
// Bookkeeping of the dot regexes already seen, one slot per position constraint.
// extractDotRegexes() maps a rule to at most two slots; append() writes
// {&rule, includedRule} through those pointers and find() reads them.
// NOTE(review): the null checks around array[0] / array[1], the body of find(), and
// the declaration and return of `ret` are on lines missing from this excerpt.
2099 void append(
const Context::Rule &rule,
const Context::Rule *includedRule)
2101 auto array = extractDotRegexes(rule);
2103 *array[0] = {&rule, includedRule};
2106 *array[1] = {&rule, includedRule};
// Looks up a recorded dot regex applicable to `rule`; returns an empty RuleAndInclude
// when the visible fallback is reached.
2111 RuleAndInclude
find(
const Context::Rule &rule)
2113 auto array = extractDotRegexes(rule);
2120 return RuleAndInclude{};
2124 using Array = std::array<RuleAndInclude *, 2>;
// Slot selection: ret[0] is dotRegexFirstNonSpace for firstNonSpace rules, ret[1] is
// dotRegexColumn0 or dotRegexColumnGreaterThan0[column] for column rules. The first
// branch handles rules without any position constraint (its body is not visible here).
2126 Array extractDotRegexes(
const Context::Rule &rule)
2130 if (rule.firstNonSpace != XmlBool::True && rule.column == -1) {
2133 if (rule.firstNonSpace == XmlBool::True) {
2134 ret[0] = &dotRegexFirstNonSpace;
2137 if (rule.column == 0) {
2138 ret[1] = &dotRegexColumn0;
2139 }
else if (rule.column > 0) {
2140 ret[1] = &dotRegexColumnGreaterThan0[rule.column];
2147 RuleAndInclude dotRegex{};
2148 RuleAndInclude dotRegexColumn0{};
2149 QMap<int, RuleAndInclude> dotRegexColumnGreaterThan0{};
2150 RuleAndInclude dotRegexFirstNonSpace{};
2153 bool success =
true;
2156 Char4Tables detectChars;
2158 Char4Tables dynamicDetectChars;
2160 Char4Tables lineContinueChars;
2164 Rule4 hlCCharRule{};
2167 Rule4 hlCStringCharRule{};
2168 Rule4 detectIdentifierRule{};
2176 observedRules.
reserve(context.rules.size());
2177 for (
const Context::Rule &rule : context.rules) {
2178 const Context::Rule *includeRule =
nullptr;
2179 if (rule.type == Context::Rule::Type::IncludeRules) {
2180 auto *context = rule.context.context;
2181 if (context && context->isOnlyIncluded) {
2182 includeRule = &rule;
2186 observedRules.
push_back({&rule, includeRule});
2188 for (
const Context::Rule *rule2 : rule.includedRules) {
2189 observedRules.
push_back({rule2, includeRule});
2194 for (
auto &observedRule : observedRules) {
2195 const Context::Rule &rule = *observedRule.rule;
2196 bool isUnreachable =
false;
// Marks the current rule as unreachable when `ruleAndInclude` is set (truthy) and
// records it in `unreachableBy` as the rule responsible.
2200 auto updateUnreachable1 = [&](RuleAndInclude ruleAndInclude) {
2201 if (ruleAndInclude) {
2202 isUnreachable =
true;
2203 unreachableBy.
append(ruleAndInclude);
2209 if (!ruleAndIncludes.isEmpty()) {
2210 isUnreachable =
true;
2211 unreachableBy.
append(ruleAndIncludes);
// True when rule2 applies at every position where `rule` can match:
//  - rule2 has no position constraint (neither firstNonSpace nor column), or
//  - both rules are bound to the same explicit column, or
//  - both rules are firstNonSpace.
2216 auto isCompatible = [&rule](Context::Rule
const &rule2) {
2217 return (rule2.firstNonSpace != XmlBool::True && rule2.column == -1) || (rule.column == rule2.column && rule.column != -1)
2218 || (rule.firstNonSpace == rule2.firstNonSpace && rule.firstNonSpace == XmlBool::True);
2221 updateUnreachable1(dotRegex.find(rule));
2223 switch (rule.type) {
2226 case Context::Rule::Type::AnyChar: {
2227 auto tables = CharTableArray(detectChars, rule);
2228 updateUnreachable2(tables.find(rule.string));
2229 tables.removeNonSpecialWhenSpecial();
2230 tables.append(rule.string, rule);
2236 case Context::Rule::Type::DetectChar: {
2237 auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2238 auto tables = CharTableArray(chars4, rule);
2239 updateUnreachable1(tables.find(rule.char0));
2240 tables.removeNonSpecialWhenSpecial();
2241 tables.append(rule.char0, rule);
2247 case Context::Rule::Type::DetectSpaces: {
2248 auto tables = CharTableArray(detectChars, rule);
2249 updateUnreachable2(tables.find(u
" \t"_sv));
2250 tables.removeNonSpecialWhenSpecial();
2251 tables.append(u
' ', rule);
2252 tables.append(u
'\t', rule);
2257 case Context::Rule::Type::HlCChar:
2258 updateUnreachable1(CharTableArray(detectChars, rule).
find(u
'\''));
2259 updateUnreachable1(hlCCharRule.setRule(rule));
2263 case Context::Rule::Type::HlCHex:
2264 updateUnreachable1(CharTableArray(detectChars, rule).
find(u
'0'));
2265 updateUnreachable1(hlCHexRule.setRule(rule));
2269 case Context::Rule::Type::HlCOct:
2270 updateUnreachable1(CharTableArray(detectChars, rule).
find(u
'0'));
2271 updateUnreachable1(hlCOctRule.setRule(rule));
2275 case Context::Rule::Type::HlCStringChar:
2276 updateUnreachable1(CharTableArray(detectChars, rule).
find(u
'\\'));
2277 updateUnreachable1(hlCStringCharRule.setRule(rule));
2281 case Context::Rule::Type::Int:
2282 updateUnreachable2(CharTableArray(detectChars, rule).
find(u
"0123456789"_sv));
2283 updateUnreachable1(intRule.setRule(rule));
2287 case Context::Rule::Type::Float:
2288 updateUnreachable2(CharTableArray(detectChars, rule).
find(u
"0123456789."_sv));
2289 updateUnreachable1(floatRule.setRule(rule));
2291 updateUnreachable1(Rule4(intRule).setRule(rule));
2295 case Context::Rule::Type::DetectIdentifier:
2296 updateUnreachable1(detectIdentifierRule.setRule(rule));
2300 case Context::Rule::Type::LineContinue: {
2301 updateUnreachable1(CharTableArray(detectChars, rule).
find(rule.char0));
2303 auto tables = CharTableArray(lineContinueChars, rule);
2304 updateUnreachable1(tables.find(rule.char0));
2305 tables.removeNonSpecialWhenSpecial();
2306 tables.append(rule.char0, rule);
2311 case Context::Rule::Type::Detect2Chars:
2312 case Context::Rule::Type::RangeDetect:
2313 updateUnreachable1(CharTableArray(detectChars, rule).
find(rule.char0));
2314 if (!isUnreachable) {
2315 RuleIterator ruleIterator(observedRules, observedRule);
2316 while (
const auto *rulePtr = ruleIterator.next()) {
2317 if (isUnreachable) {
2320 const auto &rule2 = *rulePtr;
2321 if (rule2.type == rule.type && isCompatible(rule2) && rule.char0 == rule2.char0 && rule.char1 == rule2.char1) {
2322 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2328 case Context::Rule::Type::RegExpr: {
2329 if (rule.isDotRegex) {
2330 dotRegex.append(rule,
nullptr);
2335 RuleIterator ruleIterator(observedRules, observedRule);
2336 while (
const auto *rulePtr = ruleIterator.next()) {
2337 if (isUnreachable) {
2340 const auto &rule2 = *rulePtr;
2341 if (rule2.type == Context::Rule::Type::RegExpr && isCompatible(rule2) && rule.insensitive == rule2.insensitive
2342 && rule.dynamic == rule2.dynamic && rule.sanitizedString.startsWith(rule2.sanitizedString)) {
2343 bool add = (rule.sanitizedString.startsWith(rule2.string) || rule.sanitizedString.size() < rule2.sanitizedString.size() + 2);
2347 auto c1 = rule.sanitizedString[rule2.sanitizedString.size()].unicode();
2348 auto c2 = rule.sanitizedString[rule2.sanitizedString.size() + 1].unicode();
2349 auto c3 = rule2.sanitizedString.back().unicode();
2350 if (c3 ==
'*' || c3 ==
'?' || c3 ==
'+') {
2352 }
else if (c1 ==
'*' || c1 ==
'?') {
2353 add = !((c2 ==
'?' || c2 ==
'+') || (rule.sanitizedString.size() >= rule2.sanitizedString.size() + 3));
2359 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2367 case Context::Rule::Type::WordDetect:
2368 case Context::Rule::Type::StringDetect: {
2370 if (rule.type == Context::Rule::Type::StringDetect && rule.dynamic == XmlBool::True) {
2371 RuleIterator ruleIterator(observedRules, observedRule);
2372 while (
const auto *rulePtr = ruleIterator.next()) {
2373 if (isUnreachable) {
2377 const auto &rule2 = *rulePtr;
2378 if (rule2.type != Context::Rule::Type::StringDetect || rule2.dynamic != XmlBool::True || !isCompatible(rule2)) {
2382 const bool isSensitive = (rule2.insensitive == XmlBool::True);
2384 if ((isSensitive || rule.insensitive != XmlBool::True) && rule.string.startsWith(rule2.string, caseSensitivity)) {
2385 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2394 if (rule.dynamic == XmlBool::True) {
2395 static const QRegularExpression dynamicPosition(QStringLiteral(R
"(^(?:[^%]*|%(?![1-9]))*)"));
2396 auto result = dynamicPosition.match(rule.string);
2397 s = s.
sliced(0, result.capturedLength());
2399 if (s.
size() + 2 <= rule.string.size()) {
2400 auto tables = CharTableArray(dynamicDetectChars, rule);
2401 updateUnreachable1(tables.find(s.
data()[s.
size() + 2]));
2408 if (rule.type == Context::Rule::Type::RegExpr) {
2409 static const QRegularExpression regularChars(QStringLiteral(R
"(^(?:[^.?*+^$[{(\\|]+|\\[-.?*+^$[\]{}()\\|]+|\[[^^\\]\])+)"));
2410 static const QRegularExpression sanitizeChars(QStringLiteral(R
"(\\([-.?*+^$[\]{}()\\|])|\[([^^\\])\])"));
2411 const qsizetype result = regularChars.match(rule.string).capturedLength();
2412 const qsizetype pos = qMin(result, s.
size());
2413 if (rule.string.indexOf(u
'|', pos) < pos) {
2414 sanitizedRegex = rule.string.
sliced(0, qMin(result, s.
size()));
2415 sanitizedRegex.
replace(sanitizeChars, QStringLiteral(
"\\1"));
2424 auto t = CharTableArray(detectChars, rule);
2425 if (rule.insensitive != XmlBool::True) {
2426 updateUnreachable1(t.find(s[0]));
2428 QChar c2[]{s[0].toLower(), s[0].toUpper()};
2433 if (rule.type == Context::Rule::Type::StringDetect && rule.string.size() == 1) {
2434 auto tables = CharTableArray(detectChars, rule);
2435 auto c = rule.string[0];
2436 if (rule.insensitive != XmlBool::True) {
2438 tables.removeNonSpecialWhenSpecial();
2439 tables.append(c, rule);
2442 tables.removeNonSpecialWhenSpecial();
2443 tables.append(c, rule);
2448 if (s.
size() > 0 && !isUnreachable) {
2450 RuleAndInclude detect2CharsInsensitives[]{{}, {}, {}, {}};
2452 RuleIterator ruleIterator(observedRules, observedRule);
2453 while (
const auto *rulePtr = ruleIterator.next()) {
2454 if (isUnreachable) {
2457 const auto &rule2 = *rulePtr;
2458 const bool isSensitive = (rule2.insensitive == XmlBool::True);
2461 switch (rule2.type) {
2463 case Context::Rule::Type::Detect2Chars:
2464 if (isCompatible(rule2) && s.
size() >= 2) {
2465 if (rule.insensitive != XmlBool::True) {
2466 if (rule2.char0 == s[0] && rule2.char1 == s[1]) {
2467 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
// Fills slot `x` (once) with the current Detect2Chars rule when that rule matches the
// character pair (c1, c2). It is called with the four lower/upper case combinations of
// s[0] and s[1]; the insensitive rule is unreachable once all four slots are filled.
// Fix: the second character is compared with rule2.char1. The previous code compared
// rule2.char0 with both c1 and c2, so a slot could only be filled when c1 == c2.
// The case-sensitive branch already compares char0 with s[0] and char1 with s[1].
2472 auto set = [&](RuleAndInclude &x,
QChar c1,
QChar c2) {
2473 if (!x && rule2.char0 == c1 && rule2.char1 == c2) {
2474 x = {&rule2, ruleIterator.currentIncludeRules()};
2477 set(detect2CharsInsensitives[0], s[0].toLower(), s[1].toLower());
2478 set(detect2CharsInsensitives[1], s[0].toLower(), s[1].toUpper());
2479 set(detect2CharsInsensitives[2], s[0].toUpper(), s[1].toUpper());
2480 set(detect2CharsInsensitives[3], s[0].toUpper(), s[1].toLower());
2482 if (detect2CharsInsensitives[0] && detect2CharsInsensitives[1] && detect2CharsInsensitives[2]
2483 && detect2CharsInsensitives[3]) {
2484 isUnreachable =
true;
2485 unreachableBy.
append(detect2CharsInsensitives[0]);
2486 unreachableBy.
append(detect2CharsInsensitives[1]);
2487 unreachableBy.
append(detect2CharsInsensitives[2]);
2488 unreachableBy.
append(detect2CharsInsensitives[3]);
2495 case Context::Rule::Type::StringDetect:
2496 if (isCompatible(rule2) && rule2.dynamic != XmlBool::True && (isSensitive || rule.insensitive != XmlBool::True)
2497 && s.
startsWith(rule2.string, caseSensitivity)) {
2498 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2503 case Context::Rule::Type::WordDetect:
2504 if (rule.type == Context::Rule::Type::WordDetect && isCompatible(rule2) && (isSensitive || rule.insensitive != XmlBool::True)
2505 && 0 == rule.string.compare(rule2.string, caseSensitivity)) {
2506 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2519 case Context::Rule::Type::keyword: {
2520 RuleIterator ruleIterator(observedRules, observedRule);
2521 while (
const auto *rulePtr = ruleIterator.next()) {
2522 if (isUnreachable) {
2525 const auto &rule2 = *rulePtr;
2526 if (rule2.type == Context::Rule::Type::keyword && isCompatible(rule2) && rule.string == rule2.string) {
2527 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2539 case Context::Rule::Type::IncludeRules:
2540 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2544 if (
auto &ruleAndInclude = includeContexts[rule.context.context]) {
2545 updateUnreachable1(ruleAndInclude);
2547 ruleAndInclude.rule = &rule;
2550 for (
const auto *rulePtr : rule.includedIncludeRules) {
2551 includeContexts.
insert(rulePtr->context.context, RuleAndInclude{rulePtr, &rule});
2554 if (observedRule.includeRules) {
2558 for (
const auto *rulePtr : rule.includedRules) {
2559 const auto &rule2 = *rulePtr;
2560 switch (rule2.type) {
2561 case Context::Rule::Type::AnyChar: {
2562 auto tables = CharTableArray(detectChars, rule2);
2563 tables.removeNonSpecialWhenSpecial();
2564 tables.append(rule2.string, rule2, &rule);
2568 case Context::Rule::Type::DetectChar: {
2569 auto &chars4 = (rule2.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2570 auto tables = CharTableArray(chars4, rule2);
2571 tables.removeNonSpecialWhenSpecial();
2572 tables.append(rule2.char0, rule2, &rule);
2576 case Context::Rule::Type::DetectSpaces: {
2577 auto tables = CharTableArray(detectChars, rule2);
2578 tables.removeNonSpecialWhenSpecial();
2579 tables.append(u
' ', rule2, &rule);
2580 tables.append(u
'\t', rule2, &rule);
2584 case Context::Rule::Type::HlCChar:
2585 hlCCharRule.setRule(rule2, &rule);
2588 case Context::Rule::Type::HlCHex:
2589 hlCHexRule.setRule(rule2, &rule);
2592 case Context::Rule::Type::HlCOct:
2593 hlCOctRule.setRule(rule2, &rule);
2596 case Context::Rule::Type::HlCStringChar:
2597 hlCStringCharRule.setRule(rule2, &rule);
2600 case Context::Rule::Type::Int:
2601 intRule.setRule(rule2, &rule);
2604 case Context::Rule::Type::Float:
2605 floatRule.setRule(rule2, &rule);
2608 case Context::Rule::Type::LineContinue: {
2609 auto tables = CharTableArray(lineContinueChars, rule2);
2610 tables.removeNonSpecialWhenSpecial();
2611 tables.append(rule2.char0, rule2, &rule);
2615 case Context::Rule::Type::RegExpr:
2616 if (rule2.isDotRegex) {
2617 dotRegex.append(rule2, &rule);
2621 case Context::Rule::Type::StringDetect: {
2623 if (rule2.string.size() == 1 || (rule2.string.size() == 2 && rule2.dynamic == XmlBool::True)) {
2624 auto &chars4 = (rule2.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2625 auto tables = CharTableArray(chars4, rule2);
2626 tables.removeNonSpecialWhenSpecial();
2627 tables.append(rule2.string.back(), rule2, &rule);
2632 case Context::Rule::Type::WordDetect:
2633 case Context::Rule::Type::Detect2Chars:
2634 case Context::Rule::Type::IncludeRules:
2635 case Context::Rule::Type::DetectIdentifier:
2636 case Context::Rule::Type::keyword:
2637 case Context::Rule::Type::Unknown:
2638 case Context::Rule::Type::RangeDetect:
2644 case Context::Rule::Type::Unknown:
2648 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2649 auto &unreachableIncludedRule = unreachableIncludedRules[&rule];
2650 if (isUnreachable && unreachableIncludedRule.alwaysUnreachable) {
2651 unreachableIncludedRule.unreachableBy.append(unreachableBy);
2653 unreachableIncludedRule.alwaysUnreachable =
false;
2655 }
else if (isUnreachable) {
2659 for (
auto &ruleAndInclude : std::as_const(unreachableBy)) {
2660 message += u
"line "_sv;
2661 if (ruleAndInclude.includeRules) {
2663 message += u
" [by '"_sv;
2664 message += ruleAndInclude.includeRules->context.name;
2665 message += u
"' line "_sv;
2667 if (ruleAndInclude.includeRules->filename != ruleAndInclude.rule->filename) {
2668 message += u
" ("_sv;
2669 message += ruleAndInclude.rule->filename;
2676 message += u
", "_sv;
2679 qWarning() << filename <<
"line" << rule.line <<
"unreachable rule by" << message;
// Looks at each pair of consecutive rules (rule1, rule2) of a context and warns when
// the second rule could be merged into the first:
//  - single-character rules (AnyChar, DetectChar, or a non-dynamic one-character
//    StringDetect) with the same column -> "can be merged as AnyChar";
//  - two RegExpr rules with the same `dynamic` value and either the same column or
//    both columns <= 0 -> "can be merged".
// Both cases also require isCommonCompatible().
// NOTE(review): updates of `success`, the returns, and the bodies of several branches
// are on lines missing from this excerpt.
2689 bool suggestRuleMerger(
const QString &filename,
const Context &context)
const
2691 bool success =
true;
// The empty check precedes the `end() - 1` computation below.
2693 if (context.rules.isEmpty()) {
2697 auto it = context.rules.begin();
2698 const auto end = context.rules.end() - 1;
2700 for (; it <
end; ++it) {
2701 const auto &rule1 = *it;
2702 const auto &rule2 = it[1];
// Attributes that both rules must share. `attribute` is only compared when rule1 is
// not a lookAhead rule.
2704 auto isCommonCompatible = [&] {
2705 if (rule1.lookAhead != rule2.lookAhead) {
2709 if (rule1.lookAhead != XmlBool::True && rule1.attribute != rule2.attribute) {
2713 return rule1.beginRegion == rule2.beginRegion
2714 && rule1.endRegion == rule2.endRegion
2715 && rule1.firstNonSpace == rule2.firstNonSpace
2716 && rule1.context.context == rule2.context.context
2717 && rule1.context.popCount == rule2.context.popCount;
2721 switch (rule1.type) {
2723 case Context::Rule::Type::StringDetect:
2724 if (rule1.string.size() != 1 || rule1.dynamic == XmlBool::True) {
2729 case Context::Rule::Type::AnyChar:
2730 case Context::Rule::Type::DetectChar:
2731 if ((rule2.type == Context::Rule::Type::AnyChar || rule2.type == Context::Rule::Type::DetectChar
2732 || (rule2.type == Context::Rule::Type::StringDetect && rule2.dynamic != XmlBool::True && rule2.string.size() == 1))
2733 && isCommonCompatible() && rule1.column == rule2.column) {
2734 qWarning() << filename <<
"line" << rule2.line <<
"can be merged as AnyChar with the previous rule";
2740 case Context::Rule::Type::RegExpr:
2741 if (rule2.type == Context::Rule::Type::RegExpr && isCommonCompatible() && rule1.dynamic == rule2.dynamic
2742 && (rule1.column == rule2.column || (rule1.column <= 0 && rule2.column <= 0))) {
2743 qWarning() << filename <<
"line" << rule2.line <<
"can be merged with the previous rule";
// The remaining rule types are listed explicitly, with no merge suggestion visible for them.
2748 case Context::Rule::Type::DetectSpaces:
2749 case Context::Rule::Type::HlCChar:
2750 case Context::Rule::Type::HlCHex:
2751 case Context::Rule::Type::HlCOct:
2752 case Context::Rule::Type::HlCStringChar:
2753 case Context::Rule::Type::Int:
2754 case Context::Rule::Type::Float:
2755 case Context::Rule::Type::LineContinue:
2756 case Context::Rule::Type::WordDetect:
2757 case Context::Rule::Type::Detect2Chars:
2758 case Context::Rule::Type::IncludeRules:
2759 case Context::Rule::Type::DetectIdentifier:
2760 case Context::Rule::Type::keyword:
2761 case Context::Rule::Type::Unknown:
2762 case Context::Rule::Type::RangeDetect:
// Resolves the textual context reference held by `contextName` and fills in
// contextName.stay, contextName.popCount and contextName.context.
// Visible behavior:
//  - two branches set contextName.stay and one counts #pop prefixes into popCount;
//  - a local name is looked up in definition.contexts;
//  - a reference to another definition is looked up in m_definitions, the definition
//    is added to definition.referencedDefinitions, and an empty context name falls
//    back to that definition's firstContextName;
//  - warnings cover an invalid context, a missing '!' between '#pop' and the name, an
//    unknown definition and an unknown context.
// NOTE(review): the conditions guarding most branches and the declarations of `name`,
// `defName` and `idx` are on lines missing from this excerpt. `line` is used only in
// the warnings.
2778 void resolveContextName(Definition &definition, Context &context, ContextName &contextName,
int line)
2782 contextName.stay =
true;
2784 contextName.stay =
true;
2786 qWarning() << definition.filename <<
"line" << line <<
"invalid context in" << context.name;
2792 ++contextName.popCount;
2799 qWarning() << definition.filename <<
"line" << line <<
"'!' missing between '#pop' and context name" << context.name;
2807 auto it = definition.contexts.find(
name.toString());
2808 if (it != definition.contexts.end()) {
2809 contextName.context = &*it;
2813 auto it = m_definitions.find(defName.toString());
2814 if (it != m_definitions.end()) {
2815 auto listName =
name.
sliced(0, idx).toString();
2816 definition.referencedDefinitions.insert(&*it);
2817 auto ctxIt = it->contexts.find(listName.isEmpty() ? it->firstContextName : listName);
2818 if (ctxIt != it->contexts.end()) {
2819 contextName.context = &*ctxIt;
2822 qWarning() << definition.filename <<
"line" << line <<
"unknown definition in" << context.name;
2827 if (!contextName.context) {
2828 qWarning() << definition.filename <<
"line" << line <<
"unknown context" <<
name <<
"in" << context.name;
2837 Definition *m_currentDefinition =
nullptr;
2838 Keywords *m_currentKeywords =
nullptr;
2839 Context *m_currentContext =
nullptr;
2843 bool m_inKeywordItem =
false;
2845 bool m_success =
true;
// Stores the kateversion string that processElement() writes in place of any
// "kateversion" attribute value, and seeds m_hasElems with a first `true` entry.
2851 HlCompressor(
const QString &kateVersion)
2852 : m_kateVersion(kateVersion)
2854 m_hasElems.push_back(
true);
2857 const QString &compressedXML()
const
// Handles one XML element from the reader and appends its compressed form to the output.
// Visible behavior:
//  - closes the previously opened tag and pushes a `false` entry on m_hasElems;
//  - "contexts" switches to context mode (m_inContexts) and is written to m_data;
//  - in context mode the element goes to the data of the current Context; a new
//    Context is started for each "context" tag, or when none exists yet;
//  - Detect2Chars is emitted as StringDetect, with its "char" and "char1" attributes
//    combined into one two-character "String" attribute (a space stands in for an
//    empty value);
//  - context-reference attributes equal to "#stay" are not written, and references to
//    contexts of the same file (no "##") are counted in m_contextRefs after stripping
//    the "#pop" prefixes;
//  - outside context mode the element is written to m_data, with the value of a
//    "kateversion" attribute replaced by m_kateVersion.
// NOTE(review): the declarations of `attrs` and `name`, and several branch bodies
// (including the LineContinue char="\\" case), are on lines missing from this excerpt.
2871 void processElement(
const QXmlStreamReader &xml)
2875 closePreviousOpenTag(m_inContexts && !m_contexts.empty() ? m_contexts.back().data : m_data);
2876 m_hasElems.push_back(
false);
2878 const auto tagName = xml.
name();
2879 if (tagName == u
"contexts"_sv) {
2880 m_inContexts =
true;
2881 m_data += u
"<contexts"_sv;
2882 }
else if (m_inContexts) {
2883 Context &ctx = (m_contexts.empty() || tagName == u
"context"_sv) ? m_contexts.emplace_back() : m_contexts.back();
2884 QString &out = ctx.data;
2885 const bool isDetect2Chars = tagName == u
"Detect2Chars"_sv;
2886 out += u
'<' % (isDetect2Chars ? u
"StringDetect"_sv : tagName);
2889 sortAttributes(attrs);
2890 for (
const auto &attr : attrs) {
2891 const auto attrName = attr.
name();
2892 auto value = attr.
value();
2894 if (isDetect2Chars && (attrName == u
"char"_sv || attrName == u
"char1"_sv)) {
2895 if (attrName == u
"char"_sv) {
2896 const auto ch0 = value;
2897 const auto ch1 = attrs.value(u
"char1"_sv);
2898 QChar chars[]{ch0.isEmpty() ? u
' ' : ch0[0], ch1.isEmpty() ? u
' ' : ch1[0]};
2899 writeXmlAttribute(out, u
"String"_sv, QStringView(chars, 2), tagName);
2901 }
else if (attrName == u
"context"_sv || attrName == u
"lineEndContext"_sv || attrName == u
"fallthroughContext"_sv
2902 || attrName == u
"lineEmptyContext"_sv) {
2904 if (value != u
"#stay"_sv) {
2905 writeXmlAttribute(out, attrName, value, tagName);
2910 bool hasPop =
false;
2911 while (value.startsWith(u
"#pop"_sv)) {
2913 value = value.sliced(4);
2915 if (hasPop && !value.isEmpty()) {
2916 value = value.sliced(1);
2918 if (!value.isEmpty() && -1 == value.indexOf(u
"##"_sv)) {
2919 m_contextRefs[value.toString()]++;
2922 }
else if (tagName == u
"LineContinue"_sv && attrName == u
"char"_sv && value == u
"\\") {
2925 if (attrName == u
"name"_sv) {
2926 ctx.name = value.toString();
2928 writeXmlAttribute(out, attrName, value, tagName);
2932 m_data += u
'<' % tagName;
2934 for (
const auto &attr : attrs) {
2936 auto value = (
name == u
"kateversion") ? QStringView(m_kateVersion) : attr.
value();
2937 writeXmlAttribute(m_data, name, value, tagName);
2945 if (m_inContexts && !m_contexts.empty() && name == u
"contexts"_sv) {
2946 m_inContexts =
false;
2948 std::sort(m_contexts.begin() + 1, m_contexts.end(), [&](
auto &ctx1,
auto &ctx2) {
2949 auto i1 = m_contextRefs.value(ctx1.name);
2950 auto i2 = m_contextRefs.value(ctx2.name);
2955 return ctx1.name < ctx2.name;
2957 for (
const auto &ctx : m_contexts) {
2962 QString &out = m_inContexts && !m_contexts.empty() ? m_contexts.
back().data : m_data;
2963 if (m_hasElems.back()) {
2964 out += u
"</"_sv %
name % u
'>';
2968 m_hasElems.pop_back();
2975 closePreviousOpenTag(m_data);
2976 writeXmlText(m_data, xml.
text());
2985 void closePreviousOpenTag(
QString &out)
2987 if (!m_hasElems.back()) {
2988 m_hasElems.back() =
true;
2998 for (
const QChar &c : text) {
3001 }
else if (c == u
'&') {
3003 }
else if (escapeDQ && c == u
'"') {
3005 }
else if (c == u
'\t') {
3022 enum class DefaultBool {
3039 {u
"fallthrough"_sv, DefaultBool::Ignored},
3040 {u
"dynamic"_sv, DefaultBool::DynamicAttr},
3041 {u
"hidden"_sv, DefaultBool::False},
3042 {u
"indentationsensitive"_sv, DefaultBool::False},
3043 {u
"noIndentationBasedFolding"_sv, DefaultBool::False},
3044 {u
"lookAhead"_sv, DefaultBool::False},
3045 {u
"firstNonSpace"_sv, DefaultBool::False},
3046 {u
"insensitive"_sv, DefaultBool::FalseOrKeywordTag},
3047 {u
"minimal"_sv, DefaultBool::False},
3048 {u
"includeAttrib"_sv, DefaultBool::False},
3049 {u
"italic"_sv, DefaultBool::None},
3050 {u
"bold"_sv, DefaultBool::None},
3051 {u
"underline"_sv, DefaultBool::None},
3052 {u
"strikeOut"_sv, DefaultBool::None},
3053 {u
"spellChecking"_sv, DefaultBool::True},
3054 {u
"casesensitive"_sv, DefaultBool::TrueOrKeywordsTag},
3055 {u
"ignored"_sv, DefaultBool::Ignored},
3058 auto it = booleanAttrs.
find(attrName);
3060 if (it != booleanAttrs.end()) {
3061 bool b = KSyntaxHighlighting::Xml::attrToBool(value);
3062 bool ignoreAttr =
false;
3064 case DefaultBool::Ignored:
3067 case DefaultBool::TrueOrKeywordsTag:
3068 ignoreAttr = (tagName == u
"keywords"_sv) ?
false : b;
3070 case DefaultBool::True:
3073 case DefaultBool::FalseOrKeywordTag:
3074 ignoreAttr = (tagName == u
"keyword"_sv) ?
false : !b;
3076 case DefaultBool::DynamicAttr:
3077 ignoreAttr = (tagName == u
"context"_sv) || !b;
3079 case DefaultBool::False:
3082 case DefaultBool::None:
3087 out += u
' ' % attrName % u
"=\""_sv % (b ? u
'1' : u
'0') % u
'"';
3090 const bool hasDQ = value.
contains(u
'"');
3092 if (!hasDQ || value.
contains(u
'\'')) {
3093 out += u
' ' % attrName % u
"=\""_sv;
3094 writeXmlText(out, value, hasDQ);
3098 out += u
' ' % attrName % u
"='"_sv;
3099 writeXmlText(out, value);
3112 {u
"attribute"_sv, 5},
3118 {u
"noIndentationBasedFolding"_sv, 11},
3119 {u
"lineEndContext"_sv, 9},
3120 {u
"lineEmptyContext"_sv, 8},
3121 {u
"fallthroughContext"_sv, 7},
3124 {u
"lookAhead"_sv, 100},
3125 {u
"firstNonSpace"_sv, 99},
3126 {u
"dynamic"_sv, 98},
3127 {u
"minimal"_sv, 97},
3128 {u
"includeAttrib"_sv, 96},
3129 {u
"insensitive"_sv, 95},
3131 {u
"beginRegion"_sv, 40},
3132 {u
"endRegion"_sv, 41},
3133 {u
"weakDeliminator"_sv, 31},
3134 {u
"additionalDeliminator"_sv, 30},
3135 {u
"context"_sv, 20},
3140 {u
"strikeOut"_sv, 100},
3141 {u
"underline"_sv, 99},
3144 {u
"spellChecking"_sv, 96},
3145 {u
"defStyleNum"_sv, 95},
3147 {u
"backgroundColor"_sv, 93},
3148 {u
"selBackgroundColor"_sv, 92},
3149 {u
"selColor"_sv, 91},
3151 std::sort(attrs.
begin(), attrs.
end(), [](
auto &attr1,
auto &attr2) {
3152 auto i1 = priorityAttrs.value(attr1.name());
3153 auto i2 = priorityAttrs.value(attr2.name());
3157 return attr1.name() < attr2.name();
3165 QString m_data = u
"<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE language>"_s;
3166 std::vector<Context> m_contexts;
3170 bool m_inContexts =
false;
3173void printFileError(
const QFile &file)
3185 QFile file(fileName);
3187 printFileError(file);
3193 while (!xml.
atEnd()) {
3203 printXmlError(fileName, xml);
3221 if (extensionParts.
isEmpty()) {
3226 for (
const auto &extension : extensionParts) {
3227 for (
const auto c : extension) {
3234 if (c == u
'.' || c == u
'-' || c == u
'_' || c == u
'+') {
3239 if (c == u
'?' || c == u
'*') {
3243 qWarning() <<
"invalid character" << c <<
"seen in extensions wildcard";
3252struct CompressedFile {
3259int main(
int argc,
char *argv[])
3265 if (app.arguments().size() < 4) {
3271 XMLPlatformUtils::Initialize();
3272 auto cleanup = qScopeGuard(XMLPlatformUtils::Terminate);
3277 XMLGrammarPoolImpl xsd(XMLPlatformUtils::fgMemoryManager);
3280 CustomXMLValidator parser(&xsd);
3283 const auto xsdFile = app.arguments().at(2);
3284 if (!parser.loadGrammar((
const char16_t *)xsdFile.utf16(), Grammar::SchemaGrammarType,
true) || parser.eh.failed()) {
3285 qWarning(
"Failed to parse XSD %s: %s", qPrintable(xsdFile), qPrintable(parser.messages));
3293 const QString hlFilenamesListing = app.arguments().value(3);
3294 if (hlFilenamesListing.
isEmpty()) {
3298 QStringList hlFilenames = readListing(hlFilenamesListing);
3300 qWarning(
"Failed to read %s", qPrintable(hlFilenamesListing));
3305 const QStringList textAttributes =
QStringList() << QStringLiteral(
"name") << QStringLiteral(
"alternativeNames") << QStringLiteral(
"section")
3306 << QStringLiteral(
"mimetype") << QStringLiteral(
"extensions") << QStringLiteral(
"style")
3307 << QStringLiteral(
"author") << QStringLiteral(
"license") << QStringLiteral(
"indenter");
3310 HlFilesChecker filesChecker;
3313 std::vector<CompressedFile> compressedFiles;
3314 for (
const QString &hlFilename : std::as_const(hlFilenames)) {
3315 QFile hlFile(hlFilename);
3317 printFileError(hlFile);
3324 CustomXMLValidator parser(&xsd);
3327 parser.parse((
const char16_t *)hlFile.fileName().utf16());
3330 if (parser.eh.failed()) {
3331 qWarning(
"Failed to validate XML %s: %s", qPrintable(hlFile.fileName()), qPrintable(parser.messages));
3354 for (
const QString &attribute : std::as_const(textAttributes)) {
3359 if (!checkExtensions(hl[QStringLiteral(
"extensions")].
toString())) {
3360 qWarning() << hlFilename <<
"'extensions' wildcards invalid:" << hl[QStringLiteral(
"extensions")].toString();
3372 hl[QStringLiteral(
"nameUtf8")] = hl[QStringLiteral(
"name")].toString().toUtf8();
3373 hl[QStringLiteral(
"sectionUtf8")] = hl[QStringLiteral(
"section")].toString().toUtf8();
3379 const QString hlName = hl[QStringLiteral(
"name")].toString();
3380 const QString hlAlternativeNames = hl[QStringLiteral(
"alternativeNames")].toString();
3382 filesChecker.setDefinition(kateversion, hlFilename, hlName, hlAlternativeNames.
split(u
';',
Qt::SkipEmptyParts));
3387 HlCompressor compressor((filesChecker.currentVersion() < KateVersion{5, 62}) ? u
"5.62"_s : kateversion.
toString());
3388 compressor.processElement(xml);
3391 while (!xml.
atEnd()) {
3393 filesChecker.processElement(xml);
3394 compressor.processElement(xml);
3399 printXmlError(hlFilename, xml);
3402 compressedFiles.emplace_back(CompressedFile{
3404 compressor.compressedXML(),
3408 filesChecker.resolveContexts();
3410 if (!filesChecker.check()) {
3420 HlFilesChecker filesChecker2;
3421 const QString compressedDir = app.arguments().
at(4) + u
"/"_sv;
3422 for (
const auto &compressedFile : std::as_const(compressedFiles)) {
3423 const auto outFileName = compressedDir + compressedFile.fileName;
3424 auto utf8Data = compressedFile.xmlData.
toUtf8();
3428 CustomXMLValidator parser(&xsd);
3430 auto utf8Filename = outFileName.toUtf8();
3431 utf8Filename.append(
'\0');
3433 MemBufInputSource membuf(
reinterpret_cast<const XMLByte *
>(utf8Data.constData()), utf8Data.size(), utf8Filename.data());
3436 if (parser.eh.failed()) {
3437 qWarning(
"Failed to validate XML %s: %s", qPrintable(outFileName), qPrintable(parser.messages));
3446 while (!xml.
atEnd()) {
3447 if (xml.
readNext() == QXmlStreamReader::TokenType::StartElement && xml.
name() == u
"language"_sv) {
3449 const auto version = attrs.value(u
"kateversion"_sv);
3450 const QString hlName = attrs.value(u
"name"_sv).toString();
3451 const QString hlAlternativeNames = attrs.value(u
"alternativeNames"_sv).toString();
3452 filesChecker2.setDefinition(version, outFileName, hlName, hlAlternativeNames.
split(u
';',
Qt::SkipEmptyParts));
3454 filesChecker2.processElement(xml);
3458 printXmlError(outFileName, xml);
3463 QFile outFile(outFileName);
3467 outFile.write(utf8Data);
3470 filesChecker2.resolveContexts();
3473 if (!filesChecker2.check()) {
3478 QFile outFile(app.arguments().at(1));
AKONADI_MIME_EXPORT const char Ignored[]
Type type(const QSqlDatabase &db)
char * toString(const EngineQuery &query)
QAction * end(const QObject *recvr, const char *slot, QObject *parent)
KIOCORE_EXPORT bool operator==(const UDSEntry &entry, const UDSEntry &other)
QString name(const QVariant &location)
void error(QWidget *parent, const QString &text, const QString &title, const KGuiItem &buttonOk, Options options=Notify)
const QList< QKeySequence > & next()
const QList< QKeySequence > & replace()
KTEXTEDITOR_EXPORT size_t qHash(KTextEditor::Cursor cursor, size_t seed=0) noexcept
bool operator<(const PosRange< Trait > &l, const PosRange< Trait > &r)
NETWORKMANAGERQT_EXPORT QString version()
QCborValue fromVariant(const QVariant &variant)
bool isDigit(char32_t ucs4)
bool isLetter(char32_t ucs4)
char32_t toLower(char32_t ucs4)
char32_t toUpper(char32_t ucs4)
virtual QString fileName() const const override
bool open(FILE *fh, OpenMode mode, FileHandleFlags handleFlags)
QString fileName() const const
iterator find(const Key &key)
QString errorString() const const
void append(QList< T > &&value)
bool isEmpty() const const
void push_back(parameter_type value)
void reserve(qsizetype size)
qsizetype size() const const
iterator find(const Key &key)
iterator insert(const Key &key, const T &value)
QString errorString() const const
bool isValid() const const
QString pattern() const const
qsizetype patternErrorOffset() const const
bool contains(const QSet< T > &other) const const
iterator insert(const T &value)
qsizetype size() const const
QString & append(QChar ch)
const QChar at(qsizetype position) const const
QString fromUtf16(const char16_t *unicode, qsizetype size)
qsizetype indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const const
QString & insert(qsizetype position, QChar ch)
bool isEmpty() const const
QString number(double n, char format, int precision)
QString & remove(QChar ch, Qt::CaseSensitivity cs)
QString & replace(QChar before, QChar after, Qt::CaseSensitivity cs)
void reserve(qsizetype size)
qsizetype size() const const
QString sliced(qsizetype pos) const const
QStringList split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const const
QByteArray toUtf8() const const
bool contains(QChar c, Qt::CaseSensitivity cs) const const
const_pointer data() const const
QChar first() const const
qsizetype indexOf(QChar c, qsizetype from, Qt::CaseSensitivity cs) const const
bool isNull() const const
qsizetype size() const const
QStringView sliced(qsizetype pos) const const
QList< QStringView > split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
bool startsWith(QChar ch) const const
int toInt(bool *ok, int base) const const
QString toString() const const
QTextStream & endl(QTextStream &stream)
QStringView name() const const
QStringView value() const const
QStringView value(QAnyStringView namespaceUri, QAnyStringView name) const const
QXmlStreamAttributes attributes() const const
qint64 characterOffset() const const
QString errorString() const const
bool hasError() const const
bool isCharacters() const const
bool isWhitespace() const const
qint64 lineNumber() const const
QStringView name() const const
bool readNextStartElement()
QStringView text() const const
TokenType tokenType() const const