KFileMetaData

xmlextractor.cpp
1/*
2 SPDX-FileCopyrightText: 2018 Stefan Brüns <stefan.bruens@rwth-aachen.de>
3
4 SPDX-License-Identifier: LGPL-2.1-or-later
5*/
6
7
8#include "xmlextractor.h"
9#include "kfilemetadata_debug.h"
10#include "dublincoreextractor.h"
11
12#include <QDomDocument>
13#include <QFile>
14#include <QXmlStreamReader>
15
16#ifdef SVG_XML_COMPRESSED_SUPPORT
17#include <KCompressionDevice>
18#endif
19
20namespace {
21
22//inline QString dcElementNS() { return QStringLiteral("http://purl.org/dc/elements/1.1/"); }
23inline QString svgNS() { return QStringLiteral("http://www.w3.org/2000/svg"); }
24inline QString rdfNS() { return QStringLiteral("http://www.w3.org/1999/02/22-rdf-syntax-ns#"); }
25inline QString ccNS() { return QStringLiteral("http://creativecommons.org/ns#"); }
26
27void extractSvgText(KFileMetaData::ExtractionResult* result, const QDomElement &node)
28{
29 if (node.namespaceURI() != svgNS()) {
30 return;
31 }
32
33 if ((node.localName() == QLatin1String("g")) ||
34 (node.localName() == QLatin1String("a"))) {
36 for (; !e.isNull(); e = e.nextSiblingElement()) {
37 extractSvgText(result, e);
38 }
39 } else if (node.localName() == QLatin1String("text")) {
40 qCDebug(KFILEMETADATA_LOG) << node.text();
41 result->append(node.text());
42 }
43}
44
45static const QStringList supportedMimeTypes = {
46 QStringLiteral("application/xml"),
47 QStringLiteral("image/svg+xml"),
48 QStringLiteral("image/svg+xml-compressed"),
49 QStringLiteral("image/svg"),
50};
51
52}
53
54namespace KFileMetaData
55{
56
57XmlExtractor::XmlExtractor(QObject* parent)
58 : ExtractorPlugin(parent)
59{
60
61}
62
63QStringList XmlExtractor::mimetypes() const
64{
65 return supportedMimeTypes;
66}
67
68void XmlExtractor::extract(ExtractionResult* result)
69{
70 auto flags = result->inputFlags();
71
72 QFile file(result->inputUrl());
73 if (!file.open(QIODevice::ReadOnly)) {
74 qCWarning(KFILEMETADATA_LOG) << "Document is not a valid file";
75 return;
76 }
77
78
79 if ((result->inputMimetype() == QLatin1String("image/svg")) ||
80 (result->inputMimetype() == QLatin1String("image/svg+xml-compressed")) ||
81 (result->inputMimetype() == QLatin1String("image/svg+xml"))) {
82
83 result->addType(Type::Image);
84
85 QIODevice *ioDevice = &file;
86#ifdef SVG_XML_COMPRESSED_SUPPORT
87 std::unique_ptr<KCompressionDevice> gzReader;
88 if (result->inputMimetype() == QLatin1String("image/svg+xml-compressed")) {
89 gzReader.reset(new KCompressionDevice(&file, false, KCompressionDevice::CompressionType::GZip));
90 if (!gzReader->open(QIODevice::ReadOnly)) {
91 return;
92 }
93 ioDevice = gzReader.get();
94 }
95#else
96 if (result->inputMimetype() == QLatin1String("image/svg+xml-compressed")) {
97 return;
98 }
99#endif
100
101 QDomDocument doc;
102 const bool processNamespaces = true;
103 doc.setContent(ioDevice, processNamespaces);
104 QDomElement svg = doc.firstChildElement();
105
106 if (!svg.isNull()
107 && svg.localName() == QLatin1String("svg")
108 && svg.namespaceURI() == svgNS()) {
109
111 for (; !e.isNull(); e = e.nextSiblingElement()) {
112 if (e.namespaceURI() != svgNS()) {
113 continue;
114 }
115
116 if (e.localName() == QLatin1String("metadata")) {
117 if (!(flags & ExtractionResult::ExtractMetaData)) {
118 continue;
119 }
120
121 auto rdf = e.firstChildElement(QLatin1String("RDF"));
122 if (rdf.isNull() || rdf.namespaceURI() != rdfNS()) {
123 continue;
124 }
125
126 auto cc = rdf.firstChildElement(QLatin1String("Work"));
127 if (cc.isNull() || cc.namespaceURI() != ccNS()) {
128 continue;
129 }
130
131 DublinCoreExtractor::extract(result, cc);
132
133 } else if (e.localName() == QLatin1String("defs")) {
134 // skip
135 continue;
136 } else if (flags & ExtractionResult::ExtractPlainText) {
137 // extract
138 extractSvgText(result, e);
139 }
140 }
141 }
142 } else {
143 result->addType(Type::Text);
144
145 if (flags & ExtractionResult::ExtractPlainText) {
146 QXmlStreamReader stream(&file);
147 while (!stream.atEnd()) {
148 QXmlStreamReader::TokenType token = stream.readNext();
149
150 if (token == QXmlStreamReader::Characters) {
151 QString text = stream.text().trimmed().toString();
152 if (!text.isEmpty()) {
153 result->append(text);
154 }
155 }
156 }
157 }
158 }
159}
160
161} // namespace KFileMetaData
162
163#include "moc_xmlextractor.cpp"
The ExtractionResult class is where all the data extracted by the indexer is saved.
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
The KFileMetaData namespace.
ParseResult setContent(QAnyStringView text, ParseOptions options)
QString text() const const
QDomElement firstChildElement(const QString &tagName, const QString &namespaceURI) const const
bool isNull() const const
QString localName() const const
QString namespaceURI() const const
QDomElement nextSiblingElement(const QString &tagName, const QString &namespaceURI) const const
bool isEmpty() const const
QString trimmed() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:48:11 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.