KFileMetaData

xmlextractor.cpp
1/*
2 SPDX-FileCopyrightText: 2018 Stefan Brüns <stefan.bruens@rwth-aachen.de>
3
4 SPDX-License-Identifier: LGPL-2.1-or-later
5*/
6
7
8#include "xmlextractor.h"
9#include "kfilemetadata_debug.h"
10#include "dublincoreextractor.h"
11
12#include <QDomDocument>
13#include <QFile>
14#include <QXmlStreamReader>
15
16namespace {
17
18//inline QString dcElementNS() { return QStringLiteral("http://purl.org/dc/elements/1.1/"); }
19inline QString svgNS() { return QStringLiteral("http://www.w3.org/2000/svg"); }
20inline QString rdfNS() { return QStringLiteral("http://www.w3.org/1999/02/22-rdf-syntax-ns#"); }
21inline QString ccNS() { return QStringLiteral("http://creativecommons.org/ns#"); }
22
23void extractSvgText(KFileMetaData::ExtractionResult* result, const QDomElement &node)
24{
25 if (node.namespaceURI() != svgNS()) {
26 return;
27 }
28
29 if ((node.localName() == QLatin1String("g")) ||
30 (node.localName() == QLatin1String("a"))) {
32 for (; !e.isNull(); e = e.nextSiblingElement()) {
33 extractSvgText(result, e);
34 }
35 } else if (node.localName() == QLatin1String("text")) {
36 qCDebug(KFILEMETADATA_LOG) << node.text();
37 result->append(node.text());
38 }
39}
40
41static const QStringList supportedMimeTypes = {
42 QStringLiteral("application/xml"),
43 QStringLiteral("image/svg+xml"),
44 QStringLiteral("image/svg"),
45};
46
47}
48
49namespace KFileMetaData
50{
51
52XmlExtractor::XmlExtractor(QObject* parent)
53 : ExtractorPlugin(parent)
54{
55
56}
57
58QStringList XmlExtractor::mimetypes() const
59{
60 return supportedMimeTypes;
61}
62
63void XmlExtractor::extract(ExtractionResult* result)
64{
65 auto flags = result->inputFlags();
66 QFile file(result->inputUrl());
67 if (!file.open(QIODevice::ReadOnly)) {
68 qCWarning(KFILEMETADATA_LOG) << "Document is not a valid file";
69 return;
70 }
71
72 if ((result->inputMimetype() == QLatin1String("image/svg")) ||
73 (result->inputMimetype() == QLatin1String("image/svg+xml"))) {
74 result->addType(Type::Image);
75
76 QDomDocument doc;
77 const bool processNamespaces = true;
78 doc.setContent(&file, processNamespaces);
80
81 if (!svg.isNull()
82 && svg.localName() == QLatin1String("svg")
83 && svg.namespaceURI() == svgNS()) {
84
86 for (; !e.isNull(); e = e.nextSiblingElement()) {
87 if (e.namespaceURI() != svgNS()) {
88 continue;
89 }
90
91 if (e.localName() == QLatin1String("metadata")) {
92 if (!(flags & ExtractionResult::ExtractMetaData)) {
93 continue;
94 }
95
96 auto rdf = e.firstChildElement(QLatin1String("RDF"));
97 if (rdf.isNull() || rdf.namespaceURI() != rdfNS()) {
98 continue;
99 }
100
101 auto cc = rdf.firstChildElement(QLatin1String("Work"));
102 if (cc.isNull() || cc.namespaceURI() != ccNS()) {
103 continue;
104 }
105
106 DublinCoreExtractor::extract(result, cc);
107
108 } else if (e.localName() == QLatin1String("defs")) {
109 // skip
110 continue;
111 } else if (flags & ExtractionResult::ExtractPlainText) {
112 // extract
113 extractSvgText(result, e);
114 }
115 }
116 }
117 } else {
118 result->addType(Type::Text);
119
120 if (flags & ExtractionResult::ExtractPlainText) {
121 QXmlStreamReader stream(&file);
122 while (!stream.atEnd()) {
123 QXmlStreamReader::TokenType token = stream.readNext();
124
125 if (token == QXmlStreamReader::Characters) {
126 QString text = stream.text().trimmed().toString();
127 if (!text.isEmpty()) {
128 result->append(text);
129 }
130 }
131 }
132 }
133 }
134}
135
136} // namespace KFileMetaData
137
138#include "moc_xmlextractor.cpp"
The ExtractionResult class is where all the data extracted by the indexer is saved.
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
bool setContent(const QByteArray &data, bool namespaceProcessing, QString *errorMsg, int *errorLine, int *errorColumn)
QString text() const
QDomElement firstChildElement(const QString &tagName) const
bool isNull() const
QString localName() const
QString namespaceURI() const
QDomElement nextSiblingElement(const QString &tagName) const
QString & append(QChar ch)
bool isEmpty() const
QString trimmed() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Sun Feb 25 2024 18:44:24 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.