KFileMetaData

xmlextractor.cpp
1/*
2 SPDX-FileCopyrightText: 2018 Stefan Brüns <stefan.bruens@rwth-aachen.de>
3
4 SPDX-License-Identifier: LGPL-2.1-or-later
5*/
6
7
8#include "xmlextractor.h"
9#include "kfilemetadata_debug.h"
10#include "dublincoreextractor.h"
11
12#include <QDomDocument>
13#include <QFile>
14#include <QXmlStreamReader>
15
16namespace {
17
18//inline QString dcElementNS() { return QStringLiteral("http://purl.org/dc/elements/1.1/"); }
19inline QString svgNS() { return QStringLiteral("http://www.w3.org/2000/svg"); }
20inline QString rdfNS() { return QStringLiteral("http://www.w3.org/1999/02/22-rdf-syntax-ns#"); }
21inline QString ccNS() { return QStringLiteral("http://creativecommons.org/ns#"); }
22
23void extractSvgText(KFileMetaData::ExtractionResult* result, const QDomElement &node)
24{
25 if (node.namespaceURI() != svgNS()) {
26 return;
27 }
28
29 if ((node.localName() == QLatin1String("g")) ||
30 (node.localName() == QLatin1String("a"))) {
32 for (; !e.isNull(); e = e.nextSiblingElement()) {
33 extractSvgText(result, e);
34 }
35 } else if (node.localName() == QLatin1String("text")) {
36 qCDebug(KFILEMETADATA_LOG) << node.text();
37 result->append(node.text());
38 }
39}
40
41static const QStringList supportedMimeTypes = {
42 QStringLiteral("application/xml"),
43 QStringLiteral("image/svg+xml"),
44 QStringLiteral("image/svg"),
45};
46
47}
48
49namespace KFileMetaData
50{
51
52XmlExtractor::XmlExtractor(QObject* parent)
53 : ExtractorPlugin(parent)
54{
55
56}
57
58QStringList XmlExtractor::mimetypes() const
59{
60 return supportedMimeTypes;
61}
62
63void XmlExtractor::extract(ExtractionResult* result)
64{
65 auto flags = result->inputFlags();
66 QFile file(result->inputUrl());
67 if (!file.open(QIODevice::ReadOnly)) {
68 qCWarning(KFILEMETADATA_LOG) << "Document is not a valid file";
69 return;
70 }
71
72 if ((result->inputMimetype() == QLatin1String("image/svg")) ||
73 (result->inputMimetype() == QLatin1String("image/svg+xml"))) {
74 result->addType(Type::Image);
75
76 QDomDocument doc;
77 const bool processNamespaces = true;
78 doc.setContent(&file, processNamespaces);
80
81 if (!svg.isNull()
82 && svg.localName() == QLatin1String("svg")
83 && svg.namespaceURI() == svgNS()) {
84
86 for (; !e.isNull(); e = e.nextSiblingElement()) {
87 if (e.namespaceURI() != svgNS()) {
88 continue;
89 }
90
91 if (e.localName() == QLatin1String("metadata")) {
92 if (!(flags & ExtractionResult::ExtractMetaData)) {
93 continue;
94 }
95
96 auto rdf = e.firstChildElement(QLatin1String("RDF"));
97 if (rdf.isNull() || rdf.namespaceURI() != rdfNS()) {
98 continue;
99 }
100
101 auto cc = rdf.firstChildElement(QLatin1String("Work"));
102 if (cc.isNull() || cc.namespaceURI() != ccNS()) {
103 continue;
104 }
105
106 DublinCoreExtractor::extract(result, cc);
107
108 } else if (e.localName() == QLatin1String("defs")) {
109 // skip
110 continue;
111 } else if (flags & ExtractionResult::ExtractPlainText) {
112 // extract
113 extractSvgText(result, e);
114 }
115 }
116 }
117 } else {
118 result->addType(Type::Text);
119
120 if (flags & ExtractionResult::ExtractPlainText) {
121 QXmlStreamReader stream(&file);
122 while (!stream.atEnd()) {
123 QXmlStreamReader::TokenType token = stream.readNext();
124
125 if (token == QXmlStreamReader::Characters) {
126 QString text = stream.text().trimmed().toString();
127 if (!text.isEmpty()) {
128 result->append(text);
129 }
130 }
131 }
132 }
133 }
134}
135
136} // namespace KFileMetaData
137
138#include "moc_xmlextractor.cpp"
The ExtractionResult class is where all the data extracted by the indexer is saved.
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
ParseResult setContent(QAnyStringView text, ParseOptions options)
QString text() const const
QDomElement firstChildElement(const QString &tagName, const QString &namespaceURI) const const
bool isNull() const const
QString localName() const const
QString namespaceURI() const const
QDomElement nextSiblingElement(const QString &tagName, const QString &namespaceURI) const const
QString & append(QChar ch)
bool isEmpty() const const
QString trimmed() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:17:54 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.