KFileMetaData

xmlextractor.cpp
1 /*
2  SPDX-FileCopyrightText: 2018 Stefan Brüns <[email protected]>
3 
4  SPDX-License-Identifier: LGPL-2.1-or-later
5 */
6 
7 
8 #include "xmlextractor.h"
9 #include "kfilemetadata_debug.h"
10 #include "dublincoreextractor.h"
11 
12 #include <QDomDocument>
13 #include <QFile>
14 #include <QXmlStreamReader>
15 
16 namespace {
17 
18 inline QString dcNS() { return QStringLiteral("http://purl.org/dc/elements/1.1/"); }
19 inline QString svgNS() { return QStringLiteral("http://www.w3.org/2000/svg"); }
20 inline QString rdfNS() { return QStringLiteral("http://www.w3.org/1999/02/22-rdf-syntax-ns#"); }
21 inline QString ccNS() { return QStringLiteral("http://creativecommons.org/ns#"); }
22 
23 void extractSvgText(KFileMetaData::ExtractionResult* result, const QDomElement &node)
24 {
25  if (node.namespaceURI() != svgNS()) {
26  return;
27  }
28 
29  if ((node.localName() == QLatin1String("g")) ||
30  (node.localName() == QLatin1String("a"))) {
31  QDomElement e = node.firstChildElement();
32  for (; !e.isNull(); e = e.nextSiblingElement()) {
33  extractSvgText(result, e);
34  }
35  } else if (node.localName() == QLatin1String("text")) {
36  qCDebug(KFILEMETADATA_LOG) << node.text();
37  result->append(node.text());
38  }
39 }
40 
41 static const QStringList supportedMimeTypes = {
42  QStringLiteral("application/xml"),
43  QStringLiteral("image/svg+xml"),
44  QStringLiteral("image/svg"),
45 };
46 
47 }
48 
49 namespace KFileMetaData
50 {
51 
52 XmlExtractor::XmlExtractor(QObject* parent)
53  : ExtractorPlugin(parent)
54 {
55 
56 }
57 
58 QStringList XmlExtractor::mimetypes() const
59 {
60  return supportedMimeTypes;
61 }
62 
63 void XmlExtractor::extract(ExtractionResult* result)
64 {
65  auto flags = result->inputFlags();
66  QFile file(result->inputUrl());
67  if (!file.open(QIODevice::ReadOnly)) {
68  qCWarning(KFILEMETADATA_LOG) << "Document is not a valid file";
69  return;
70  }
71 
72  if ((result->inputMimetype() == QLatin1String("image/svg")) ||
73  (result->inputMimetype() == QLatin1String("image/svg+xml"))) {
74  result->addType(Type::Image);
75 
76  QDomDocument doc;
77  const bool processNamespaces = true;
78  doc.setContent(&file, processNamespaces);
79  QDomElement svg = doc.firstChildElement();
80 
81  if (!svg.isNull()
82  && svg.localName() == QLatin1String("svg")
83  && svg.namespaceURI() == svgNS()) {
84 
86  for (; !e.isNull(); e = e.nextSiblingElement()) {
87  if (e.namespaceURI() != svgNS()) {
88  continue;
89  }
90 
91  if (e.localName() == QLatin1String("metadata")) {
92  if (!(flags & ExtractionResult::ExtractMetaData)) {
93  continue;
94  }
95 
96  auto rdf = e.firstChildElement(QLatin1String("RDF"));
97  if (rdf.isNull() || rdf.namespaceURI() != rdfNS()) {
98  continue;
99  }
100 
101  auto cc = rdf.firstChildElement(QLatin1String("Work"));
102  if (cc.isNull() || cc.namespaceURI() != ccNS()) {
103  continue;
104  }
105 
106  DublinCoreExtractor::extract(result, cc);
107 
108  } else if (e.localName() == QLatin1String("defs")) {
109  // skip
110  continue;
111  } else if (flags & ExtractionResult::ExtractPlainText) {
112  // extract
113  extractSvgText(result, e);
114  }
115  }
116  }
117  } else {
118  result->addType(Type::Text);
119 
120  if (flags & ExtractionResult::ExtractPlainText) {
121  QXmlStreamReader stream(&file);
122  while (!stream.atEnd()) {
123  QXmlStreamReader::TokenType token = stream.readNext();
124 
125  if (token == QXmlStreamReader::Characters) {
126  QString text = stream.text().trimmed().toString();
127  if (!text.isEmpty()) {
128  result->append(text);
129  }
130  }
131  }
132  }
133  }
134 }
135 
136 } // namespace KFileMetaData
ExtractionResult(const QString &url, const QString &mimetype=QString(), const Flags &flags=Flags{ExtractPlainText|ExtractMetaData})
Create an ExtractionResult which can be passed to Extractors.
QString & append(QChar ch)
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
QString namespaceURI() const const
QDomElement nextSiblingElement(const QString &tagName) const const
QString localName() const const
QString text() const const
bool isEmpty() const const
QString trimmed() const const
bool isNull() const const
QDomElement firstChildElement(const QString &tagName) const const
The ExtractionResult class is where all the data extracted by the indexer is saved.
bool setContent(const QByteArray &data, bool namespaceProcessing, QString *errorMsg, int *errorLine, int *errorColumn)
This file is part of the KDE documentation.
Documentation copyright © 1996-2021 The KDE developers.
Generated on Wed Jan 20 2021 22:58:09 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.