KFileMetaData

xmlextractor.cpp
1 /*
2  Copyright (C) 2018 Stefan Brüns <[email protected]>
3 
4  This library is free software; you can redistribute it and/or
5  modify it under the terms of the GNU Lesser General Public
6  License as published by the Free Software Foundation; either
7  version 2.1 of the License, or (at your option) any later version.
8 
9  This library is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  Lesser General Public License for more details.
13 
14  You should have received a copy of the GNU Lesser General Public
15  License along with this library; if not, write to the Free Software
16  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18 
19 
20 #include "xmlextractor.h"
21 #include "kfilemetadata_debug.h"
22 #include "dublincoreextractor.h"
23 
24 #include <QDomDocument>
25 #include <QFile>
26 #include <QXmlStreamReader>
27 
28 namespace {
29 
30 inline QString dcNS() { return QStringLiteral("http://purl.org/dc/elements/1.1/"); }
31 inline QString svgNS() { return QStringLiteral("http://www.w3.org/2000/svg"); }
32 inline QString rdfNS() { return QStringLiteral("http://www.w3.org/1999/02/22-rdf-syntax-ns#"); }
33 inline QString ccNS() { return QStringLiteral("http://creativecommons.org/ns#"); }
34 
35 void extractSvgText(KFileMetaData::ExtractionResult* result, const QDomElement &node)
36 {
37  if (node.namespaceURI() != svgNS()) {
38  return;
39  }
40 
41  if ((node.localName() == QLatin1String("g")) ||
42  (node.localName() == QLatin1String("a"))) {
43  QDomElement e = node.firstChildElement();
44  for (; !e.isNull(); e = e.nextSiblingElement()) {
45  extractSvgText(result, e);
46  }
47  } else if (node.localName() == QLatin1String("text")) {
48  qCDebug(KFILEMETADATA_LOG) << node.text();
49  result->append(node.text());
50  }
51 }
52 
53 static const QStringList supportedMimeTypes = {
54  QStringLiteral("application/xml"),
55  QStringLiteral("image/svg+xml"),
56  QStringLiteral("image/svg"),
57 };
58 
59 }
60 
61 namespace KFileMetaData
62 {
63 
64 XmlExtractor::XmlExtractor(QObject* parent)
65  : ExtractorPlugin(parent)
66 {
67 
68 }
69 
70 QStringList XmlExtractor::mimetypes() const
71 {
72  return supportedMimeTypes;
73 }
74 
75 void XmlExtractor::extract(ExtractionResult* result)
76 {
77  auto flags = result->inputFlags();
78  QFile file(result->inputUrl());
79  if (!file.open(QIODevice::ReadOnly)) {
80  qCWarning(KFILEMETADATA_LOG) << "Document is not a valid file";
81  return;
82  }
83 
84  if ((result->inputMimetype() == QLatin1String("image/svg")) ||
85  (result->inputMimetype() == QLatin1String("image/svg+xml"))) {
86  result->addType(Type::Image);
87 
88  QDomDocument doc;
89  const bool processNamespaces = true;
90  doc.setContent(&file, processNamespaces);
91  QDomElement svg = doc.firstChildElement();
92 
93  if (!svg.isNull()
94  && svg.localName() == QLatin1String("svg")
95  && svg.namespaceURI() == svgNS()) {
96 
98  for (; !e.isNull(); e = e.nextSiblingElement()) {
99  if (e.namespaceURI() != svgNS()) {
100  continue;
101  }
102 
103  if (e.localName() == QLatin1String("metadata")) {
104  if (!(flags & ExtractionResult::ExtractMetaData)) {
105  continue;
106  }
107 
108  auto rdf = e.firstChildElement(QLatin1String("RDF"));
109  if (rdf.isNull() || rdf.namespaceURI() != rdfNS()) {
110  continue;
111  }
112 
113  auto cc = rdf.firstChildElement(QLatin1String("Work"));
114  if (cc.isNull() || cc.namespaceURI() != ccNS()) {
115  continue;
116  }
117 
118  DublinCoreExtractor::extract(result, cc);
119 
120  } else if (e.localName() == QLatin1String("defs")) {
121  // skip
122  continue;
123  } else if (flags & ExtractionResult::ExtractPlainText) {
124  // extract
125  extractSvgText(result, e);
126  }
127  }
128  }
129  } else {
130  result->addType(Type::Text);
131 
132  if (flags & ExtractionResult::ExtractPlainText) {
133  QXmlStreamReader stream(&file);
134  while (!stream.atEnd()) {
135  QXmlStreamReader::TokenType token = stream.readNext();
136 
137  if (token == QXmlStreamReader::Characters) {
138  QString text = stream.text().trimmed().toString();
139  if (!text.isEmpty()) {
140  result->append(text);
141  }
142  }
143  }
144  }
145  }
146 }
147 
148 } // namespace KFileMetaData
QString & append(QChar ch)
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
QString namespaceURI() const
QDomElement nextSiblingElement(const QString &tagName) const
QString localName() const
QString text() const
bool isEmpty() const
QString trimmed() const
bool isNull() const
QDomElement firstChildElement(const QString &tagName) const
ExtractionResult(const QString &url, const QString &mimetype=QString(), const Flags &flags=ExtractEverything)
Create an ExtractionResult which can be passed to Extractors.
The ExtractionResult class is where all the data extracted by the indexer is saved.
bool setContent(const QByteArray &data, bool namespaceProcessing, QString *errorMsg, int *errorLine, int *errorColumn)
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Tue Jun 2 2020 22:55:51 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.