KFileMetaData

xmlextractor.cpp
1 /*
2  SPDX-FileCopyrightText: 2018 Stefan Brüns <[email protected]>
3 
4  SPDX-License-Identifier: LGPL-2.1-or-later
5 */
6 
7 
8 #include "xmlextractor.h"
9 #include "kfilemetadata_debug.h"
10 #include "dublincoreextractor.h"
11 
12 #include <QDomDocument>
13 #include <QFile>
14 #include <QXmlStreamReader>
15 
16 namespace {
17 
18 //inline QString dcElementNS() { return QStringLiteral("http://purl.org/dc/elements/1.1/"); }
19 inline QString svgNS() { return QStringLiteral("http://www.w3.org/2000/svg"); }
20 inline QString rdfNS() { return QStringLiteral("http://www.w3.org/1999/02/22-rdf-syntax-ns#"); }
21 inline QString ccNS() { return QStringLiteral("http://creativecommons.org/ns#"); }
22 
23 void extractSvgText(KFileMetaData::ExtractionResult* result, const QDomElement &node)
24 {
25  if (node.namespaceURI() != svgNS()) {
26  return;
27  }
28 
29  if ((node.localName() == QLatin1String("g")) ||
30  (node.localName() == QLatin1String("a"))) {
31  QDomElement e = node.firstChildElement();
32  for (; !e.isNull(); e = e.nextSiblingElement()) {
33  extractSvgText(result, e);
34  }
35  } else if (node.localName() == QLatin1String("text")) {
36  qCDebug(KFILEMETADATA_LOG) << node.text();
37  result->append(node.text());
38  }
39 }
40 
41 static const QStringList supportedMimeTypes = {
42  QStringLiteral("application/xml"),
43  QStringLiteral("image/svg+xml"),
44  QStringLiteral("image/svg"),
45 };
46 
47 }
48 
49 namespace KFileMetaData
50 {
51 
52 XmlExtractor::XmlExtractor(QObject* parent)
53  : ExtractorPlugin(parent)
54 {
55 
56 }
57 
58 QStringList XmlExtractor::mimetypes() const
59 {
60  return supportedMimeTypes;
61 }
62 
63 void XmlExtractor::extract(ExtractionResult* result)
64 {
65  auto flags = result->inputFlags();
66  QFile file(result->inputUrl());
67  if (!file.open(QIODevice::ReadOnly)) {
68  qCWarning(KFILEMETADATA_LOG) << "Document is not a valid file";
69  return;
70  }
71 
72  if ((result->inputMimetype() == QLatin1String("image/svg")) ||
73  (result->inputMimetype() == QLatin1String("image/svg+xml"))) {
74  result->addType(Type::Image);
75 
76  QDomDocument doc;
77  const bool processNamespaces = true;
78  doc.setContent(&file, processNamespaces);
79  QDomElement svg = doc.firstChildElement();
80 
81  if (!svg.isNull()
82  && svg.localName() == QLatin1String("svg")
83  && svg.namespaceURI() == svgNS()) {
84 
86  for (; !e.isNull(); e = e.nextSiblingElement()) {
87  if (e.namespaceURI() != svgNS()) {
88  continue;
89  }
90 
91  if (e.localName() == QLatin1String("metadata")) {
92  if (!(flags & ExtractionResult::ExtractMetaData)) {
93  continue;
94  }
95 
96  auto rdf = e.firstChildElement(QLatin1String("RDF"));
97  if (rdf.isNull() || rdf.namespaceURI() != rdfNS()) {
98  continue;
99  }
100 
101  auto cc = rdf.firstChildElement(QLatin1String("Work"));
102  if (cc.isNull() || cc.namespaceURI() != ccNS()) {
103  continue;
104  }
105 
106  DublinCoreExtractor::extract(result, cc);
107 
108  } else if (e.localName() == QLatin1String("defs")) {
109  // skip
110  continue;
111  } else if (flags & ExtractionResult::ExtractPlainText) {
112  // extract
113  extractSvgText(result, e);
114  }
115  }
116  }
117  } else {
118  result->addType(Type::Text);
119 
120  if (flags & ExtractionResult::ExtractPlainText) {
121  QXmlStreamReader stream(&file);
122  while (!stream.atEnd()) {
123  QXmlStreamReader::TokenType token = stream.readNext();
124 
125  if (token == QXmlStreamReader::Characters) {
126  QString text = stream.text().trimmed().toString();
127  if (!text.isEmpty()) {
128  result->append(text);
129  }
130  }
131  }
132  }
133  }
134 }
135 
136 } // namespace KFileMetaData
QString text() const
The ExtractionResult class is where all the data extracted by the indexer is saved....
bool isNull() const
QString trimmed() const
bool setContent(const QByteArray &data, bool namespaceProcessing, QString *errorMsg, int *errorLine, int *errorColumn)
QString namespaceURI() const
QString localName() const
bool isEmpty() const
QDomElement nextSiblingElement(const QString &tagName) const
QDomElement firstChildElement(const QString &tagName) const
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
QString & append(QChar ch)
This file is part of the KDE documentation.
Documentation copyright © 1996-2022 The KDE developers.
Generated on Fri May 27 2022 03:47:54 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.