KFileMetaData

fb2extractor.cpp
1 /*
2  SPDX-FileCopyrightText: 2022 Kai Uwe Broulik <[email protected]>
3 
4  SPDX-License-Identifier: LGPL-2.1-or-later
5 */
6 
7 #include "fb2extractor.h"
8 #include "kfilemetadata_debug.h"
9 
10 #include <QDateTime>
11 #include <QFile>
12 #include <QXmlStreamReader>
13 
14 #include <KZip>
15 
16 #include <memory>
17 
18 using namespace KFileMetaData;
19 
20 Fb2Extractor::Fb2Extractor(QObject *parent)
21  : ExtractorPlugin(parent)
22 {
23 }
24 
25 namespace
26 {
27 static const QString regularMimeType()
28 {
29  return QStringLiteral("application/x-fictionbook+xml");
30 }
31 
32 static const QString compressedMimeType()
33 {
34  return QStringLiteral("application/x-zip-compressed-fb2");
35 }
36 
37 static const QStringList supportedMimeTypes = {regularMimeType(), compressedMimeType()};
38 
39 }
40 
41 QStringList Fb2Extractor::mimetypes() const
42 {
43  return supportedMimeTypes;
44 }
45 
46 void Fb2Extractor::extract(ExtractionResult *result)
47 {
48  std::unique_ptr<QIODevice> device;
49  std::unique_ptr<KZip> zip;
50 
51  if (result->inputMimetype() == regularMimeType()) {
52  device.reset(new QFile(result->inputUrl()));
53  if (!device->open(QIODevice::ReadOnly | QIODevice::Text)) {
54  return;
55  }
56 
57  } else if (result->inputMimetype() == compressedMimeType()) {
58  zip.reset(new KZip(result->inputUrl()));
59  if (!zip->open(QIODevice::ReadOnly)) {
60  return;
61  }
62 
63  const auto entries = zip->directory()->entries();
64  if (entries.count() != 1) {
65  return;
66  }
67 
68  const QString entryPath = entries.first();
69  if (!entryPath.endsWith(QLatin1String(".fb2"))) {
70  return;
71  }
72 
73  const auto *entry = zip->directory()->file(entryPath);
74  if (!entry) {
75  return;
76  }
77 
78  device.reset(entry->createDevice());
79  }
80 
81  result->addType(Type::Document);
82 
83  QXmlStreamReader xml(device.get());
84 
85  bool inFictionBook = false;
86  bool inDescription = false;
87  bool inTitleInfo = false;
88  bool inAuthor = false;
89  bool inDocumentInfo = false;
90  bool inPublishInfo = false;
91  bool inBody = false;
92 
93  QString authorFirstName;
94  QString authorMiddleName;
95  QString authorLastName;
96  QString authorNickName;
97 
98  while (!xml.atEnd() && !xml.hasError()) {
99  xml.readNext();
100 
101  if (xml.name() == QLatin1String("FictionBook")) {
102  if (xml.isStartElement()) {
103  inFictionBook = true;
104  } else if (xml.isEndElement()) {
105  break;
106  }
107  } else if (xml.name() == QLatin1String("description")) {
108  if (xml.isStartElement()) {
109  inDescription = true;
110  } else if (xml.isEndElement()) {
111  inDescription = false;
112  }
113  } else if (xml.name() == QLatin1String("title-info")) {
114  if (xml.isStartElement()) {
115  inTitleInfo = true;
116  } else if (xml.isEndElement()) {
117  inTitleInfo = false;
118  }
119  } else if (xml.name() == QLatin1String("document-info")) {
120  if (xml.isStartElement()) {
121  inDocumentInfo = true;
122  } else if (xml.isEndElement()) {
123  inDocumentInfo = false;
124  }
125  } else if (xml.name() == QLatin1String("publish-info")) {
126  if (xml.isStartElement()) {
127  inPublishInfo = true;
128  } else if (xml.isEndElement()) {
129  inPublishInfo = false;
130  }
131  } else if (xml.name() == QLatin1String("body")) {
132  if (xml.isStartElement()) {
133  inBody = true;
134  } else if (xml.isEndElement()) {
135  inBody = false;
136  }
137  }
138 
139  if (!inFictionBook) {
140  continue;
141  }
142 
143  if (inDescription && result->inputFlags() & ExtractionResult::ExtractMetaData) {
144  if (inTitleInfo) {
145  if (xml.isStartElement()) {
146  if (xml.name() == QLatin1String("author")) {
147  inAuthor = true;
148  } else if (inAuthor) {
149  if (xml.name() == QLatin1String("first-name")) {
150  authorFirstName = xml.readElementText();
151  } else if (xml.name() == QLatin1String("middle-name")) {
152  authorMiddleName = xml.readElementText();
153  } else if (xml.name() == QLatin1String("last-name")) {
154  authorLastName = xml.readElementText();
155  } else if (xml.name() == QLatin1String("nickname")) {
156  authorNickName = xml.readElementText();
157  }
158  } else if (xml.name() == QLatin1String("book-title")) {
159  result->add(Property::Title, xml.readElementText());
160  } else if (xml.name() == QLatin1String("annotation")) {
161  result->add(Property::Description, xml.readElementText(QXmlStreamReader::IncludeChildElements).trimmed());
162  } else if (xml.name() == QLatin1String("lang")) {
163  result->add(Property::Language, xml.readElementText());
164  } else if (xml.name() == QLatin1String("genre")) {
165  result->add(Property::Genre, xml.readElementText());
166  }
167  } else if (xml.isEndElement()) {
168  inAuthor = false;
169 
170  QStringList nameParts = {authorFirstName, authorMiddleName, authorLastName};
171  nameParts.removeAll(QString());
172 
173  if (!nameParts.isEmpty()) {
174  result->add(Property::Author, nameParts.join(QLatin1Char(' ')));
175  } else if (!authorNickName.isEmpty()) {
176  result->add(Property::Author, authorNickName);
177  }
178 
179  authorFirstName.clear();
180  authorMiddleName.clear();
181  authorLastName.clear();
182  authorNickName.clear();
183  }
184  } else if (inDocumentInfo) {
185  if (xml.name() == QLatin1String("date")) {
186  // Date can be "not exact" but date "value", if present, is an xs:date
187  const auto dateValue = xml.attributes().value(QLatin1String("value"));
188  QDateTime dt = QDateTime::fromString(dateValue.toString());
189 
190  if (!dt.isValid()) {
191  dt = ExtractorPlugin::dateTimeFromString(xml.readElementText());
192  }
193 
194  if (dt.isValid()) {
195  result->add(Property::CreationDate, dt);
196  }
197  } else if (xml.name() == QLatin1String("program-used")) {
198  result->add(Property::Generator, xml.readElementText());
199  // "Owner of the fb2 document copyrights"
200  } else if (xml.name() == QLatin1String("publisher")) {
201  result->add(Property::Copyright, xml.readElementText());
202  }
203  } else if (inPublishInfo) {
204  if (xml.name() == QLatin1String("publisher")) {
205  result->add(Property::Publisher, xml.readElementText());
206  } else if (xml.name() == QLatin1String("year")) {
207  bool ok;
208  const int releaseYear = xml.readElementText().toInt(&ok);
209  if (ok) {
210  result->add(Property::ReleaseYear, releaseYear);
211  }
212  }
213  }
214  } else if (inBody && result->inputFlags() & ExtractionResult::ExtractPlainText && xml.isCharacters() && !xml.isWhitespace()) {
215  result->append(xml.text().toString());
216  }
217  }
218 }
219 
220 #include "moc_fb2extractor.cpp"
bool endsWith(const QString &s, Qt::CaseSensitivity cs) const
static QDateTime dateTimeFromString(const QString &dateString)
Tries to extract a valid date time from the string provided.
virtual void addType(Type::Type type)=0
This function is called by the plugins.
The ExtractionResult class is where all the data extracted by the indexer is saved....
int removeAll(const T &value)
QString inputMimetype() const
The input mimetype.
void clear()
QString inputUrl() const
The input url which the plugins will use to locate the file.
Flags inputFlags() const
The flags which the extraction plugin should consider following when extracting metadata from the ...
bool isEmpty() const
bool isEmpty() const
QDateTime fromString(const QString &string, Qt::DateFormat format)
QString join(const QString &separator) const
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
virtual void add(Property::Property property, const QVariant &value)=0
This function is called by the plugins when they wish to add a key value pair which should be indexed...
bool isValid() const
The ExtractorPlugin is the base class for all file metadata extractors. It is responsible for extract...
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Thu Sep 21 2023 03:48:21 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.