KFileMetaData

fb2extractor.cpp
1/*
2 SPDX-FileCopyrightText: 2022 Kai Uwe Broulik <kde@broulik.de>
3
4 SPDX-License-Identifier: LGPL-2.1-or-later
5*/
6
7#include "datetimeparser_p.h"
8#include "fb2extractor.h"
9#include "kfilemetadata_debug.h"
10
11#include <QDateTime>
12#include <QFile>
13#include <QXmlStreamReader>
14
15#include <KZip>
16
17#include <memory>
18
19using namespace KFileMetaData;
20
21Fb2Extractor::Fb2Extractor(QObject *parent)
22 : ExtractorPlugin(parent)
23{
24}
25
26namespace
27{
28static const QString regularMimeType()
29{
30 return QStringLiteral("application/x-fictionbook+xml");
31}
32
33static const QString compressedMimeType()
34{
35 return QStringLiteral("application/x-zip-compressed-fb2");
36}
37
38static const QStringList supportedMimeTypes = {regularMimeType(), compressedMimeType()};
39
40}
41
42QStringList Fb2Extractor::mimetypes() const
43{
44 return supportedMimeTypes;
45}
46
47void Fb2Extractor::extract(ExtractionResult *result)
48{
49 std::unique_ptr<QIODevice> device;
50 std::unique_ptr<KZip> zip;
51
52 if (result->inputMimetype() == regularMimeType()) {
53 device.reset(new QFile(result->inputUrl()));
54 if (!device->open(QIODevice::ReadOnly | QIODevice::Text)) {
55 return;
56 }
57
58 } else if (result->inputMimetype() == compressedMimeType()) {
59 zip.reset(new KZip(result->inputUrl()));
60 if (!zip->open(QIODevice::ReadOnly)) {
61 qCDebug(KFILEMETADATA_LOG) << "Failed to open" << zip->fileName() << "-" << zip->errorString();
62 return;
63 }
64
65 const auto entries = zip->directory()->entries();
66 if (entries.count() != 1) {
67 return;
68 }
69
70 const QString entryPath = entries.first();
71 if (!entryPath.endsWith(QLatin1String(".fb2"))) {
72 return;
73 }
74
75 const auto *entry = zip->directory()->file(entryPath);
76 if (!entry) {
77 return;
78 }
79
80 device.reset(entry->createDevice());
81 }
82
83 result->addType(Type::Document);
84
85 QXmlStreamReader xml(device.get());
86
87 bool inFictionBook = false;
88 bool inDescription = false;
89 bool inTitleInfo = false;
90 bool inAuthor = false;
91 bool inDocumentInfo = false;
92 bool inPublishInfo = false;
93 bool inBody = false;
94
95 QString authorFirstName;
96 QString authorMiddleName;
97 QString authorLastName;
98 QString authorNickName;
99
100 while (!xml.atEnd() && !xml.hasError()) {
101 xml.readNext();
102
103 if (xml.name() == QLatin1String("FictionBook")) {
104 if (xml.isStartElement()) {
105 inFictionBook = true;
106 } else if (xml.isEndElement()) {
107 break;
108 }
109 } else if (xml.name() == QLatin1String("description")) {
110 if (xml.isStartElement()) {
111 inDescription = true;
112 } else if (xml.isEndElement()) {
113 inDescription = false;
114 }
115 } else if (xml.name() == QLatin1String("title-info")) {
116 if (xml.isStartElement()) {
117 inTitleInfo = true;
118 } else if (xml.isEndElement()) {
119 inTitleInfo = false;
120 }
121 } else if (xml.name() == QLatin1String("document-info")) {
122 if (xml.isStartElement()) {
123 inDocumentInfo = true;
124 } else if (xml.isEndElement()) {
125 inDocumentInfo = false;
126 }
127 } else if (xml.name() == QLatin1String("publish-info")) {
128 if (xml.isStartElement()) {
129 inPublishInfo = true;
130 } else if (xml.isEndElement()) {
131 inPublishInfo = false;
132 }
133 } else if (xml.name() == QLatin1String("body")) {
134 if (xml.isStartElement()) {
135 inBody = true;
136 } else if (xml.isEndElement()) {
137 inBody = false;
138 }
139 }
140
141 if (!inFictionBook) {
142 continue;
143 }
144
145 if (inDescription && result->inputFlags() & ExtractionResult::ExtractMetaData) {
146 if (inTitleInfo) {
147 if (xml.isStartElement()) {
148 if (xml.name() == QLatin1String("author")) {
149 inAuthor = true;
150 } else if (inAuthor) {
151 if (xml.name() == QLatin1String("first-name")) {
152 authorFirstName = xml.readElementText();
153 } else if (xml.name() == QLatin1String("middle-name")) {
154 authorMiddleName = xml.readElementText();
155 } else if (xml.name() == QLatin1String("last-name")) {
156 authorLastName = xml.readElementText();
157 } else if (xml.name() == QLatin1String("nickname")) {
158 authorNickName = xml.readElementText();
159 }
160 } else if (xml.name() == QLatin1String("book-title")) {
161 result->add(Property::Title, xml.readElementText());
162 } else if (xml.name() == QLatin1String("annotation")) {
163 result->add(Property::Description, xml.readElementText(QXmlStreamReader::IncludeChildElements).trimmed());
164 } else if (xml.name() == QLatin1String("lang")) {
165 result->add(Property::Language, xml.readElementText());
166 } else if (xml.name() == QLatin1String("genre")) {
167 result->add(Property::Genre, xml.readElementText());
168 }
169 } else if (xml.isEndElement()) {
170 inAuthor = false;
171
172 QStringList nameParts = {authorFirstName, authorMiddleName, authorLastName};
173 nameParts.removeAll(QString());
174
175 if (!nameParts.isEmpty()) {
176 result->add(Property::Author, nameParts.join(QLatin1Char(' ')));
177 } else if (!authorNickName.isEmpty()) {
178 result->add(Property::Author, authorNickName);
179 }
180
181 authorFirstName.clear();
182 authorMiddleName.clear();
183 authorLastName.clear();
184 authorNickName.clear();
185 }
186 } else if (inDocumentInfo) {
187 if (xml.name() == QLatin1String("date")) {
188 // Date can be "not exact" but date "value", if present, is an xs:date
189 const auto dateValue = xml.attributes().value(QLatin1String("value"));
190 QDateTime dt = QDateTime::fromString(dateValue.toString());
191
192 if (!dt.isValid()) {
193 dt = Parser::dateTimeFromString(xml.readElementText());
194 }
195
196 if (dt.isValid()) {
197 result->add(Property::CreationDate, dt);
198 }
199 } else if (xml.name() == QLatin1String("program-used")) {
200 result->add(Property::Generator, xml.readElementText());
201 // "Owner of the fb2 document copyrights"
202 } else if (xml.name() == QLatin1String("publisher")) {
203 result->add(Property::Copyright, xml.readElementText());
204 }
205 } else if (inPublishInfo) {
206 if (xml.name() == QLatin1String("publisher")) {
207 result->add(Property::Publisher, xml.readElementText());
208 } else if (xml.name() == QLatin1String("year")) {
209 bool ok;
210 const int releaseYear = xml.readElementText().toInt(&ok);
211 if (ok) {
212 result->add(Property::ReleaseYear, releaseYear);
213 }
214 }
215 }
216 } else if (inBody && result->inputFlags() & ExtractionResult::ExtractPlainText && xml.isCharacters() && !xml.isWhitespace()) {
217 result->append(xml.text().toString());
218 }
219 }
220}
221
222#include "moc_fb2extractor.cpp"
The ExtractionResult class is where all the data extracted by the indexer is saved.
QString inputUrl() const
The input URL which the plugins will use to locate the file.
virtual void addType(Type::Type type)=0
This function is called by the plugins.
QString inputMimetype() const
The input MIME type.
virtual void add(Property::Property property, const QVariant &value)=0
This function is called by the plugins when they wish to add a key value pair which should be indexed...
Flags inputFlags() const
The flags which the extraction plugin should consider following when extracting metadata from the ...
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
The ExtractorPlugin is the base class for all file metadata extractors.
@ Title
Refers to the Title of the content of the file.
Definition properties.h:121
@ Author
The Author field indicates the primary creator of a document.
Definition properties.h:114
@ Genre
The Genre of an Audio file.
Definition properties.h:52
@ Description
Represents the description stored in the file.
Definition properties.h:351
@ Generator
Refers to the Application used to create this file.
Definition properties.h:134
@ CreationDate
The date the content of the file was created.
Definition properties.h:177
@ Publisher
The publisher of the content.
Definition properties.h:169
@ Language
The language the document is written in.
Definition properties.h:159
@ Copyright
The copyright of the file.
Definition properties.h:164
@ ReleaseYear
Indicates the year a track was released.
Definition properties.h:71
@ Document
Any file which counts as a document.
Definition types.h:63
The KFileMetaData namespace.
QDateTime fromString(QStringView string, QStringView format, QCalendar cal)
bool isValid() const const
bool isEmpty() const const
qsizetype removeAll(const AT &t)
void clear()
bool endsWith(QChar c, Qt::CaseSensitivity cs) const const
bool isEmpty() const const
QString join(QChar separator) const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Apr 18 2025 12:03:48 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.