KFileMetaData

fb2extractor.cpp
1/*
2 SPDX-FileCopyrightText: 2022 Kai Uwe Broulik <kde@broulik.de>
3
4 SPDX-License-Identifier: LGPL-2.1-or-later
5*/
6
7#include "datetimeparser_p.h"
8#include "fb2extractor.h"
9#include "kfilemetadata_debug.h"
10
11#include <QDateTime>
12#include <QFile>
13#include <QXmlStreamReader>
14
15#include <KZip>
16
17#include <memory>
18
19using namespace KFileMetaData;
20
21Fb2Extractor::Fb2Extractor(QObject *parent)
22 : ExtractorPlugin(parent)
23{
24}
25
26namespace
27{
28static const QString regularMimeType()
29{
30 return QStringLiteral("application/x-fictionbook+xml");
31}
32
33static const QString compressedMimeType()
34{
35 return QStringLiteral("application/x-zip-compressed-fb2");
36}
37
38static const QStringList supportedMimeTypes = {regularMimeType(), compressedMimeType()};
39
40}
41
42QStringList Fb2Extractor::mimetypes() const
43{
44 return supportedMimeTypes;
45}
46
47void Fb2Extractor::extract(ExtractionResult *result)
48{
49 std::unique_ptr<QIODevice> device;
50 std::unique_ptr<KZip> zip;
51
52 if (result->inputMimetype() == regularMimeType()) {
53 device.reset(new QFile(result->inputUrl()));
54 if (!device->open(QIODevice::ReadOnly | QIODevice::Text)) {
55 return;
56 }
57
58 } else if (result->inputMimetype() == compressedMimeType()) {
59 zip.reset(new KZip(result->inputUrl()));
60 if (!zip->open(QIODevice::ReadOnly)) {
61 return;
62 }
63
64 const auto entries = zip->directory()->entries();
65 if (entries.count() != 1) {
66 return;
67 }
68
69 const QString entryPath = entries.first();
70 if (!entryPath.endsWith(QLatin1String(".fb2"))) {
71 return;
72 }
73
74 const auto *entry = zip->directory()->file(entryPath);
75 if (!entry) {
76 return;
77 }
78
79 device.reset(entry->createDevice());
80 }
81
82 result->addType(Type::Document);
83
84 QXmlStreamReader xml(device.get());
85
86 bool inFictionBook = false;
87 bool inDescription = false;
88 bool inTitleInfo = false;
89 bool inAuthor = false;
90 bool inDocumentInfo = false;
91 bool inPublishInfo = false;
92 bool inBody = false;
93
94 QString authorFirstName;
95 QString authorMiddleName;
96 QString authorLastName;
97 QString authorNickName;
98
99 while (!xml.atEnd() && !xml.hasError()) {
100 xml.readNext();
101
102 if (xml.name() == QLatin1String("FictionBook")) {
103 if (xml.isStartElement()) {
104 inFictionBook = true;
105 } else if (xml.isEndElement()) {
106 break;
107 }
108 } else if (xml.name() == QLatin1String("description")) {
109 if (xml.isStartElement()) {
110 inDescription = true;
111 } else if (xml.isEndElement()) {
112 inDescription = false;
113 }
114 } else if (xml.name() == QLatin1String("title-info")) {
115 if (xml.isStartElement()) {
116 inTitleInfo = true;
117 } else if (xml.isEndElement()) {
118 inTitleInfo = false;
119 }
120 } else if (xml.name() == QLatin1String("document-info")) {
121 if (xml.isStartElement()) {
122 inDocumentInfo = true;
123 } else if (xml.isEndElement()) {
124 inDocumentInfo = false;
125 }
126 } else if (xml.name() == QLatin1String("publish-info")) {
127 if (xml.isStartElement()) {
128 inPublishInfo = true;
129 } else if (xml.isEndElement()) {
130 inPublishInfo = false;
131 }
132 } else if (xml.name() == QLatin1String("body")) {
133 if (xml.isStartElement()) {
134 inBody = true;
135 } else if (xml.isEndElement()) {
136 inBody = false;
137 }
138 }
139
140 if (!inFictionBook) {
141 continue;
142 }
143
144 if (inDescription && result->inputFlags() & ExtractionResult::ExtractMetaData) {
145 if (inTitleInfo) {
146 if (xml.isStartElement()) {
147 if (xml.name() == QLatin1String("author")) {
148 inAuthor = true;
149 } else if (inAuthor) {
150 if (xml.name() == QLatin1String("first-name")) {
151 authorFirstName = xml.readElementText();
152 } else if (xml.name() == QLatin1String("middle-name")) {
153 authorMiddleName = xml.readElementText();
154 } else if (xml.name() == QLatin1String("last-name")) {
155 authorLastName = xml.readElementText();
156 } else if (xml.name() == QLatin1String("nickname")) {
157 authorNickName = xml.readElementText();
158 }
159 } else if (xml.name() == QLatin1String("book-title")) {
160 result->add(Property::Title, xml.readElementText());
161 } else if (xml.name() == QLatin1String("annotation")) {
162 result->add(Property::Description, xml.readElementText(QXmlStreamReader::IncludeChildElements).trimmed());
163 } else if (xml.name() == QLatin1String("lang")) {
164 result->add(Property::Language, xml.readElementText());
165 } else if (xml.name() == QLatin1String("genre")) {
166 result->add(Property::Genre, xml.readElementText());
167 }
168 } else if (xml.isEndElement()) {
169 inAuthor = false;
170
171 QStringList nameParts = {authorFirstName, authorMiddleName, authorLastName};
172 nameParts.removeAll(QString());
173
174 if (!nameParts.isEmpty()) {
175 result->add(Property::Author, nameParts.join(QLatin1Char(' ')));
176 } else if (!authorNickName.isEmpty()) {
177 result->add(Property::Author, authorNickName);
178 }
179
180 authorFirstName.clear();
181 authorMiddleName.clear();
182 authorLastName.clear();
183 authorNickName.clear();
184 }
185 } else if (inDocumentInfo) {
186 if (xml.name() == QLatin1String("date")) {
187 // Date can be "not exact" but date "value", if present, is an xs:date
188 const auto dateValue = xml.attributes().value(QLatin1String("value"));
189 QDateTime dt = QDateTime::fromString(dateValue.toString());
190
191 if (!dt.isValid()) {
192 dt = Parser::dateTimeFromString(xml.readElementText());
193 }
194
195 if (dt.isValid()) {
196 result->add(Property::CreationDate, dt);
197 }
198 } else if (xml.name() == QLatin1String("program-used")) {
199 result->add(Property::Generator, xml.readElementText());
200 // "Owner of the fb2 document copyrights"
201 } else if (xml.name() == QLatin1String("publisher")) {
202 result->add(Property::Copyright, xml.readElementText());
203 }
204 } else if (inPublishInfo) {
205 if (xml.name() == QLatin1String("publisher")) {
206 result->add(Property::Publisher, xml.readElementText());
207 } else if (xml.name() == QLatin1String("year")) {
208 bool ok;
209 const int releaseYear = xml.readElementText().toInt(&ok);
210 if (ok) {
211 result->add(Property::ReleaseYear, releaseYear);
212 }
213 }
214 }
215 } else if (inBody && result->inputFlags() & ExtractionResult::ExtractPlainText && xml.isCharacters() && !xml.isWhitespace()) {
216 result->append(xml.text().toString());
217 }
218 }
219}
220
221#include "moc_fb2extractor.cpp"
The ExtractionResult class is where all the data extracted by the indexer is saved.
QString inputUrl() const
The input URL which the plugins will use to locate the file.
virtual void addType(Type::Type type)=0
This function is called by the plugins.
QString inputMimetype() const
The input MIME type.
virtual void add(Property::Property property, const QVariant &value)=0
This function is called by the plugins when they wish to add a key value pair which should be indexed...
Flags inputFlags() const
The flags which the extraction plugin should consider following when extracting metadata from the ...
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
The ExtractorPlugin is the base class for all file metadata extractors.
@ Title
Refers to the Title of the content of the file.
Definition properties.h:121
@ Author
The Author field indicates the primary creator of a document.
Definition properties.h:114
@ Genre
The Genre of an Audio file.
Definition properties.h:52
@ Description
Represents the description stored in the file.
Definition properties.h:351
@ Generator
Refers to the Application used to create this file.
Definition properties.h:134
@ CreationDate
The date the content of the file was created.
Definition properties.h:177
@ Publisher
The publisher of the content.
Definition properties.h:169
@ Language
The language the document is written in.
Definition properties.h:159
@ Copyright
The copyright of the file.
Definition properties.h:164
@ ReleaseYear
Indicates the year a track was released.
Definition properties.h:71
@ Document
Any file which counts as a document.
Definition types.h:63
The KFileMetaData namespace.
QDateTime fromString(QStringView string, QStringView format, QCalendar cal)
bool isValid() const
bool isEmpty() const
qsizetype removeAll(const AT &t)
void clear()
bool endsWith(QChar c, Qt::CaseSensitivity cs) const
bool isEmpty() const
QString join(QChar separator) const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Feb 21 2025 11:53:46 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.