KFileMetaData

popplerextractor.cpp
1/*
2 SPDX-FileCopyrightText: 2012 Vishesh Handa <me@vhanda.in>
3 SPDX-FileCopyrightText: 2012 Jörg Ehrichs <joerg.ehrichs@gmx.de>
4
5 SPDX-License-Identifier: LGPL-2.1-or-later
6*/
7
8
9#include "popplerextractor.h"
10
11#include <QScopedPointer>
12#include <QDebug>
13#include <QDateTime>
14
15using namespace KFileMetaData;
16
17PopplerExtractor::PopplerExtractor(QObject* parent)
18 : ExtractorPlugin(parent)
19{
20
21}
22
23const QStringList supportedMimeTypes = {
24 QStringLiteral("application/pdf"),
25};
26
27QStringList PopplerExtractor::mimetypes() const
28{
29 return supportedMimeTypes;
30}
31
32void PopplerExtractor::extract(ExtractionResult* result)
33{
34 const QString fileUrl = result->inputUrl();
35 std::unique_ptr<Poppler::Document> pdfDoc(Poppler::Document::load(fileUrl, QByteArray(), QByteArray()));
36
37 if (!pdfDoc || pdfDoc->isLocked()) {
38 return;
39 }
40
41 result->addType(Type::Document);
42
43 if (result->inputFlags() & ExtractionResult::ExtractMetaData) {
44 const QString title = pdfDoc->title();
45 if (!title.isEmpty()) {
46 result->add(Property::Title, title);
47 }
48
49 const QString subject = pdfDoc->subject();
50 if (!subject.isEmpty()) {
51 result->add(Property::Subject, subject);
52 }
53
54 const QString author = pdfDoc->author();
55 if (!author.isEmpty()) {
56 result->add(Property::Author, author);
57 }
58
59 const QString generator = pdfDoc->producer();
60 if (!generator.isEmpty()) {
61 result->add(Property::Generator, generator);
62 }
63
64 const QDateTime creationDate = pdfDoc->creationDate();
65 if (!creationDate.isNull()) {
66 result->add(Property::CreationDate, creationDate);
67 }
68
69 const int numPages = pdfDoc->numPages();
70 if (numPages > 0) {
71 result->add(Property::PageCount, numPages);
72 }
73 }
74
75 if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
76 return;
77 }
78
79 for (int i = 0; i < pdfDoc->numPages(); i++) {
80 std::unique_ptr<Poppler::Page> page(pdfDoc->page(i));
81 if (!page) { // broken pdf files do not return a valid page
82 qWarning() << "Could not read page content from" << fileUrl;
83 break;
84 }
85 result->append(page->text(QRectF()));
86 }
87}
88
89#include "moc_popplerextractor.cpp"
The ExtractionResult class is where all the data extracted by the indexer is saved.
QString inputUrl() const
The input URL which the plugins will use to locate the file.
virtual void addType(Type::Type type)=0
This function is called by the plugins.
virtual void add(Property::Property property, const QVariant &value)=0
This function is called by the plugins when they wish to add a key value pair which should be indexed...
Flags inputFlags() const
The flags which the extraction plugin should consider following when extracting metadata from the ...
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
The ExtractorPlugin is the base class for all file metadata extractors.
@ Subject
Refers to the subject of the file.
Definition properties.h:127
@ Title
Refers to the Title of the content of the file.
Definition properties.h:121
@ Author
The Author field indicates the primary creator of a document.
Definition properties.h:114
@ Generator
Refers to the Application used to create this file.
Definition properties.h:134
@ PageCount
The number of pages in a document.
Definition properties.h:139
@ CreationDate
The date the content of the file was created.
Definition properties.h:177
@ Document
Any file which counts as a document.
Definition types.h:63
The KFileMetaData namespace.
bool isNull() const const
bool isEmpty() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:48:11 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.