KFileMetaData

popplerextractor.cpp
1 /*
2  SPDX-FileCopyrightText: 2012 Vishesh Handa <[email protected]>
3  SPDX-FileCopyrightText: 2012 Jörg Ehrichs <[email protected]>
4 
5  SPDX-License-Identifier: LGPL-2.1-or-later
6 */
7 
8 
9 #include "popplerextractor.h"
10 
11 #include <QScopedPointer>
12 #include <QDebug>
13 #include <QDateTime>
14 
15 using namespace KFileMetaData;
16 
17 PopplerExtractor::PopplerExtractor(QObject* parent)
18  : ExtractorPlugin(parent)
19 {
20 
21 }
22 
23 const QStringList supportedMimeTypes = {
24  QStringLiteral("application/pdf"),
25 };
26 
27 QStringList PopplerExtractor::mimetypes() const
28 {
29  return supportedMimeTypes;
30 }
31 
32 void PopplerExtractor::extract(ExtractionResult* result)
33 {
34  const QString fileUrl = result->inputUrl();
35  std::unique_ptr<Poppler::Document> pdfDoc(Poppler::Document::load(fileUrl, QByteArray(), QByteArray()));
36 
37  if (!pdfDoc || pdfDoc->isLocked()) {
38  return;
39  }
40 
41  result->addType(Type::Document);
42 
43  if (result->inputFlags() & ExtractionResult::ExtractMetaData) {
44  const QString title = pdfDoc->title();
45  if (!title.isEmpty()) {
46  result->add(Property::Title, title);
47  }
48 
49  const QString subject = pdfDoc->subject();
50  if (!subject.isEmpty()) {
51  result->add(Property::Subject, subject);
52  }
53 
54  const QString author = pdfDoc->author();
55  if (!author.isEmpty()) {
56  result->add(Property::Author, author);
57  }
58 
59  const QString generator = pdfDoc->producer();
60  if (!generator.isEmpty()) {
61  result->add(Property::Generator, generator);
62  }
63 
64  const QDateTime creationDate = pdfDoc->creationDate();
65  if (!creationDate.isNull()) {
66  result->add(Property::CreationDate, creationDate);
67  }
68 
69  const int numPages = pdfDoc->numPages();
70  if (numPages > 0) {
71  result->add(Property::PageCount, numPages);
72  }
73  }
74 
75  if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
76  return;
77  }
78 
79  for (int i = 0; i < pdfDoc->numPages(); i++) {
80  std::unique_ptr<Poppler::Page> page(pdfDoc->page(i));
81  if (!page) { // broken pdf files do not return a valid page
82  qWarning() << "Could not read page content from" << fileUrl;
83  break;
84  }
85  result->append(page->text(QRectF()));
86  }
87 }
virtual void addType(Type::Type type)=0
This function is called by the plugins.
The ExtractionResult class is where all the data extracted by the indexer is saved....
bool isNull() const
QString inputUrl() const
The input url which the plugins will use to locate the file.
Flags inputFlags() const
The flags which the extraction plugin should consider following when extracting metadata from the ...
bool isEmpty() const
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
virtual void add(Property::Property property, const QVariant &value)=0
This function is called by the plugins when they wish to add a key value pair which should be indexed...
The ExtractorPlugin is the base class for all file metadata extractors. It is responsible for extract...
This file is part of the KDE documentation.
Documentation copyright © 1996-2022 The KDE developers.
Generated on Fri May 27 2022 03:47:54 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.