KItinerary

pdfextractoroutputdevice.cpp
1/*
2 SPDX-FileCopyrightText: 2019 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "pdfextractoroutputdevice_p.h"
8#include "pdfbarcodeutil_p.h"
9#include "pdfimage.h"
10#include "pdfimage_p.h"
11#include "popplerutils_p.h"
12
13#include <Annot.h>
14#include <Link.h>
15#include <Page.h>
16
17#include <QDebug>
18
19using namespace KItinerary;
20
21PdfExtractorOutputDevice::PdfExtractorOutputDevice()
22 : TextOutputDev(nullptr, false, 0, false, false)
23{
24}
25
26void PdfExtractorOutputDevice::addRasterImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, PdfImageType type)
27{
28 if ((!colorMap && type == PdfImageType::Image) || (colorMap && !colorMap->isOk()) || (ref && !ref->isRef()) || (!ref && !str)) {
29 return;
30 }
31
32 QImage::Format format;
33 if (!colorMap && type != PdfImageType::Image) {
34 format = QImage::Format_Mono;
35 } else if (colorMap->getColorSpace()->getMode() == csIndexed) {
36 format = QImage::Format_RGB888;
37 } else if (colorMap->getNumPixelComps() == 1 && (colorMap->getBits() >= 1 && colorMap->getBits() <= 8)) {
39 } else if (colorMap->getNumPixelComps() == 3 && colorMap->getBits() == 8) {
40 format = QImage::Format_RGB888;
41 } else {
42 return;
43 }
44
45 PdfImage pdfImg;
46 if (ref) {
47 pdfImg.d->m_ref = PdfImageRef(ref->getRef().num, ref->getRef().gen, type);
48 }
49
50#if KPOPPLER_VERSION >= QT_VERSION_CHECK(0, 69, 0)
51 if (colorMap) {
52 pdfImg.d->m_colorMap.reset(colorMap->copy());
53 }
54#endif
55 pdfImg.d->m_sourceHeight = height;
56 pdfImg.d->m_sourceWidth = width;
57 pdfImg.d->m_width = width;
58 pdfImg.d->m_height = height;
59 // deal with aspect-ratio changing scaling
60 const auto sourceAspectRatio = (double)width / (double)height;
61 const auto targetAspectRatio = std::abs(state->getCTM()[0] / -state->getCTM()[3]);
62 if (!qFuzzyCompare(sourceAspectRatio, targetAspectRatio) && qFuzzyIsNull(state->getCTM()[1]) && qFuzzyIsNull(state->getCTM()[2])) {
63 if (targetAspectRatio > sourceAspectRatio) {
64 pdfImg.d->m_width = width * targetAspectRatio / sourceAspectRatio;
65 } else {
66 pdfImg.d->m_height = height * sourceAspectRatio / targetAspectRatio;
67 }
68 }
69 pdfImg.d->m_transform = PopplerUtils::currentTransform(state);
70 pdfImg.d->m_format = format;
71
72 if (!ref) {
73 pdfImg.d->load(str, colorMap);
74 }
75
76 m_images.push_back(pdfImg);
77}
78
79void PdfExtractorOutputDevice::drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, bool invert, bool interpolate, bool inlineImg)
80{
81 Q_UNUSED(invert);
82 Q_UNUSED(interpolate);
83
84 if (!str && !inlineImg) {
85 return;
86 }
87 addRasterImage(state, ref, str, width, height, nullptr, PdfImageType::Mask);
88}
89
90void PdfExtractorOutputDevice::drawImage(GfxState* state, Object* ref, Stream* str, int width, int height, GfxImageColorMap* colorMap, bool interpolate, PopplerMaskColors* maskColors, bool inlineImg)
91{
92 Q_UNUSED(interpolate)
93 Q_UNUSED(maskColors)
94
95 if (!str && !inlineImg) {
96 return;
97 }
98 addRasterImage(state, ref, str, width, height, colorMap, PdfImageType::Image);
99}
100
101void PdfExtractorOutputDevice::drawMaskedImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool interpolate, Stream *maskStr, int maskWidth, int maskHeight, bool maskInvert, bool maskInterpolate)
102{
103 Q_UNUSED(interpolate)
104 Q_UNUSED(maskInvert)
105 Q_UNUSED(maskInterpolate)
106
107 addRasterImage(state, ref, str, width, height, colorMap, PdfImageType::Image);
108
109 if (ref) {
110 const auto dict = str->getDict();
111 const auto maskObj = dict->lookup("Mask");
112 if (maskObj.isStream()) {
113 addRasterImage(state, ref, maskStr, maskWidth, maskHeight, nullptr, PdfImageType::Mask);
114 }
115 }
116}
117
118void PdfExtractorOutputDevice::saveState(GfxState *state)
119{
120 Q_UNUSED(state)
121 m_vectorOps.push_back(VectorOp{VectorOp::PushState, {}, {}});
122}
123
124void PdfExtractorOutputDevice::restoreState(GfxState *state)
125{
126 Q_UNUSED(state)
127 if (m_vectorOps.empty()) {
128 return;
129 }
130 const auto &lastOp = *(m_vectorOps.end() -1);
131 if (lastOp.type == VectorOp::PushState) {
132 m_vectorOps.resize(m_vectorOps.size() - 1);
133 } else {
134 m_vectorOps.push_back(VectorOp{VectorOp::PopState, {}, {}});
135 }
136}
137
138static bool isRelevantStroke(const QPen &pen)
139{
140 return !qFuzzyCompare(pen.widthF(), 0.0) && pen.color() == Qt::black;
141}
142
143static bool isRectangularPath(const QPainterPath &path)
144{
145 qreal x = 0.0, y = 0.0;
146 for (int i = 0; i < path.elementCount(); ++i) {
147 const auto elem = path.elementAt(i);
148 switch (elem.type) {
150 x = elem.x;
151 y = elem.y;
152 break;
154 if (x != elem.x && y != elem.y) {
155 qDebug() << "path contains diagonal line, discarding";
156 return false;
157 }
158 x = elem.x;
159 y = elem.y;
160 break;
163 qDebug() << "path contains a curve, discarding";
164 return false;
165 }
166 }
167
168 return true;
169}
170
171void PdfExtractorOutputDevice::stroke(GfxState *state)
172{
173 const auto pen = PopplerUtils::currentPen(state);
174 if (!isRelevantStroke(pen)) {
175 return;
176 }
177
178 const auto path = PopplerUtils::convertPath(state->getPath(), Qt::WindingFill);
179 if (!isRectangularPath(path)) {
180 return;
181 }
182 const auto t = PopplerUtils::currentTransform(state);
183 m_vectorOps.push_back(VectorOp{VectorOp::Path, t, {path, pen, QBrush()}});
184}
185
186static bool isRelevantFill(const QBrush &brush)
187{
188 return brush.color() == Qt::black;
189}
190
191void PdfExtractorOutputDevice::fill(GfxState *state)
192{
193 const auto brush = PopplerUtils::currentBrush(state);
194 if (!isRelevantFill(brush)) {
195 return;
196 }
197
198 const auto path = PopplerUtils::convertPath(state->getPath(), Qt::WindingFill);
199 const auto b = path.boundingRect();
200 if (b.width() == 0 || b.height() == 0) {
201 return;
202 }
203
204 const auto t = PopplerUtils::currentTransform(state);
205 m_vectorOps.push_back(VectorOp{VectorOp::Path, t, {path, QPen(), brush}});
206}
207
208void PdfExtractorOutputDevice::eoFill(GfxState *state)
209{
210 const auto brush = PopplerUtils::currentBrush(state);
211 if (!isRelevantFill(brush)) {
212 return;
213 }
214
215 const auto path = PopplerUtils::convertPath(state->getPath(), Qt::OddEvenFill);
216 const auto b = path.boundingRect();
217 if (b.width() == 0 || b.height() == 0) {
218 return;
219 }
220
221 const auto t = PopplerUtils::currentTransform(state);
222 m_vectorOps.push_back(VectorOp{VectorOp::Path, t, {path, QPen(), brush}});
223}
224
225void PdfExtractorOutputDevice::finalize()
226{
227 // remove single state groups, then try to merge adjacents paths
228 std::vector<VectorOp> mergedOps;
229 mergedOps.reserve(m_vectorOps.size());
230 for (auto it = m_vectorOps.begin(); it != m_vectorOps.end(); ++it) {
231 if ((*it).type == VectorOp::PushState && std::distance(it, m_vectorOps.end()) >= 2 && (*(it + 1)).type == VectorOp::Path && (*(it + 2)).type == VectorOp::PopState) {
232 ++it;
233 mergedOps.push_back(*it);
234 ++it;
235 } else {
236 mergedOps.push_back(*it);
237 }
238 }
239 //qDebug() << m_vectorOps.size() << mergedOps.size();
240
241 std::vector<PdfVectorPicture::PathStroke> strokes;
242 QTransform t;
243 for (const auto &op : mergedOps) {
244 if (op.type == VectorOp::Path) {
245 if (t.isIdentity()) {
246 t = op.transform;
247 }
248 if (t != op.transform) {
249 //qDebug() << "diffent transforms for strokes, not supported yet";
250 continue;
251 }
252 strokes.push_back(op.stroke);
253 } else if (!strokes.empty()) {
254 PdfVectorPicture pic;
255 pic.setStrokes(std::move(strokes));
256 pic.setTransform(t);
257 addVectorImage(pic);
258 t = QTransform();
259 }
260 }
261 if (!strokes.empty()) {
262 PdfVectorPicture pic;
263 pic.setStrokes(std::move(strokes));
264 pic.setTransform(t);
265 addVectorImage(pic);
266 }
267}
268
269void PdfExtractorOutputDevice::addVectorImage(const PdfVectorPicture &pic)
270{
271 if (PdfBarcodeUtil::isPlausiblePath(pic.pathElementsCount(), BarcodeDecoder::Any) == BarcodeDecoder::None) {
272 return;
273 }
274
275 PdfImage img;
276 img.d->m_height = pic.height();
277 img.d->m_width = pic.width();
278 img.d->m_sourceHeight = pic.sourceHeight();
279 img.d->m_sourceWidth = pic.sourceWidth();
280 img.d->m_transform = pic.transform();
281 img.d->m_vectorPicture = pic;
282 m_images.push_back(img);
283}
284
285void PdfExtractorOutputDevice::processLink(AnnotLink *link)
286{
287 TextOutputDev::processLink(link);
288 if (!link->isOk() || !link->getAction() || link->getAction()->getKind() != actionURI) {
289 return;
290 }
291
292 const auto uriLink = static_cast<LinkURI*>(link->getAction());
293 double xd1, yd1, xd2, yd2;
294 link->getRect(&xd1, &yd1, &xd2, &yd2);
295
296 double xu1, yu1, xu2, yu2;
297 cvtDevToUser(xd1, yd1, &xu1, &yu1);
298 cvtDevToUser(xd2, yd2, &xu2, &yu2);
299 PdfLink l(QString::fromStdString(uriLink->getURI()), QRectF(QPointF(std::min(xu1, xu2), std::min(yu1, yu2)), QPointF(std::max(xu1, xu2), std::max(yu1, yu2))));
300 m_links.push_back(std::move(l));
301}
PDF object reference for an image, with the ability to address attached masks as well.
Definition pdfimage.h:43
An image in a PDF document.
Definition pdfimage.h:73
KIOCORE_EXPORT CopyJob * link(const QList< QUrl > &src, const QUrl &destDir, JobFlags flags=DefaultFlags)
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
PdfImageType
PDF image element type.
Definition pdfimage.h:35
QString path(const QString &relativePath)
const QColor & color() const
QColor color() const
qreal widthF() const
QString fromStdString(const std::string &str)
WindingFill
bool isIdentity() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:14:49 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.