KItinerary

pdfextractoroutputdevice.cpp
1/*
2 SPDX-FileCopyrightText: 2019 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "pdfextractoroutputdevice_p.h"
8#include "pdfbarcodeutil_p.h"
9#include "pdfimage.h"
10#include "pdfimage_p.h"
11#include "popplerutils_p.h"
12
13#include <Annot.h>
14#include <Link.h>
15#include <Page.h>
16
17#include <QDebug>
18
19using namespace KItinerary;
20
21PdfExtractorOutputDevice::PdfExtractorOutputDevice()
22 : TextOutputDev(nullptr, false, 0, false, false)
23{
24}
25
26void PdfExtractorOutputDevice::addRasterImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, PdfImageType type)
27{
28 if ((!colorMap && type == PdfImageType::Image) || (colorMap && !colorMap->isOk()) || (ref && !ref->isRef()) || (!ref && !str)) {
29 return;
30 }
31
32 QImage::Format format;
33 if (!colorMap && type == PdfImageType::Mask) {
34 format = QImage::Format_Mono;
35 } else if (colorMap->getColorSpace()->getMode() == csIndexed) {
36 format = QImage::Format_RGB888;
37 } else if (colorMap->getNumPixelComps() == 1 && (colorMap->getBits() >= 1 && colorMap->getBits() <= 8)) {
39 } else if (colorMap->getNumPixelComps() == 3 && colorMap->getBits() == 8) {
40 format = QImage::Format_RGB888;
41 } else {
42 return;
43 }
44
45 PdfImage pdfImg;
46 if (ref) {
47 pdfImg.d->m_ref = PdfImageRef(ref->getRef().num, ref->getRef().gen, type);
48 }
49
50 if (colorMap) {
51 pdfImg.d->m_colorMap.reset(colorMap->copy());
52 }
53 pdfImg.d->m_sourceHeight = height;
54 pdfImg.d->m_sourceWidth = width;
55 pdfImg.d->m_width = width;
56 pdfImg.d->m_height = height;
57 // deal with aspect-ratio changing scaling
58 const auto sourceAspectRatio = (double)width / (double)height;
59 const auto targetAspectRatio = std::abs(state->getCTM()[0] / -state->getCTM()[3]);
60 if (!qFuzzyCompare(sourceAspectRatio, targetAspectRatio) && qFuzzyIsNull(state->getCTM()[1]) && qFuzzyIsNull(state->getCTM()[2])) {
61 if (targetAspectRatio > sourceAspectRatio) {
62 pdfImg.d->m_width = width * targetAspectRatio / sourceAspectRatio;
63 } else {
64 pdfImg.d->m_height = height * sourceAspectRatio / targetAspectRatio;
65 }
66 }
67 pdfImg.d->m_transform = PopplerUtils::currentTransform(state);
68 pdfImg.d->m_format = format;
69
70 if (!ref) {
71 pdfImg.d->load(str, colorMap);
72 }
73
74 m_images.push_back(pdfImg);
75}
76
77void PdfExtractorOutputDevice::drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, bool invert, bool interpolate, bool inlineImg)
78{
79 Q_UNUSED(invert);
80 Q_UNUSED(interpolate);
81
82 if (!str && !inlineImg) {
83 return;
84 }
85 addRasterImage(state, ref, str, width, height, nullptr, PdfImageType::Mask);
86}
87
88void PdfExtractorOutputDevice::drawImage(GfxState* state, Object* ref, Stream* str, int width, int height, GfxImageColorMap* colorMap, bool interpolate, PopplerMaskColors* maskColors, bool inlineImg)
89{
90 Q_UNUSED(interpolate)
91 Q_UNUSED(maskColors)
92
93 if (!str && !inlineImg) {
94 return;
95 }
96 addRasterImage(state, ref, str, width, height, colorMap, PdfImageType::Image);
97}
98
99void PdfExtractorOutputDevice::drawMaskedImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool interpolate, Stream *maskStr, int maskWidth, int maskHeight, bool maskInvert, bool maskInterpolate)
100{
101 Q_UNUSED(interpolate)
102 Q_UNUSED(maskInvert)
103 Q_UNUSED(maskInterpolate)
104
105 addRasterImage(state, ref, str, width, height, colorMap, PdfImageType::Image);
106
107 if (ref) {
108 const auto dict = str->getDict();
109 const auto maskObj = dict->lookup("Mask");
110 if (maskObj.isStream()) {
111 addRasterImage(state, ref, maskStr, maskWidth, maskHeight, nullptr, PdfImageType::Mask);
112 }
113 }
114}
115
116void PdfExtractorOutputDevice::drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool interpolate, Stream *maskStr, int maskWidth, int maskHeight, GfxImageColorMap *maskColorMap, bool maskInterpolate)
117{
118 Q_UNUSED(interpolate);
119 Q_UNUSED(maskInterpolate);
120
121 addRasterImage(state, ref, str, width, height, colorMap, PdfImageType::Image);
122 if (ref) {
123 const auto dict = str->getDict();
124 const auto maskObj = dict->lookup("SMask");
125 if (maskObj.isStream()) {
126 addRasterImage(state, ref, maskStr, maskWidth, maskHeight, maskColorMap, PdfImageType::SMask);
127 }
128 }
129}
130
131void PdfExtractorOutputDevice::saveState(GfxState *state)
132{
133 Q_UNUSED(state)
134 m_vectorOps.push_back(VectorOp{VectorOp::PushState, {}, {}});
135}
136
137void PdfExtractorOutputDevice::restoreState(GfxState *state)
138{
139 Q_UNUSED(state)
140 if (m_vectorOps.empty()) {
141 return;
142 }
143 const auto &lastOp = *(m_vectorOps.end() -1);
144 if (lastOp.type == VectorOp::PushState) {
145 m_vectorOps.resize(m_vectorOps.size() - 1);
146 } else {
147 m_vectorOps.push_back(VectorOp{VectorOp::PopState, {}, {}});
148 }
149}
150
151static bool isRelevantStroke(const QPen &pen)
152{
153 return !qFuzzyCompare(pen.widthF(), 0.0) && pen.color() == Qt::black;
154}
155
156static bool isRectangularPath(const QPainterPath &path)
157{
158 qreal x = 0.0, y = 0.0;
159 for (int i = 0; i < path.elementCount(); ++i) {
160 const auto elem = path.elementAt(i);
161 switch (elem.type) {
163 x = elem.x;
164 y = elem.y;
165 break;
167 if (x != elem.x && y != elem.y) {
168 qDebug() << "path contains diagonal line, discarding";
169 return false;
170 }
171 x = elem.x;
172 y = elem.y;
173 break;
176 qDebug() << "path contains a curve, discarding";
177 return false;
178 }
179 }
180
181 return true;
182}
183
184void PdfExtractorOutputDevice::stroke(GfxState *state)
185{
186 const auto pen = PopplerUtils::currentPen(state);
187 if (!isRelevantStroke(pen)) {
188 return;
189 }
190
191 const auto path = PopplerUtils::convertPath(state->getPath(), Qt::WindingFill);
192 if (!isRectangularPath(path)) {
193 return;
194 }
195 const auto t = PopplerUtils::currentTransform(state);
196 m_vectorOps.push_back(VectorOp{VectorOp::Path, t, {path, pen, QBrush()}});
197}
198
199static bool isRelevantFill(const QBrush &brush)
200{
201 return brush.color() == Qt::black;
202}
203
204void PdfExtractorOutputDevice::fill(GfxState *state)
205{
206 const auto brush = PopplerUtils::currentBrush(state);
207 if (!isRelevantFill(brush)) {
208 return;
209 }
210
211 const auto path = PopplerUtils::convertPath(state->getPath(), Qt::WindingFill);
212 const auto b = path.boundingRect();
213 if (b.width() == 0 || b.height() == 0) {
214 return;
215 }
216
217 const auto t = PopplerUtils::currentTransform(state);
218 m_vectorOps.push_back(VectorOp{VectorOp::Path, t, {path, QPen(), brush}});
219}
220
221void PdfExtractorOutputDevice::eoFill(GfxState *state)
222{
223 const auto brush = PopplerUtils::currentBrush(state);
224 if (!isRelevantFill(brush)) {
225 return;
226 }
227
228 const auto path = PopplerUtils::convertPath(state->getPath(), Qt::OddEvenFill);
229 const auto b = path.boundingRect();
230 if (b.width() == 0 || b.height() == 0) {
231 return;
232 }
233
234 const auto t = PopplerUtils::currentTransform(state);
235 m_vectorOps.push_back(VectorOp{VectorOp::Path, t, {path, QPen(), brush}});
236}
237
238void PdfExtractorOutputDevice::finalize()
239{
240 // remove single state groups, then try to merge adjacents paths
241 std::vector<VectorOp> mergedOps;
242 mergedOps.reserve(m_vectorOps.size());
243 for (auto it = m_vectorOps.begin(); it != m_vectorOps.end(); ++it) {
244 if ((*it).type == VectorOp::PushState && std::distance(it, m_vectorOps.end()) >= 2 && (*(it + 1)).type == VectorOp::Path && (*(it + 2)).type == VectorOp::PopState) {
245 ++it;
246 mergedOps.push_back(*it);
247 ++it;
248 } else {
249 mergedOps.push_back(*it);
250 }
251 }
252 //qDebug() << m_vectorOps.size() << mergedOps.size();
253
254 std::vector<PdfVectorPicture::PathStroke> strokes;
255 QTransform t;
256 for (const auto &op : mergedOps) {
257 if (op.type == VectorOp::Path) {
258 if (t.isIdentity()) {
259 t = op.transform;
260 }
261 if (t != op.transform) {
262 //qDebug() << "diffent transforms for strokes, not supported yet";
263 continue;
264 }
265 strokes.push_back(op.stroke);
266 } else if (!strokes.empty()) {
267 PdfVectorPicture pic;
268 pic.setStrokes(std::move(strokes));
269 pic.setTransform(t);
270 addVectorImage(pic);
271 t = QTransform();
272 }
273 }
274 if (!strokes.empty()) {
275 PdfVectorPicture pic;
276 pic.setStrokes(std::move(strokes));
277 pic.setTransform(t);
278 addVectorImage(pic);
279 }
280}
281
282void PdfExtractorOutputDevice::addVectorImage(const PdfVectorPicture &pic)
283{
284 if (PdfBarcodeUtil::isPlausiblePath(pic.pathElementsCount(), BarcodeDecoder::Any) == BarcodeDecoder::None) {
285 return;
286 }
287
288 PdfImage img;
289 img.d->m_height = pic.height();
290 img.d->m_width = pic.width();
291 img.d->m_sourceHeight = pic.sourceHeight();
292 img.d->m_sourceWidth = pic.sourceWidth();
293 img.d->m_transform = pic.transform();
294 img.d->m_vectorPicture = pic;
295 m_images.push_back(img);
296}
297
298void PdfExtractorOutputDevice::processLink(AnnotLink *link)
299{
300 TextOutputDev::processLink(link);
301 if (!link->isOk() || !link->getAction() || link->getAction()->getKind() != actionURI) {
302 return;
303 }
304
305 const auto uriLink = static_cast<LinkURI*>(link->getAction());
306 double xd1, yd1, xd2, yd2;
307 link->getRect(&xd1, &yd1, &xd2, &yd2);
308
309 double xu1, yu1, xu2, yu2;
310 cvtDevToUser(xd1, yd1, &xu1, &yu1);
311 cvtDevToUser(xd2, yd2, &xu2, &yu2);
312 PdfLink l(QString::fromStdString(uriLink->getURI()), QRectF(QPointF(std::min(xu1, xu2), std::min(yu1, yu2)), QPointF(std::max(xu1, xu2), std::max(yu1, yu2))));
313 m_links.push_back(std::move(l));
314}
PDF object reference for an image, with the ability to address attached masks as well.
Definition pdfimage.h:43
An image in a PDF document.
Definition pdfimage.h:74
KIOCORE_EXPORT CopyJob * link(const QList< QUrl > &src, const QUrl &destDir, JobFlags flags=DefaultFlags)
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
PdfImageType
PDF image element type.
Definition pdfimage.h:35
QString path(const QString &relativePath)
const QColor & color() const
QColor color() const
qreal widthF() const
QString fromStdString(const std::string &str)
WindingFill
bool isIdentity() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Fri Oct 4 2024 12:00:26 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.