KItinerary

mimedocumentprocessor.cpp
1/*
2 SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "mimedocumentprocessor.h"
8
9#include <KItinerary/ExtractorDocumentNodeFactory>
10#include <KItinerary/ExtractorEngine>
11#include <KItinerary/ExtractorFilter>
12
13#include <KMime/Message>
14
15#include <QDebug>
16
17using namespace Qt::Literals::StringLiterals;
18using namespace KItinerary;
19
20Q_DECLARE_METATYPE(KItinerary::Internal::OwnedPtr<const KMime::Content>)
21
22namespace {
23bool contentMightBeEmail(const QByteArray &data)
24{
25 // raw email
26 for (const auto c : data) {
27 if (std::isalpha(c) || c == '-') {
28 continue;
29 }
30 if (c == ':') {
31 return true;
32 }
33 break;
34 }
35
36 // mbox format
37 return data.startsWith("From ");
38}
39
40template <typename T>
41const T* findHeader(const KMime::Content *content)
42{
43 const auto header = content->header<T>();
44 if (header || !content->parent()) {
45 return header;
46 }
47 return findHeader<T>(content->parent());
48}
49
50const KMime::Headers::Base* findHeader(const KMime::Content *content, const char *headerType)
51{
52 const auto header = content->headerByType(headerType);
53 if (header || !content->parent()) {
54 return header;
55 }
56 return findHeader(content->parent(), headerType);
57}
58}
59
60bool MimeDocumentProcessor::canHandleData(const QByteArray &encodedData, QStringView fileName) const
61{
62 return contentMightBeEmail(encodedData) ||
65}
66
68{
69 auto msg = new KMime::Message;
70 msg->setContent(KMime::CRLFtoLF(encodedData));
71 if (msg->head().isEmpty() || msg->body().isEmpty()) {
72 delete msg;
73 return {};
74 }
75 msg->parse();
76
78 node.setContent<Internal::OwnedPtr<const KMime::Content>>(msg);
79
80 const auto dateHdr = findHeader<KMime::Headers::Date>(msg);
81 if (dateHdr) {
82 node.setContextDateTime(dateHdr->dateTime());
83 }
84
85 return node;
86}
87
89{
90 auto *content = decodedData.value<const KMime::Content*>();
91 if (!content) {
92 content = decodedData.value<const KMime::Message*>();
93 }
94 // TODO eventually remove the non-const cases
95 if (!content) {
96 content = decodedData.value<KMime::Content*>();
97 }
98 if (!content) {
99 content = decodedData.value<KMime::Message*>();
100 }
101
102 if (!content) {
103 return {};
104 }
105
107 node.setContent(content);
108
109 const auto dateHdr = findHeader<KMime::Headers::Date>(content);
110 if (dateHdr) {
111 node.setContextDateTime(dateHdr->dateTime());
112 }
113
114 return node;
115}
116
117static ExtractorDocumentNode expandContentNode(ExtractorDocumentNode &node, const KMime::Content *content, const ExtractorEngine *engine)
118{
119 QString fileName;
120 const auto contentType = content->contentType();
121 if (contentType) {
122 fileName = contentType->name();
123 }
124 const auto contentDisposition = content->contentDisposition();
125 if (fileName.isEmpty() && contentDisposition) {
126 fileName = contentDisposition->filename();
127 }
128
130 if ((contentType && contentType->isPlainText() && fileName.isEmpty()) || (!contentType && content->isTopLevel())) {
131 child = engine->documentNodeFactory()->createNode(content->decodedText(), u"text/plain");
132 } else if (contentType && contentType->isHTMLText()) {
133 child = engine->documentNodeFactory()->createNode(content->decodedText(), u"text/html");
134 } else if (content->bodyIsMessage()) {
135 child = engine->documentNodeFactory()->createNode(QVariant::fromValue(content->bodyAsMessage().get()), u"message/rfc822");
136 } else {
137 child = engine->documentNodeFactory()->createNode(content->decodedContent(), fileName);
138 }
139 node.appendChild(child);
140 return child;
141}
142
143static void expandContentNodeRecursive(ExtractorDocumentNode &node, const KMime::Content *content, const ExtractorEngine *engine)
144{
145 const auto ct = content->contentType();
146 const auto children = content->contents();
147 if (!ct || children.empty()) {
148 expandContentNode(node, content, engine);
149 return;
150 }
151
152 // special handling of multipart/related to add images to the corresponding HTML document
153 if (ct && ct->isMultipart() && ct->isSubtype("related") && ct->parameter("type") == "text/html"_L1 && children.size() >= 2) {
154 const KMime::Content *child = children.front();
155 if (child->contentType() && child->contentType()->isHTMLText()) {
156 auto htmlNode = expandContentNode(node, child, engine);
157 for (auto it = std::next(children.begin()); it != children.end(); ++it) {
158 const KMime::Content *imgChild = *it;
159 auto imgNode = expandContentNode(htmlNode, imgChild, engine);
160 const auto cid = imgChild->contentID();
161 if (cid) {
162 imgNode.setLocation(cid->identifier());
163 }
164 }
165 return;
166 }
167 }
168
169 for (const auto child : children) {
170 if (child->bodyIsMessage()) {
171 expandContentNode(node, child, engine); // do not recurse into nested emails, we want those as dedicated nodes
172 } else {
173 expandContentNodeRecursive(node, child, engine);
174 }
175 }
176}
177
179{
180 const auto content = node.content<const KMime::Content*>();
181 expandContentNodeRecursive(node, content, engine);
182}
183
185{
186 const auto content = node.content<const KMime::Content*>();
187 const auto header = findHeader(content, filter.fieldName().toUtf8().constData());
188 return header ? filter.matches(header->asUnicodeString()) : false;
189}
190
192{
193 destroyIfOwned<const KMime::Content>(node);
194}
ExtractorDocumentNode createNode(const QByteArray &data, QStringView fileName={}, QStringView mimeType={}) const
Create a new document node from data.
A node in the extracted document object tree.
void appendChild(ExtractorDocumentNode &child)
Add another child node.
void setContextDateTime(const QDateTime &contextDateTime)
Set the context date/time.
QJSValue content
The decoded content of this node.
void setContent(const QVariant &content)
Set decoded content.
Semantic data extraction engine.
const ExtractorDocumentNodeFactory * documentNodeFactory() const
Factory for creating new document nodes.
Determines whether an extractor is applicable to a given email.
void destroyNode(ExtractorDocumentNode &node) const override
Destroys type-specific data in node.
ExtractorDocumentNode createNodeFromContent(const QVariant &decodedData) const override
Create a document node from an already decoded data type.
bool matches(const ExtractorFilter &filter, const ExtractorDocumentNode &node) const override
Checks whether the given filter matches node.
void expandNode(ExtractorDocumentNode &node, const ExtractorEngine *engine) const override
Create child nodes for node, as far as that's necessary for this document type.
ExtractorDocumentNode createNodeFromData(const QByteArray &encodedData) const override
Create a document node from raw data.
bool canHandleData(const QByteArray &encodedData, QStringView fileName) const override
Fast check whether the given encoded data can possibly be processed by this instance.
QString decodedText(bool trimText, bool removeTrailingNewlines=false) const
const Headers::ContentType * contentType() const
T * header() const
Headers::Base * headerByType(QByteArrayView type) const
Content * parent()
QByteArray decodedContent() const
QSharedPointer< Message > bodyAsMessage()
bool isTopLevel() const
bool bodyIsMessage() const
const Headers::ContentID * contentID() const
void setContent(const QByteArray &s)
const Headers::ContentDisposition * contentDisposition() const
QList< Content * > contents()
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
T * get() const const
bool isEmpty() const const
bool endsWith(QChar ch) const const
CaseInsensitive
QVariant fromValue(T &&value)
T value() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Mon Nov 4 2024 16:28:48 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.