KItinerary

mimedocumentprocessor.cpp
1/*
2 SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "mimedocumentprocessor.h"
8
9#include <KItinerary/ExtractorDocumentNodeFactory>
10#include <KItinerary/ExtractorEngine>
11#include <KItinerary/ExtractorFilter>
12
13#include <KMime/Message>
14
15#include <QDebug>
16
17using namespace KItinerary;
18
19Q_DECLARE_METATYPE(KItinerary::Internal::OwnedPtr<KMime::Content>)
20
21namespace {
22bool contentMightBeEmail(const QByteArray &data)
23{
24 // raw email
25 for (const auto c : data) {
26 if (std::isalpha(c) || c == '-') {
27 continue;
28 }
29 if (c == ':') {
30 return true;
31 }
32 break;
33 }
34
35 // mbox format
36 return data.startsWith("From ");
37}
38
39template <typename T>
40const T* findHeader(KMime::Content *content)
41{
42 auto header = content->header<T>();
43 if (header || !content->parent()) {
44 return header;
45 }
46 return findHeader<T>(content->parent());
47}
48
49const KMime::Headers::Base* findHeader(KMime::Content *content, const char *headerType)
50{
51 const auto header = content->headerByType(headerType);
52 if (header || !content->parent()) {
53 return header;
54 }
55 return findHeader(content->parent(), headerType);
56}
57}
58
// Fast pre-check on raw input: accepts data that contentMightBeEmail() considers a
// possible email.
// NOTE(review): the expression below continues after `||`, but the remaining operand(s)
// (original lines 62-63) are missing from this listing. Presumably they test fileName,
// which is otherwise unused here — restore them from the upstream file before building.
59bool MimeDocumentProcessor::canHandleData(const QByteArray &encodedData, QStringView fileName) const
60{
61 return contentMightBeEmail(encodedData) ||
64}
65
67{
// Body of the handler for raw input bytes (`encodedData`). It parses them as a
// KMime::Message and stores the message as the content of the returned node.
// NOTE(review): the signature line and the declaration of `node` (original lines 66
// and 76) are not visible in this listing.
68 auto msg = new KMime::Message;
// normalize CRLF line endings to LF before handing the data to KMime
69 msg->setContent(KMime::CRLFtoLF(encodedData));
// input lacking either a header section or a body is rejected with an empty result
70 if (msg->head().isEmpty() || msg->body().isEmpty()) {
71 delete msg;
72 return {};
73 }
74 msg->parse();
75
// NOTE(review): presumably the OwnedPtr wrapper marks the message as owned by the node,
// so that destroyIfOwned() can release it later — confirm against Internal::OwnedPtr
77 node.setContent<Internal::OwnedPtr<KMime::Content>>(msg);
78
// use the Date header, if one is found, as the context date/time of the node
79 const auto dateHdr = findHeader<KMime::Headers::Date>(msg);
80 if (dateHdr) {
81 node.setContextDateTime(dateHdr->dateTime());
82 }
83
84 return node;
85}
86
88{
// Body of the handler for already decoded content (`decodedData`). It accepts a variant
// holding either a KMime::Content* or a KMime::Message* and stores that pointer as the
// content of the returned node.
// NOTE(review): the signature line and the declaration of `node` (original lines 87
// and 97) are not visible in this listing.
89 auto *content = decodedData.value<KMime::Content*>();
// not a KMime::Content*: try again as KMime::Message*
90 if (!content) {
91 content = decodedData.value<KMime::Message*>();
92 }
// neither pointer type could be extracted: return an empty result
93 if (!content) {
94 return {};
95 }
96
// the raw pointer is stored as-is here, without the OwnedPtr wrapper
98 node.setContent(content);
99
// use the Date header of this content or of one of its parents, if any, as context date/time
100 const auto dateHdr = findHeader<KMime::Headers::Date>(content);
101 if (dateHdr) {
102 node.setContextDateTime(dateHdr->dateTime());
103 }
104
105 return node;
106}
107
// Creates a document node for a single MIME part, appends it as a child of @p node
// and returns it.
// @param node    parent node the new child is appended to
// @param content MIME part to create the child node from
// @param engine  provides the document node factory used to create the child
108static ExtractorDocumentNode expandContentNode(ExtractorDocumentNode &node, KMime::Content *content, const ExtractorEngine *engine)
109{
// determine a file name: the Content-Type name first, the Content-Disposition filename as fallback
110 QString fileName;
111 const auto contentType = content->contentType(false);
112 if (contentType) {
113 fileName = contentType->name();
114 }
115 const auto contentDisposition = content->contentDisposition(false);
116 if (fileName.isEmpty() && contentDisposition) {
117 fileName = contentDisposition->filename();
118 }
119
// NOTE(review): the declaration of `child` (original line 120) is not visible in this listing.
// Plain text: a text part without a file name, or a top-level content without a Content-Type header.
121 if ((contentType && contentType->isPlainText() && fileName.isEmpty()) || (!contentType && content->isTopLevel())) {
122 child = engine->documentNodeFactory()->createNode(content->decodedText(), u"text/plain");
123 } else if (contentType && contentType->isHTMLText()) {
124 child = engine->documentNodeFactory()->createNode(content->decodedText(), u"text/html");
// nested email: pass the message object on rather than its raw bytes
125 } else if (content->bodyIsMessage()) {
126 child = engine->documentNodeFactory()->createNode(QVariant::fromValue(content->bodyAsMessage().get()), u"message/rfc822");
// everything else: hand the decoded bytes and the file name to the factory
127 } else {
128 child = engine->documentNodeFactory()->createNode(content->decodedContent(), fileName);
129 }
130 node.appendChild(child);
131 return child;
132}
133
134static void expandContentNodeRecursive(ExtractorDocumentNode &node, KMime::Content *content, const ExtractorEngine *engine)
135{
136 const auto ct = content->contentType(false);
137 const auto children = content->contents();
138 if (!ct || children.empty()) {
139 expandContentNode(node, content, engine);
140 return;
141 }
142
143 // special handling of multipart/related to add images to the corresponding HTML document
144 if (ct && ct->isMultipart() && ct->isSubtype("related") &&
145 ct->parameter(QLatin1StringView("type")) ==
146 QLatin1String("text/html") &&
147 children.size() >= 2) {
148 const auto child = children.front();
149 if (child->contentType(false) &&
150 child->contentType(false)->isHTMLText()) {
151 auto htmlNode = expandContentNode(node, child, engine);
152 for (auto it = std::next(children.begin()); it != children.end();
153 ++it) {
154 auto imgNode = expandContentNode(htmlNode, (*it), engine);
155 const auto cid = (*it)->contentID(false);
156 if (cid) {
157 imgNode.setLocation(cid->identifier());
158 }
159 }
160 return;
161 }
162 }
163
164 for (const auto child : children) {
165 if (child->bodyIsMessage()) {
166 expandContentNode(node, child, engine); // do not recurse into nested emails, we want those as dedicated nodes
167 } else {
168 expandContentNodeRecursive(node, child, engine);
169 }
170 }
171}
172
174{
// Body of the node expansion handler: creates the child nodes for the MIME content
// stored in `node` via expandContentNodeRecursive().
// NOTE(review): the signature line (original line 173) is not visible in this listing.
175 const auto content = node.content<KMime::Content*>();
176 expandContentNodeRecursive(node, content, engine);
177}
178
180{
// Body of the filter matching handler: looks up the header named by the filter's field
// name on the node's content or one of its parents, and matches the filter against the
// header's unicode string value. Evaluates to false when no such header is found.
// NOTE(review): the signature line (original line 179) is not visible in this listing.
181 const auto content = node.content<KMime::Content*>();
182 const auto header = findHeader(content, filter.fieldName().toUtf8().constData());
183 return header ? filter.matches(header->asUnicodeString()) : false;
184}
185
187{
// Body of the node destruction handler.
// NOTE(review): presumably this deletes the KMime::Content only when it was stored via
// Internal::OwnedPtr — confirm against destroyIfOwned(). The signature line (original
// line 186) is not visible in this listing.
188 destroyIfOwned<KMime::Content>(node);
189}
ExtractorDocumentNode createNode(const QByteArray &data, QStringView fileName={}, QStringView mimeType={}) const
Create a new document node from data.
A node in the extracted document object tree.
void appendChild(ExtractorDocumentNode &child)
Add another child node.
void setContextDateTime(const QDateTime &contextDateTime)
Set the context date/time.
QJSValue content
The decoded content of this node.
void setContent(const QVariant &content)
Set decoded content.
Semantic data extraction engine.
const ExtractorDocumentNodeFactory * documentNodeFactory() const
Factory for creating new document nodes.
Determines whether an extractor is applicable to a given email.
void destroyNode(ExtractorDocumentNode &node) const override
Destroys type-specific data in node.
ExtractorDocumentNode createNodeFromContent(const QVariant &decodedData) const override
Create a document node from an already decoded data type.
bool matches(const ExtractorFilter &filter, const ExtractorDocumentNode &node) const override
Checks whether the given filter matches node.
void expandNode(ExtractorDocumentNode &node, const ExtractorEngine *engine) const override
Create child nodes for node, as far as that's necessary for this document type.
ExtractorDocumentNode createNodeFromData(const QByteArray &encodedData) const override
Create a document node from raw data.
bool canHandleData(const QByteArray &encodedData, QStringView fileName) const override
Fast check whether the given encoded data can possibly be processed by this instance.
Headers::ContentType * contentType(bool create=true)
QString decodedText(bool trimText=false, bool removeTrailingNewlines=false)
Content * parent() const
QSharedPointer< Message > bodyAsMessage() const
Headers::ContentDisposition * contentDisposition(bool create=true)
QByteArray decodedContent()
bool isTopLevel() const
bool bodyIsMessage() const
T * header(bool create=false)
void setContent(const QByteArray &s)
Headers::Base * headerByType(const char *type) const
QList< Content * > contents() const
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
bool startsWith(QByteArrayView bv) const
T * get() const
bool isEmpty() const
bool endsWith(QChar ch) const
CaseInsensitive
QVariant fromValue(T &&value)
T value() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:14:49 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.