KItinerary

mimedocumentprocessor.cpp
1/*
2 SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "mimedocumentprocessor.h"
8
9#include <KItinerary/ExtractorDocumentNodeFactory>
10#include <KItinerary/ExtractorEngine>
11#include <KItinerary/ExtractorFilter>
12
13#include <KMime/Message>
14
15#include <QDebug>
16
17using namespace Qt::Literals::StringLiterals;
18using namespace KItinerary;
19
20Q_DECLARE_METATYPE(KItinerary::Internal::OwnedPtr<KMime::Content>)
21
22namespace {
23bool contentMightBeEmail(const QByteArray &data)
24{
25 // raw email
26 for (const auto c : data) {
27 if (std::isalpha(c) || c == '-') {
28 continue;
29 }
30 if (c == ':') {
31 return true;
32 }
33 break;
34 }
35
36 // mbox format
37 return data.startsWith("From ");
38}
39
40template <typename T>
41const T* findHeader(const KMime::Content *content)
42{
43 const auto header = content->header<T>();
44 if (header || !content->parent()) {
45 return header;
46 }
47 return findHeader<T>(content->parent());
48}
49
50const KMime::Headers::Base* findHeader(const KMime::Content *content, const char *headerType)
51{
52 const auto header = content->headerByType(headerType);
53 if (header || !content->parent()) {
54 return header;
55 }
56 return findHeader(content->parent(), headerType);
57}
58}
59
60bool MimeDocumentProcessor::canHandleData(const QByteArray &encodedData, QStringView fileName) const
61{
62 return contentMightBeEmail(encodedData) ||
65}
66
68{
69 auto msg = new KMime::Message;
70 msg->setContent(KMime::CRLFtoLF(encodedData));
71 if (msg->head().isEmpty() || msg->body().isEmpty()) {
72 delete msg;
73 return {};
74 }
75 msg->parse();
76
78 node.setContent<Internal::OwnedPtr<KMime::Content>>(msg);
79
80 const auto dateHdr = findHeader<KMime::Headers::Date>(msg);
81 if (dateHdr) {
82 node.setContextDateTime(dateHdr->dateTime());
83 }
84
85 return node;
86}
87
89{
90 auto *content = decodedData.value<KMime::Content*>();
91 if (!content) {
92 content = decodedData.value<KMime::Message*>();
93 }
94 if (!content) {
95 return {};
96 }
97
99 node.setContent(content);
100
101 const auto dateHdr = findHeader<KMime::Headers::Date>(content);
102 if (dateHdr) {
103 node.setContextDateTime(dateHdr->dateTime());
104 }
105
106 return node;
107}
108
109static ExtractorDocumentNode expandContentNode(ExtractorDocumentNode &node, const KMime::Content *content, const ExtractorEngine *engine)
110{
111 QString fileName;
112 const auto contentType = content->contentType();
113 if (contentType) {
114 fileName = contentType->name();
115 }
116 const auto contentDisposition = content->contentDisposition();
117 if (fileName.isEmpty() && contentDisposition) {
118 fileName = contentDisposition->filename();
119 }
120
122 if ((contentType && contentType->isPlainText() && fileName.isEmpty()) || (!contentType && content->isTopLevel())) {
123 child = engine->documentNodeFactory()->createNode(content->decodedText(), u"text/plain");
124 } else if (contentType && contentType->isHTMLText()) {
125 child = engine->documentNodeFactory()->createNode(content->decodedText(), u"text/html");
126 } else if (content->bodyIsMessage()) {
127 child = engine->documentNodeFactory()->createNode(QVariant::fromValue(content->bodyAsMessage().get()), u"message/rfc822");
128 } else {
129 child = engine->documentNodeFactory()->createNode(content->decodedContent(), fileName);
130 }
131 node.appendChild(child);
132 return child;
133}
134
135static void expandContentNodeRecursive(ExtractorDocumentNode &node, const KMime::Content *content, const ExtractorEngine *engine)
136{
137 const auto ct = content->contentType();
138 const auto children = content->contents();
139 if (!ct || children.empty()) {
140 expandContentNode(node, content, engine);
141 return;
142 }
143
144 // special handling of multipart/related to add images to the corresponding HTML document
145 if (ct && ct->isMultipart() && ct->isSubtype("related") && ct->parameter("type"_L1) == "text/html"_L1 && children.size() >= 2) {
146 const auto child = children.front();
147 if (child->contentType(false) && child->contentType(false)->isHTMLText()) {
148 auto htmlNode = expandContentNode(node, child, engine);
149 for (auto it = std::next(children.begin()); it != children.end(); ++it) {
150 auto imgNode = expandContentNode(htmlNode, (*it), engine);
151 const auto cid = (*it)->contentID(false);
152 if (cid) {
153 imgNode.setLocation(cid->identifier());
154 }
155 }
156 return;
157 }
158 }
159
160 for (const auto child : children) {
161 if (child->bodyIsMessage()) {
162 expandContentNode(node, child, engine); // do not recurse into nested emails, we want those as dedicated nodes
163 } else {
164 expandContentNodeRecursive(node, child, engine);
165 }
166 }
167}
168
170{
171 const auto content = node.content<KMime::Content*>();
172 expandContentNodeRecursive(node, content, engine);
173}
174
176{
177 const auto content = node.content<KMime::Content*>();
178 const auto header = findHeader(content, filter.fieldName().toUtf8().constData());
179 return header ? filter.matches(header->asUnicodeString()) : false;
180}
181
183{
184 destroyIfOwned<KMime::Content>(node);
185}
ExtractorDocumentNode createNode(const QByteArray &data, QStringView fileName={}, QStringView mimeType={}) const
Create a new document node from data.
A node in the extracted document object tree.
void appendChild(ExtractorDocumentNode &child)
Add another child node.
void setContextDateTime(const QDateTime &contextDateTime)
Set the context date/time.
QJSValue content
The decoded content of this node.
void setContent(const QVariant &content)
Set decoded content.
Semantic data extraction engine.
const ExtractorDocumentNodeFactory * documentNodeFactory() const
Factory for creating new document nodes.
Determines whether an extractor is applicable to a given email.
void destroyNode(ExtractorDocumentNode &node) const override
Destroys type-specific data in node.
ExtractorDocumentNode createNodeFromContent(const QVariant &decodedData) const override
Create a document node from an already decoded data type.
bool matches(const ExtractorFilter &filter, const ExtractorDocumentNode &node) const override
Checks whether the given filter matches node.
void expandNode(ExtractorDocumentNode &node, const ExtractorEngine *engine) const override
Create child nodes for node, as far as that's necessary for this document type.
ExtractorDocumentNode createNodeFromData(const QByteArray &encodedData) const override
Create a document node from raw data.
bool canHandleData(const QByteArray &encodedData, QStringView fileName) const override
Fast check whether the given encoded data can possibly be processed by this instance.
const Headers::ContentType * contentType() const
T * header() const
Headers::Base * headerByType(QByteArrayView type) const
Content * parent()
QSharedPointer< Message > bodyAsMessage() const
QByteArray decodedContent() const
bool isTopLevel() const
bool bodyIsMessage() const
QString decodedText(bool trimText=false, bool removeTrailingNewlines=false) const
void setContent(const QByteArray &s)
const Headers::ContentDisposition * contentDisposition() const
QList< Content * > contents() const
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
T * get() const
bool isEmpty() const
bool endsWith(QChar ch) const
CaseInsensitive
QVariant fromValue(T &&value)
T value() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Fri Sep 27 2024 11:47:33 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.