Messagelib

converthtmltoplaintext.cpp
/*
  SPDX-FileCopyrightText: 2015-2025 Laurent Montel <montel@kde.org>

  SPDX-License-Identifier: LGPL-2.0-or-later

*/
7
8#include "converthtmltoplaintext.h"
9
10#include <KPIMTextEdit/MarkupDirector>
11#include <KPIMTextEdit/PlainTextMarkupBuilder>
12#include <QTextDocument>
13
14using namespace MimeTreeParser;
15ConvertHtmlToPlainText::ConvertHtmlToPlainText() = default;
16
17ConvertHtmlToPlainText::~ConvertHtmlToPlainText() = default;
18
19void ConvertHtmlToPlainText::setHtmlString(const QString &htmlString)
20{
21 mHtmlString = htmlString;
22}
23
24QString ConvertHtmlToPlainText::generatePlainText()
25{
26 if (mHtmlString.isEmpty()) {
27 return {};
28 }
29 auto pb = new KPIMTextEdit::PlainTextMarkupBuilder();
30
31 auto pmd = new KPIMTextEdit::MarkupDirector(pb);
32 auto doc = new QTextDocument;
33 doc->setHtml(mHtmlString);
34
35 pmd->processDocument(doc);
36 QString plainText = pb->getResult();
37
38 delete doc;
39 delete pmd;
40 delete pb;
41 toCleanPlainText(plainText);
42 return plainText;
43}
44
45QString ConvertHtmlToPlainText::htmlString() const
46{
47 return mHtmlString;
48}
49
50// Duplicate from kpimtextedit/textedit.h
51void ConvertHtmlToPlainText::toCleanPlainText(QString &text)
52{
53 // Remove line separators. Normal \n chars are still there, so no linebreaks get lost here
54 text.remove(QChar::LineSeparator);
55
56 // Get rid of embedded images, see QTextImageFormat documentation:
57 // "Inline images are represented by an object replacement character (0xFFFC in Unicode) "
58 text.remove(QChar(0xFFFC));
59
60 // In plaintext mode, each space is non-breaking.
61 text.replace(QChar::Nbsp, QLatin1Char(' '));
62}
bool isEmpty() const
void setHtml(const QString &html)
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:55:28 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.