KMime

kmime_codecs.cpp
1/*
2 kmime_codecs.cpp
3
4 KMime, the KDE Internet mail/usenet news message library.
5 SPDX-FileCopyrightText: 2001 the KMime authors.
6 See file AUTHORS for details
7
8 SPDX-License-Identifier: LGPL-2.0-or-later
9*/
10
11#include "kmime_codecs_p.h"
12#include "kmime_debug.h"
13
14#include <QStringDecoder>
15#include <QStringEncoder>
16
17namespace KMime {
18
// Characters looked up with strchr() by the encoders in this file:
// encodeRFC2047Sentence() splits its input at each of them and copies them to
// the output unencoded, and encodeRFC2047String() treats them as needing
// encoding when its addressHeader flag is set.
19static const char reservedCharacters[] = "\"()<>@,.;:\\[]=";
20
22 bool addressHeader)
23{
24 QByteArray result;
25 int start = 0;
26 int end = 0;
27 bool nonAscii = false;
28 bool useQEncoding = false;
29
30 // fromLatin1() is safe here, codecForName() uses toLatin1() internally
31 QStringEncoder codec(charset.constData());
32
33 QByteArray usedCS;
34 if (!codec.isValid()) {
35 //no codec available => try local8Bit and hope the best ;-)
37 usedCS = codec.name();
38 } else {
39 if (charset.isEmpty()) {
40 usedCS = codec.name();
41 } else {
42 usedCS = charset;
43 }
44 }
45
46 QByteArray encoded8Bit = codec.encode(src);
47 if (codec.hasError()) {
48 usedCS = "utf-8";
49 codec = QStringEncoder(usedCS.constData());
50 encoded8Bit = codec.encode(src);
51 }
52
53 if (usedCS.contains("8859-")) { // use "B"-Encoding for non iso-8859-x charsets
54 useQEncoding = true;
55 }
56
57 int encoded8BitLength = encoded8Bit.length();
58 for (int i = 0; i < encoded8BitLength; i++) {
59 if (encoded8Bit[i] == ' ') { // encoding starts at word boundaries
60 start = i + 1;
61 }
62
63 // encode escape character, for japanese encodings...
64 if (((signed char)encoded8Bit[i] < 0) || (encoded8Bit[i] == '\033') ||
65 (addressHeader && (strchr("\"()<>@,.;:\\[]=", encoded8Bit[i]) != nullptr))) {
66 end = start; // non us-ascii char found, now we determine where to stop encoding
67 nonAscii = true;
68 break;
69 }
70 }
71
72 if (nonAscii) {
73 while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) {
74 // we encode complete words
75 end++;
76 }
77
78 for (int x = end; x < encoded8Bit.length(); x++) {
79 if (((signed char)encoded8Bit[x] < 0) || (encoded8Bit[x] == '\033') ||
80 (addressHeader && (strchr(reservedCharacters, encoded8Bit[x]) != nullptr))) {
81 end = x; // we found another non-ascii word
82
83 while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) {
84 // we encode complete words
85 end++;
86 }
87 }
88 }
89
90 result = encoded8Bit.left(start) + "=?" + usedCS;
91
92 if (useQEncoding) {
93 result += "?Q?";
94
95 char hexcode; // "Q"-encoding implementation described in RFC 2047
96 for (int i = start; i < end; i++) {
97 char c = encoded8Bit[i];
98 if (c == ' ') { // make the result readable with not MIME-capable readers
99 result += '_';
100 } else {
101 if (((c >= 'a') && (c <= 'z')) || // paranoid mode, encode *all* special chars to avoid problems
102 ((c >= 'A') && (c <= 'Z')) || // with "From" & "To" headers
103 ((c >= '0') && (c <= '9'))) {
104 result += c;
105 } else {
106 result += '='; // "stolen" from KMail ;-)
107 hexcode = ((c & 0xF0) >> 4) + 48;
108 if (hexcode >= 58) {
109 hexcode += 7;
110 }
111 result += hexcode;
112 hexcode = (c & 0x0F) + 48;
113 if (hexcode >= 58) {
114 hexcode += 7;
115 }
116 result += hexcode;
117 }
118 }
119 }
120 } else {
121 result += "?B?" + encoded8Bit.mid(start, end - start).toBase64();
122 }
123
124 result += "?=";
125 result += encoded8Bit.right(encoded8Bit.length() - end);
126 } else {
127 result = encoded8Bit;
128 }
129
130 return result;
131}
132
133QByteArray encodeRFC2047Sentence(QStringView src, const QByteArray &charset)
134{
135 QByteArray result;
136 const QChar *ch = src.constData();
137 const int length = src.length();
138 int pos = 0;
139 int wordStart = 0;
140
141 //qCDebug(KMIME_LOG) << "Input:" << src;
142 // Loop over all characters of the string.
143 // When encountering a split character, RFC-2047-encode the word before it, and add it to the result.
144 while (pos < length) {
145 //qCDebug(KMIME_LOG) << "Pos:" << pos << "Result:" << result << "Char:" << ch->toLatin1();
146 const bool isAscii = ch->unicode() < 127;
147 const bool isReserved = (strchr(reservedCharacters, ch->toLatin1()) != nullptr);
148 if (isAscii && isReserved) {
149 const int wordSize = pos - wordStart;
150 if (wordSize > 0) {
151 const auto word = src.mid(wordStart, wordSize);
152 result += encodeRFC2047String(word, charset);
153 }
154
155 result += ch->toLatin1();
156 wordStart = pos + 1;
157 }
158 ch++;
159 pos++;
160 }
161
162 // Encode the last word
163 const int wordSize = pos - wordStart;
164 if (wordSize > 0) {
165 const auto word = src.mid(wordStart, pos - wordStart);
166 result += encodeRFC2047String(word, charset);
167 }
168
169 return result;
170}
171
172//-----------------------------------------------------------------------------
173QByteArray encodeRFC2231String(QStringView str, const QByteArray &charset)
174{
175 if (str.isEmpty()) {
176 return {};
177 }
178
179 QStringEncoder codec(charset.constData());
180 QByteArray latin;
181 if (charset == "us-ascii") {
182 latin = str.toLatin1();
183 } else if (codec.isValid()) {
184 latin = codec.encode(str);
185 } else {
186 latin = str.toLocal8Bit();
187 }
188
189 char *l;
190 for (l = latin.data(); *l; ++l) {
191 if (((*l & 0xE0) == 0) || (*l & 0x80)) {
192 // *l is control character or 8-bit char
193 break;
194 }
195 }
196 if (!*l) {
197 return latin;
198 }
199
200 QByteArray result = charset + "''";
201 for (l = latin.data(); *l; ++l) {
202 bool needsQuoting = (*l & 0x80) || (*l == '%');
203 if (!needsQuoting) {
204 const QByteArray especials = "()<>@,;:\"/[]?.= \033";
205 int len = especials.length();
206 for (int i = 0; i < len; i++) {
207 if (*l == especials[i]) {
208 needsQuoting = true;
209 break;
210 }
211 }
212 }
213 if (needsQuoting) {
214 result += '%';
215 unsigned char hexcode;
216 hexcode = ((*l & 0xF0) >> 4) + 48;
217 if (hexcode >= 58) {
218 hexcode += 7;
219 }
220 result += hexcode;
221 hexcode = (*l & 0x0F) + 48;
222 if (hexcode >= 58) {
223 hexcode += 7;
224 }
225 result += hexcode;
226 } else {
227 result += *l;
228 }
229 }
230 return result;
231}
232
233}
Q_SCRIPTABLE Q_NOREPLY void start()
KCODECS_EXPORT QByteArray encodeRFC2047String(QStringView src, const QByteArray &charset)
const QList< QKeySequence > & end()
const char * constData() const const
bool contains(QByteArrayView bv) const const
char * data()
bool isEmpty() const const
QByteArray left(qsizetype len) const const
qsizetype length() const const
QByteArray mid(qsizetype pos, qsizetype len) const const
QByteArray right(qsizetype len) const const
QByteArray toBase64(Base64Options options) const const
char toLatin1() const const
char16_t & unicode()
QStringView mid(qsizetype start, qsizetype length) const const
const_pointer constData() const const
bool isEmpty() const const
qsizetype length() const const
QByteArray toLatin1() const const
QByteArray toLocal8Bit() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:20:12 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.