KMime

kmime_codecs.cpp
1 /*
2  kmime_codecs.cpp
3 
4  KMime, the KDE Internet mail/usenet news message library.
5  SPDX-FileCopyrightText: 2001 the KMime authors.
6  See file AUTHORS for details
7 
8  SPDX-License-Identifier: LGPL-2.0-or-later
9 */
10 
11 #include "kmime_codecs.h"
12 #include "kmime_debug.h"
13 #include <KCharsets>
14 
15 #include <QTextCodec>
16 
17 namespace KMime {
18 
// Characters that encodeRFC2047String() treats as needing encoding when it is
// called for an address header, and that encodeRFC2047Sentence() uses as the
// split characters between separately encoded words.
19 static const char reservedCharacters[] = "\"()<>@,.;:\\[]=";
20 
21 QByteArray encodeRFC2047String(const QString &src, const QByteArray &charset,
22  bool addressHeader, bool allow8BitHeaders)
23 {
24  QByteArray result;
25  int start = 0;
26  int end = 0;
27  bool nonAscii = false;
28  bool ok = true;
29  bool useQEncoding = false;
30 
31  // fromLatin1() is safe here, codecForName() uses toLatin1() internally
32  const QTextCodec *codec = KCharsets::charsets()->codecForName(QString::fromLatin1(charset), ok);
33 
34  QByteArray usedCS;
35  if (!ok) {
36  //no codec available => try local8Bit and hope the best ;-)
37  usedCS = QTextCodec::codecForLocale()->name();
39  } else {
40  Q_ASSERT(codec);
41  if (charset.isEmpty()) {
42  usedCS = codec->name();
43  } else {
44  usedCS = charset;
45  }
46  }
47 
49  QByteArray encoded8Bit = codec->fromUnicode(src.constData(), src.length(), &converterState);
50  if (converterState.invalidChars > 0) {
51  usedCS = "utf-8";
52  codec = QTextCodec::codecForName(usedCS);
53  encoded8Bit = codec->fromUnicode(src);
54  }
55 
56  if (usedCS.contains("8859-")) { // use "B"-Encoding for non iso-8859-x charsets
57  useQEncoding = true;
58  }
59 
60  if (allow8BitHeaders) {
61  return encoded8Bit;
62  }
63 
64  int encoded8BitLength = encoded8Bit.length();
65  for (int i = 0; i < encoded8BitLength; i++) {
66  if (encoded8Bit[i] == ' ') { // encoding starts at word boundaries
67  start = i + 1;
68  }
69 
70  // encode escape character, for japanese encodings...
71  if (((signed char)encoded8Bit[i] < 0) || (encoded8Bit[i] == '\033') ||
72  (addressHeader && (strchr("\"()<>@,.;:\\[]=", encoded8Bit[i]) != nullptr))) {
73  end = start; // non us-ascii char found, now we determine where to stop encoding
74  nonAscii = true;
75  break;
76  }
77  }
78 
79  if (nonAscii) {
80  while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) {
81  // we encode complete words
82  end++;
83  }
84 
85  for (int x = end; x < encoded8Bit.length(); x++) {
86  if (((signed char)encoded8Bit[x] < 0) || (encoded8Bit[x] == '\033') ||
87  (addressHeader && (strchr(reservedCharacters, encoded8Bit[x]) != nullptr))) {
88  end = x; // we found another non-ascii word
89 
90  while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) {
91  // we encode complete words
92  end++;
93  }
94  }
95  }
96 
97  result = encoded8Bit.left(start) + "=?" + usedCS;
98 
99  if (useQEncoding) {
100  result += "?Q?";
101 
102  char c;
103  char hexcode; // "Q"-encoding implementation described in RFC 2047
104  for (int i = start; i < end; i++) {
105  c = encoded8Bit[i];
106  if (c == ' ') { // make the result readable with not MIME-capable readers
107  result += '_';
108  } else {
109  if (((c >= 'a') && (c <= 'z')) || // paranoid mode, encode *all* special chars to avoid problems
110  ((c >= 'A') && (c <= 'Z')) || // with "From" & "To" headers
111  ((c >= '0') && (c <= '9'))) {
112  result += c;
113  } else {
114  result += '='; // "stolen" from KMail ;-)
115  hexcode = ((c & 0xF0) >> 4) + 48;
116  if (hexcode >= 58) {
117  hexcode += 7;
118  }
119  result += hexcode;
120  hexcode = (c & 0x0F) + 48;
121  if (hexcode >= 58) {
122  hexcode += 7;
123  }
124  result += hexcode;
125  }
126  }
127  }
128  } else {
129  result += "?B?" + encoded8Bit.mid(start, end - start).toBase64();
130  }
131 
132  result += "?=";
133  result += encoded8Bit.right(encoded8Bit.length() - end);
134  } else {
135  result = encoded8Bit;
136  }
137 
138  return result;
139 }
140 
141 QByteArray encodeRFC2047Sentence(const QString &src, const QByteArray &charset)
142 {
143  QByteArray result;
144  const QChar *ch = src.constData();
145  const int length = src.length();
146  int pos = 0;
147  int wordStart = 0;
148 
149  //qCDebug(KMIME_LOG) << "Input:" << src;
150  // Loop over all characters of the string.
151  // When encountering a split character, RFC-2047-encode the word before it, and add it to the result.
152  while (pos < length) {
153  //qCDebug(KMIME_LOG) << "Pos:" << pos << "Result:" << result << "Char:" << ch->toLatin1();
154  const bool isAscii = ch->unicode() < 127;
155  const bool isReserved = (strchr(reservedCharacters, ch->toLatin1()) != nullptr);
156  if (isAscii && isReserved) {
157  const int wordSize = pos - wordStart;
158  if (wordSize > 0) {
159  const QString word = src.mid(wordStart, wordSize);
160  result += encodeRFC2047String(word, charset);
161  }
162 
163  result += ch->toLatin1();
164  wordStart = pos + 1;
165  }
166  ch++;
167  pos++;
168  }
169 
170  // Encode the last word
171  const int wordSize = pos - wordStart;
172  if (wordSize > 0) {
173  const QString word = src.mid(wordStart, pos - wordStart);
174  result += encodeRFC2047String(word, charset);
175  }
176 
177  return result;
178 }
179 
180 //-----------------------------------------------------------------------------
181 QByteArray encodeRFC2231String(const QString &str, const QByteArray &charset)
182 {
183  if (str.isEmpty()) {
184  return QByteArray();
185  }
186 
188  QByteArray latin;
189  if (charset == "us-ascii") {
190  latin = str.toLatin1();
191  } else if (codec) {
192  latin = codec->fromUnicode(str);
193  } else {
194  latin = str.toLocal8Bit();
195  }
196 
197  char *l;
198  for (l = latin.data(); *l; ++l) {
199  if (((*l & 0xE0) == 0) || (*l & 0x80)) {
200  // *l is control character or 8-bit char
201  break;
202  }
203  }
204  if (!*l) {
205  return latin;
206  }
207 
208  QByteArray result = charset + "''";
209  for (l = latin.data(); *l; ++l) {
210  bool needsQuoting = (*l & 0x80) || (*l == '%');
211  if (!needsQuoting) {
212  const QByteArray especials = "()<>@,;:\"/[]?.= \033";
213  int len = especials.length();
214  for (int i = 0; i < len; i++) {
215  if (*l == especials[i]) {
216  needsQuoting = true;
217  break;
218  }
219  }
220  }
221  if (needsQuoting) {
222  result += '%';
223  unsigned char hexcode;
224  hexcode = ((*l & 0xF0) >> 4) + 48;
225  if (hexcode >= 58) {
226  hexcode += 7;
227  }
228  result += hexcode;
229  hexcode = (*l & 0x0F) + 48;
230  if (hexcode >= 58) {
231  hexcode += 7;
232  }
233  result += hexcode;
234  } else {
235  result += *l;
236  }
237  }
238  return result;
239 }
240 
241 //-----------------------------------------------------------------------------
242 QString decodeRFC2231String(const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS,
243  bool forceCS)
244 {
245  int p = str.indexOf('\'');
246  if (p < 0) {
248  }
249 
250  QByteArray charset = str.left(p);
251 
252  QByteArray st = str.mid(str.lastIndexOf('\'') + 1);
253 
254  char ch;
255  char ch2;
256  p = 0;
257  while (p < st.length()) {
258  if (st.at(p) == 37) {
259  // Only try to decode the percent-encoded character if the percent sign
260  // is really followed by two other characters, see testcase at bug 163024
261  if (p + 2 < st.length()) {
262  ch = st.at(p + 1) - 48;
263  if (ch > 16) {
264  ch -= 7;
265  }
266  ch2 = st.at(p + 2) - 48;
267  if (ch2 > 16) {
268  ch2 -= 7;
269  }
270  st[p] = ch * 16 + ch2;
271  st.remove(p + 1, 2);
272  }
273  }
274  p++;
275  }
276  qCDebug(KMIME_LOG) << "Got pre-decoded:" << st;
277  const QTextCodec *charsetcodec = KCharsets::charsets()->codecForName(QString::fromLatin1(charset));
278  if (!charsetcodec || forceCS) {
279  charsetcodec = KCharsets::charsets()->codecForName(QString::fromLatin1(defaultCS));
280  }
281 
282  usedCS = charsetcodec->name();
283  return charsetcodec->toUnicode(st);
284 }
285 
286 }
QTextCodec * codecForName(const QString &name) const
QByteArray fromUnicode(const QString &str) const const
const QChar * constData() const const
KIMAP_EXPORT const QString encodeRFC2231String(const QString &str)
KIMAP_EXPORT const QString decodeRFC2231String(const QString &str)
virtual QByteArray name() const const =0
char at(int i) const const
int lastIndexOf(char ch, int from) const const
bool isEmpty() const const
int length() const const
QTextCodec * codecForLocale()
int indexOf(char ch, int from) const const
bool isEmpty() const const
QByteArray right(int len) const const
QByteArray mid(int pos, int len) const const
ushort unicode() const const
QByteArray toLocal8Bit() const const
static KCharsets * charsets()
char toLatin1() const const
const QList< QKeySequence > & end()
QByteArray left(int len) const const
KIMAP_EXPORT const QString encodeRFC2047String(const QString &str)
QByteArray toLatin1() const const
QString mid(int position, int n) const const
QTextCodec * codecForName(const QByteArray &name)
bool contains(char ch) const const
int length() const const
char * data()
QString fromLatin1(const char *str, int size)
QByteArray & remove(int pos, int len)
QByteArray toBase64(QByteArray::Base64Options options) const const
QString toUnicode(const QByteArray &a) const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2021 The KDE developers.
Generated on Mon Sep 27 2021 23:15:57 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.