KIMAP

rfccodecs.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * rfccodecs.cpp - handler for various rfc/mime encodings
4  * SPDX-FileCopyrightText: 2000 s [email protected]
5  *
6  * SPDX-License-Identifier: LGPL-2.0-or-later
7  *
8  *********************************************************************/
20 #include "rfccodecs.h"
21 
22 #include <ctype.h>
23 #include <sys/types.h>
24 
25 #include <stdio.h>
26 #include <stdlib.h>
27 
28 #include <QTextCodec>
29 #include <QByteArray>
30 #include <QLatin1Char>
31 #include <KCodecs>
32 
33 using namespace KIMAP;
34 
35 // This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997.
36 // adapted to QT-Toolkit by Sven Carstens <[email protected]> 2000
37 
38 //@cond PRIVATE
// Base64 alphabet used by the IMAP folder-name codec below; note that its
// last character is ',' rather than the '/' of standard base64.
39 static const unsigned char base64chars[] =
40  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
// Marker stored in the decoder's lookup table for bytes outside base64chars.
41 #define UNDEFINED 64
// NOTE(review): MAXLINE is not referenced by any code visible in this listing.
42 #define MAXLINE 76
// The 16 characters that the RFC 2047 and RFC 2231 encoders below always
// write in escaped (=XX / %XX) form; the 17th array element is the NUL.
43 static const char especials[17] = "()<>@,;:\"/[]?.= ";
44 
45 /* UTF16 definitions */
// UTF16MASK/UTF16SHIFT: each surrogate carries 10 payload bits.
// UTF16BASE: first code point that needs a surrogate pair.
// UTF16HIGHSTART..UTF16HIGHEND and UTF16LOSTART..UTF16LOEND: the ranges the
// codec below tests to recognise high and low surrogates.
46 #define UTF16MASK 0x03FFUL
47 #define UTF16SHIFT 10
48 #define UTF16BASE 0x10000UL
49 #define UTF16HIGHSTART 0xD800UL
50 #define UTF16HIGHEND 0xDBFFUL
51 #define UTF16LOSTART 0xDC00UL
52 #define UTF16LOEND 0xDFFFUL
53 //@endcond
54 
55 //-----------------------------------------------------------------------------
57 {
58  unsigned char c, i, bitcount;
59  unsigned long ucs4, utf16, bitbuf;
60  unsigned char base64[256], utf8[6];
61  unsigned int srcPtr = 0;
62  QByteArray dst;
63  QByteArray src = inSrc;
64  uint srcLen = inSrc.length();
65 
66  /* initialize modified base64 decoding table */
67  memset(base64, UNDEFINED, sizeof(base64));
68  for (i = 0; i < sizeof(base64chars); ++i) {
69  base64[(int)base64chars[i]] = i;
70  }
71 
72  /* loop until end of string */
73  while (srcPtr < srcLen) {
74  c = src[srcPtr++];
75  /* deal with literal characters and &- */
76  if (c != '&' || src[srcPtr] == '-') {
77  /* encode literally */
78  dst += c;
79  /* skip over the '-' if this is an &- sequence */
80  if (c == '&') {
81  srcPtr++;
82  }
83  } else {
84  /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */
85  bitbuf = 0;
86  bitcount = 0;
87  ucs4 = 0;
88  while ((c = base64[(unsigned char)src[srcPtr]]) != UNDEFINED) {
89  ++srcPtr;
90  bitbuf = (bitbuf << 6) | c;
91  bitcount += 6;
92  /* enough bits for a UTF-16 character? */
93  if (bitcount >= 16) {
94  bitcount -= 16;
95  utf16 = (bitcount ? bitbuf >> bitcount : bitbuf) & 0xffff;
96  /* convert UTF16 to UCS4 */
97  if (utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND) {
98  ucs4 = (utf16 - UTF16HIGHSTART) << UTF16SHIFT;
99  continue;
100  } else if (utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND) {
101  ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
102  } else {
103  ucs4 = utf16;
104  }
105  /* convert UTF-16 range of UCS4 to UTF-8 */
106  if (ucs4 <= 0x7fUL) {
107  utf8[0] = ucs4;
108  i = 1;
109  } else if (ucs4 <= 0x7ffUL) {
110  utf8[0] = 0xc0 | (ucs4 >> 6);
111  utf8[1] = 0x80 | (ucs4 & 0x3f);
112  i = 2;
113  } else if (ucs4 <= 0xffffUL) {
114  utf8[0] = 0xe0 | (ucs4 >> 12);
115  utf8[1] = 0x80 | ((ucs4 >> 6) & 0x3f);
116  utf8[2] = 0x80 | (ucs4 & 0x3f);
117  i = 3;
118  } else {
119  utf8[0] = 0xf0 | (ucs4 >> 18);
120  utf8[1] = 0x80 | ((ucs4 >> 12) & 0x3f);
121  utf8[2] = 0x80 | ((ucs4 >> 6) & 0x3f);
122  utf8[3] = 0x80 | (ucs4 & 0x3f);
123  i = 4;
124  }
125  /* copy it */
126  for (c = 0; c < i; ++c) {
127  dst += utf8[c];
128  }
129  }
130  }
131  /* skip over trailing '-' in modified UTF-7 encoding */
132  if (src[srcPtr] == '-') {
133  ++srcPtr;
134  }
135  }
136  }
137  return dst;
138 }
139 
141 {
142  return QString::fromUtf8(decodeImapFolderName(inSrc.toUtf8()).constData());
143 }
144 
145 //-----------------------------------------------------------------------------
146 
148 {
149  int len = src.length();
150  QByteArray result;
151  result.reserve(2 * len);
152  for (int i = 0; i < len; i++) {
153  if (src[i] == '"' || src[i] == '\\') {
154  result += '\\';
155  }
156  result += src[i];
157  }
158  result.squeeze();
159  return result;
160 }
161 
163 {
164  uint len = src.length();
165  QString result;
166  result.reserve(2 * len);
167  for (unsigned int i = 0; i < len; i++) {
168  if (src[i] == QLatin1Char('"') || src[i] == QLatin1Char('\\')) {
169  result += QLatin1Char('\\');
170  }
171  result += src[i];
172  }
173  //result.squeeze(); - unnecessary and slow
174  return result;
175 }
176 
177 //-----------------------------------------------------------------------------
179 {
180  return QString::fromUtf8(encodeImapFolderName(inSrc.toUtf8()).constData());
181 }
182 
184 {
185  unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag;
186  unsigned int ucs4, bitbuf;
187  QByteArray src = inSrc;
188  QByteArray dst;
189 
190  int srcPtr = 0;
191  utf7mode = 0;
192  utf8total = 0;
193  bitstogo = 0;
194  utf8pos = 0;
195  bitbuf = 0;
196  ucs4 = 0;
197  while (srcPtr < src.length()) {
198  c = (unsigned char)src[srcPtr++];
199  /* normal character? */
200  if (c >= ' ' && c <= '~') {
201  /* switch out of UTF-7 mode */
202  if (utf7mode) {
203  if (bitstogo) {
204  dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
205  bitstogo = 0;
206  }
207  dst += '-';
208  utf7mode = 0;
209  }
210  dst += c;
211  /* encode '&' as '&-' */
212  if (c == '&') {
213  dst += '-';
214  }
215  continue;
216  }
217  /* switch to UTF-7 mode */
218  if (!utf7mode) {
219  dst += '&';
220  utf7mode = 1;
221  }
222  /* Encode US-ASCII characters as themselves */
223  if (c < 0x80) {
224  ucs4 = c;
225  utf8total = 1;
226  } else if (utf8total) {
227  /* save UTF8 bits into UCS4 */
228  ucs4 = (ucs4 << 6) | (c & 0x3FUL);
229  if (++utf8pos < utf8total) {
230  continue;
231  }
232  } else {
233  utf8pos = 1;
234  if (c < 0xE0) {
235  utf8total = 2;
236  ucs4 = c & 0x1F;
237  } else if (c < 0xF0) {
238  utf8total = 3;
239  ucs4 = c & 0x0F;
240  } else {
241  /* NOTE: can't convert UTF8 sequences longer than 4 */
242  utf8total = 4;
243  ucs4 = c & 0x03;
244  }
245  continue;
246  }
247  /* loop to split ucs4 into two utf16 chars if necessary */
248  utf8total = 0;
249  do {
250  if (ucs4 >= UTF16BASE) {
251  ucs4 -= UTF16BASE;
252  bitbuf =
253  (bitbuf << 16) | ((ucs4 >> UTF16SHIFT) + UTF16HIGHSTART);
254  ucs4 = (ucs4 & UTF16MASK) + UTF16LOSTART;
255  utf16flag = 1;
256  } else {
257  bitbuf = (bitbuf << 16) | ucs4;
258  utf16flag = 0;
259  }
260  bitstogo += 16;
261  /* spew out base64 */
262  while (bitstogo >= 6) {
263  bitstogo -= 6;
264  dst +=
265  base64chars[(bitstogo ? (bitbuf >> bitstogo) : bitbuf) & 0x3F];
266  }
267  } while (utf16flag);
268  }
269  /* if in UTF-7 mode, finish in ASCII */
270  if (utf7mode) {
271  if (bitstogo) {
272  dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
273  }
274  dst += '-';
275  }
276  return quoteIMAP(dst);
277 }
278 
279 //-----------------------------------------------------------------------------
281 {
282  if (str.isEmpty()) {
283  return nullptr;
284  }
285  return QTextCodec::codecForName(str.toLower().
286  replace(QStringLiteral("windows"), QStringLiteral("cp")).toLatin1());
287 }
288 
289 //-----------------------------------------------------------------------------
291 {
292  QString throw_away;
293 
294  return decodeRFC2047String(str, throw_away);
295 }
296 
297 //-----------------------------------------------------------------------------
299  QString &charset)
300 {
301  QString throw_away;
302 
303  return decodeRFC2047String(str, charset, throw_away);
304 }
305 
306 //-----------------------------------------------------------------------------
308  QString &charset,
309  QString &language)
310 {
311  //do we have a rfc string
312  if (!str.contains(QLatin1String("=?"))) {
313  return str;
314  }
315 
316  // FIXME get rid of the conversion?
317  QByteArray aStr = str.toLatin1(); // QString.length() means Unicode chars
318  QByteArray result;
319  char *pos, *beg, *end, *mid = nullptr;
320  QByteArray cstr;
321  char encoding = 0, ch;
322  bool valid;
323  const int maxLen = 200;
324  int i;
325 
326 // result.truncate(aStr.length());
327  for (pos = aStr.data(); *pos; pos++) {
328  if (pos[0] != '=' || pos[1] != '?') {
329  result += *pos;
330  continue;
331  }
332  beg = pos + 2;
333  end = beg;
334  valid = true;
335  // parse charset name
336  for (i = 2, pos += 2;
337  i < maxLen &&
338  (*pos != '?' && (ispunct(*pos) || isalnum(*pos)));
339  i++) {
340  pos++;
341  }
342  if (*pos != '?' || i < 4 || i >= maxLen) {
343  valid = false;
344  } else {
345  charset = QLatin1String(QByteArray(beg, i - 1)); // -2 + 1 for the zero
346  int pt = charset.lastIndexOf(QLatin1Char('*'));
347  if (pt != -1) {
348  // save language for later usage
349  language = charset.right(charset.length() - pt - 1);
350 
351  // tie off language as defined in rfc2047
352  charset.truncate(pt);
353  }
354  // get encoding and check delimiting question marks
355  encoding = toupper(pos[1]);
356  if (pos[2] != '?' ||
357  (encoding != 'Q' && encoding != 'B' &&
358  encoding != 'q' && encoding != 'b')) {
359  valid = false;
360  }
361  pos += 3;
362  i += 3;
363 // qCDebug(KIMAP_LOG) << "Charset:" << charset << "- Language:" << language << "-'" << pos << "'";
364  }
365  if (valid) {
366  mid = pos;
367  // search for end of encoded part
368  while (i < maxLen && *pos && !(*pos == '?' && *(pos + 1) == '=')) {
369  i++;
370  pos++;
371  }
372  end = pos + 2;//end now points to the first char after the encoded string
373  if (i >= maxLen || !*pos) {
374  valid = false;
375  }
376  }
377  if (valid) {
378  ch = *pos;
379  *pos = '\0';
380  cstr = QByteArray(mid).left(static_cast<int>(mid - pos - 1));
381  if (encoding == 'Q') {
382  // decode quoted printable text
383  for (i = cstr.length() - 1; i >= 0; --i) {
384  if (cstr[i] == '_') {
385  cstr[i] = ' ';
386  }
387  }
388 // qCDebug(KIMAP_LOG) << "before QP '"
389 // << cstr << "'";
390  cstr = KCodecs::quotedPrintableDecode(cstr);
391 // qCDebug(KIMAP_LOG) << "after QP '"
392 // << cstr << "'";
393  } else {
394  // decode base64 text
395  cstr = QByteArray::fromBase64(cstr);
396  }
397  *pos = ch;
398  int len = cstr.length();
399  for (i = 0; i < len; ++i) {
400  result += cstr[i];
401  }
402 
403  pos = end - 1;
404  } else {
405 // qCDebug(KIMAP_LOG) << "invalid";
406  //result += "=?";
407  //pos = beg -1; // because pos gets increased shortly afterwards
408  pos = beg - 2;
409  result += *pos++;
410  result += *pos;
411  }
412  }
413  if (!charset.isEmpty()) {
414  QTextCodec *aCodec = codecForName(QLatin1String(charset.toLatin1()));
415  if (aCodec) {
416 // qCDebug(KIMAP_LOG) << "Codec is" << aCodec->name();
417  return aCodec->toUnicode(result);
418  }
419  }
420  return QLatin1String(result);
421 }
422 
423 //-----------------------------------------------------------------------------
425 {
427 }
428 
429 //-----------------------------------------------------------------------------
431 {
432  if (str.isEmpty()) {
433  return str;
434  }
435 
436  const signed char *latin =
437  reinterpret_cast<const signed char *>
438  (str.data()), *l, *start, *stop;
439  char hexcode;
440  int numQuotes, i;
441  int rptr = 0;
442  // My stats show this number results in 12 resize() out of 73,000
443  int resultLen = 3 * str.length() / 2;
444  QByteArray result(resultLen, '\0');
445 
446  while (*latin) {
447  l = latin;
448  start = latin;
449  while (*l) {
450  if (*l == 32) {
451  start = l + 1;
452  }
453  if (*l < 0) {
454  break;
455  }
456  l++;
457  }
458  if (*l) {
459  numQuotes = 1;
460  while (*l) {
461  /* The encoded word must be limited to 75 character */
462  for (i = 0; i < 16; ++i) {
463  if (*l == especials[i]) {
464  numQuotes++;
465  }
466  }
467  if (*l < 0) {
468  numQuotes++;
469  }
470  /* Stop after 58 = 75 - 17 characters or at "<[email protected]" */
471  if (l - start + 2 * numQuotes >= 58 || *l == 60) {
472  break;
473  }
474  l++;
475  }
476  if (*l) {
477  stop = l - 1;
478  while (stop >= start && *stop != 32) {
479  stop--;
480  }
481  if (stop <= start) {
482  stop = l;
483  }
484  } else {
485  stop = l;
486  }
487  if (resultLen - rptr - 1 <= start - latin + 1 + 16) {
488  // =?iso-88...
489  resultLen += (start - latin + 1) * 2 + 20; // more space
490  result.resize(resultLen);
491  }
492  while (latin < start) {
493  result[rptr++] = *latin;
494  latin++;
495  }
496  result.replace(rptr, 15, "=?iso-8859-1?q?");
497  rptr += 15;
498  if (resultLen - rptr - 1 <= 3 * (stop - latin + 1)) {
499  resultLen += (stop - latin + 1) * 4 + 20; // more space
500  result.resize(resultLen);
501  }
502  while (latin < stop) {
503  // can add up to 3 chars/iteration
504  numQuotes = 0;
505  for (i = 0; i < 16; ++i) {
506  if (*latin == especials[i]) {
507  numQuotes = 1;
508  }
509  }
510  if (*latin < 0) {
511  numQuotes = 1;
512  }
513  if (numQuotes) {
514  result[rptr++] = '=';
515  hexcode = ((*latin & 0xF0) >> 4) + 48;
516  if (hexcode >= 58) {
517  hexcode += 7;
518  }
519  result[rptr++] = hexcode;
520  hexcode = (*latin & 0x0F) + 48;
521  if (hexcode >= 58) {
522  hexcode += 7;
523  }
524  result[rptr++] = hexcode;
525  } else {
526  result[rptr++] = *latin;
527  }
528  latin++;
529  }
530  result[rptr++] = '?';
531  result[rptr++] = '=';
532  } else {
533  while (*latin) {
534  if (rptr == resultLen - 1) {
535  resultLen += 30;
536  result.resize(resultLen);
537  }
538  result[rptr++] = *latin;
539  latin++;
540  }
541  }
542  }
543  result[rptr] = 0;
544  return result;
545 }
546 
547 //-----------------------------------------------------------------------------
549 {
550  if (str.isEmpty()) {
551  return str;
552  }
553 
554  signed char *latin = (signed char *)calloc(1, str.length() + 1);
555  char *latin_us = (char *)latin;
556  strcpy(latin_us, str.toLatin1().constData());
557  signed char *l = latin;
558  char hexcode;
559  int i;
560  while (*l) {
561  if (*l < 0) {
562  break;
563  }
564  l++;
565  }
566  if (!*l) {
567  free(latin);
568  return str;
569  }
570  QByteArray result;
571  l = latin;
572  while (*l) {
573  bool quote = *l < 0;
574  for (i = 0; i < 16; ++i) {
575  if (*l == especials[i]) {
576  quote = true;
577  }
578  }
579  if (quote) {
580  result += '%';
581  hexcode = ((*l & 0xF0) >> 4) + 48;
582  if (hexcode >= 58) {
583  hexcode += 7;
584  }
585  result += hexcode;
586  hexcode = (*l & 0x0F) + 48;
587  if (hexcode >= 58) {
588  hexcode += 7;
589  }
590  result += hexcode;
591  } else {
592  result += *l;
593  }
594  l++;
595  }
596  free(latin);
597  return QLatin1String(result);
598 }
599 
600 //-----------------------------------------------------------------------------
602 {
603  int p = str.indexOf(QLatin1Char('\''));
604 
605  //see if it is an rfc string
606  if (p < 0) {
607  return str;
608  }
609 
610  int l = str.lastIndexOf(QLatin1Char('\''));
611 
612  //second is language
613  if (p >= l) {
614  return str;
615  }
616 
617  //first is charset or empty
618  //QString charset = str.left ( p );
619  QString st = str.mid(l + 1);
620  //QString language = str.mid ( p + 1, l - p - 1 );
621 
622  //qCDebug(KIMAP_LOG) << "Charset:" << charset << "Language:" << language;
623 
624  char ch, ch2;
625  p = 0;
626  while (p < st.length()) {
627  if (st.at(p) == 37) {
628  ch = st.at(p + 1).toLatin1() - 48;
629  if (ch > 16) {
630  ch -= 7;
631  }
632  ch2 = st.at(p + 2).toLatin1() - 48;
633  if (ch2 > 16) {
634  ch2 -= 7;
635  }
636  st.replace(p, 1, ch * 16 + ch2);
637  st.remove(p + 1, 2);
638  }
639  p++;
640  }
641  return st;
642 }
void squeeze()
int indexOf(QChar ch, int from, Qt::CaseSensitivity cs) const const
This file is part of the IMAP support library and defines the RfcCodecs class.
KIMAP_EXPORT QByteArray quoteIMAP(const QByteArray &src)
Escapes " as \" and \ as \\.
Definition: rfccodecs.cpp:147
void truncate(int position)
KIMAP_EXPORT const QString encodeRFC2231String(const QString &str)
Encodes a RFC2231 string str.
Definition: rfccodecs.cpp:548
KIMAP_EXPORT const QString decodeRFC2231String(const QString &str)
Decodes a RFC2231 string str.
Definition: rfccodecs.cpp:601
void reserve(int size)
KIMAP_EXPORT const QByteArray encodeRFC2047String(const QByteArray &str)
Encodes a RFC2047 string str.
Definition: rfccodecs.cpp:430
bool isEmpty() const const
KIMAP_EXPORT QString encodeImapFolderName(const QString &src)
Converts a Unicode IMAP mailbox name to a QString that can be used in IMAP communication.
Definition: rfccodecs.cpp:178
KIMAP_EXPORT QString decodeImapFolderName(const QString &inSrc)
Converts a UTF-7 encoded IMAP mailbox name to a Unicode QString.
Definition: rfccodecs.cpp:140
int length() const const
QString & remove(int position, int n)
KIMAP2_EXPORT QByteArray decodeImapFolderName(const QByteArray &inSrc)
Converts a UTF-7 encoded IMAP mailbox name to a QByteArray.
Definition: rfccodecs.cpp:56
int lastIndexOf(QChar ch, int from, Qt::CaseSensitivity cs) const const
void resize(int size)
KIMAP_EXPORT const QString decodeRFC2047String(const QString &str)
Decodes a RFC2047 string str.
Definition: rfccodecs.cpp:290
KIMAP2_EXPORT const QString encodeRFC2047String(const QString &str)
Encodes a RFC2047 string str.
Definition: rfccodecs.cpp:424
KIMAP2_EXPORT QString quoteIMAP(const QString &src)
Escapes " as \" and \ as \\.
Definition: rfccodecs.cpp:162
QString fromUtf8(const char *str, int size)
bool isEmpty() const const
const char * constData() const const
QByteArray & replace(int pos, int len, const char *after)
QString right(int n) const const
QString toLower() const const
KIMAP_EXPORT QTextCodec * codecForName(const QString &name)
Fetches a Codec by name.
Definition: rfccodecs.cpp:280
bool contains(QChar ch, Qt::CaseSensitivity cs) const const
KIMAP2_EXPORT QByteArray encodeImapFolderName(const QByteArray &src)
Converts a Unicode IMAP mailbox name to a QByteArray that can be used in IMAP communication.
Definition: rfccodecs.cpp:183
char toLatin1() const const
QString & replace(int position, int n, QChar after)
QByteArray left(int len) const const
QByteArray toLatin1() const const
QString mid(int position, int n) const const
QByteArray fromBase64(const QByteArray &base64, QByteArray::Base64Options options)
const QChar at(int position) const const
QTextCodec * codecForName(const QByteArray &name)
int length() const const
void reserve(int size)
char * data()
KIMAP2_EXPORT const QString decodeRFC2047String(const QString &str, QString &charset, QString &language)
Decodes a RFC2047 string str.
Definition: rfccodecs.cpp:307
Definition: acl.cpp:12
QString toUnicode(const QByteArray &a) const const
KCODECS_EXPORT QByteArray quotedPrintableDecode(const QByteArray &in)
QByteArray toUtf8() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Sat Oct 24 2020 23:16:51 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.