KIMAP2

rfccodecs.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * rfccodecs.cpp - handler for various rfc/mime encodings
4  * Copyright (C) 2000 [email protected]
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Library General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Library General Public License for more details.
15  *
16  * You should have received a copy of the GNU Library General Public License
17  * along with this library; see the file COPYING.LIB. If not, write to
18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  * Boston, MA 02110-1301, USA.
20  *
21  *********************************************************************/
22 /**
23  * @file
24  * This file is part of the IMAP support library and defines the
25  * RfcCodecs class.
26  *
27  * @brief
28  * Defines the RfcCodecs class.
29  *
30  * @author Sven Carstens
31  */
32 
33 #include "rfccodecs.h"
34 
35 #include <ctype.h>
36 #include <sys/types.h>
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 
41 #include <QtCore/QTextCodec>
42 #include <QtCore/QBuffer>
43 #include <QtCore/QByteArray>
44 #include <QtCore/QLatin1Char>
45 #include <kcodecs.h>
46 
47 using namespace KIMAP2;
48 
49 // This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997.
50 // adapted to QT-Toolkit by Sven Carstens <[email protected]> 2000
51 
52 //@cond PRIVATE
53 static const unsigned char base64chars[] =
54  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
55 #define UNDEFINED 64
56 #define MAXLINE 76
57 static const char especials[17] = "()<>@,;:\"/[]?.= ";
58 
59 /* UTF16 definitions */
60 #define UTF16MASK 0x03FFUL
61 #define UTF16SHIFT 10
62 #define UTF16BASE 0x10000UL
63 #define UTF16HIGHSTART 0xD800UL
64 #define UTF16HIGHEND 0xDBFFUL
65 #define UTF16LOSTART 0xDC00UL
66 #define UTF16LOEND 0xDFFFUL
67 //@endcond
68 
69 //-----------------------------------------------------------------------------
71 {
72  unsigned char c, i, bitcount;
73  unsigned long ucs4, utf16, bitbuf;
74  unsigned char base64[256], utf8[6];
75  unsigned int srcPtr = 0;
76  QByteArray dst;
77  QByteArray src = inSrc;
78  uint srcLen = inSrc.length();
79 
80  /* initialize modified base64 decoding table */
81  memset(base64, UNDEFINED, sizeof(base64));
82  for (i = 0; i < sizeof(base64chars); ++i) {
83  base64[(int)base64chars[i]] = i;
84  }
85 
86  /* loop until end of string */
87  while (srcPtr < srcLen) {
88  c = src[srcPtr++];
89  /* deal with literal characters and &- */
90  if (c != '&' || src[srcPtr] == '-') {
91  /* encode literally */
92  dst += c;
93  /* skip over the '-' if this is an &- sequence */
94  if (c == '&') {
95  srcPtr++;
96  }
97  } else {
98  /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */
99  bitbuf = 0;
100  bitcount = 0;
101  ucs4 = 0;
102  while ((c = base64[(unsigned char)src[srcPtr]]) != UNDEFINED) {
103  ++srcPtr;
104  bitbuf = (bitbuf << 6) | c;
105  bitcount += 6;
106  /* enough bits for a UTF-16 character? */
107  if (bitcount >= 16) {
108  bitcount -= 16;
109  utf16 = (bitcount ? bitbuf >> bitcount : bitbuf) & 0xffff;
110  /* convert UTF16 to UCS4 */
111  if (utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND) {
112  ucs4 = (utf16 - UTF16HIGHSTART) << UTF16SHIFT;
113  continue;
114  } else if (utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND) {
115  ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
116  } else {
117  ucs4 = utf16;
118  }
119  /* convert UTF-16 range of UCS4 to UTF-8 */
120  if (ucs4 <= 0x7fUL) {
121  utf8[0] = ucs4;
122  i = 1;
123  } else if (ucs4 <= 0x7ffUL) {
124  utf8[0] = 0xc0 | (ucs4 >> 6);
125  utf8[1] = 0x80 | (ucs4 & 0x3f);
126  i = 2;
127  } else if (ucs4 <= 0xffffUL) {
128  utf8[0] = 0xe0 | (ucs4 >> 12);
129  utf8[1] = 0x80 | ((ucs4 >> 6) & 0x3f);
130  utf8[2] = 0x80 | (ucs4 & 0x3f);
131  i = 3;
132  } else {
133  utf8[0] = 0xf0 | (ucs4 >> 18);
134  utf8[1] = 0x80 | ((ucs4 >> 12) & 0x3f);
135  utf8[2] = 0x80 | ((ucs4 >> 6) & 0x3f);
136  utf8[3] = 0x80 | (ucs4 & 0x3f);
137  i = 4;
138  }
139  /* copy it */
140  for (c = 0; c < i; ++c) {
141  dst += utf8[c];
142  }
143  }
144  }
145  /* skip over trailing '-' in modified UTF-7 encoding */
146  if (src[srcPtr] == '-') {
147  ++srcPtr;
148  }
149  }
150  }
151  return dst;
152 }
153 
155 {
157 }
158 
159 //-----------------------------------------------------------------------------
160 
162 {
163  uint len = src.length();
164  QByteArray result;
165  result.reserve(2 * len);
166  for (unsigned int i = 0; i < len; i++) {
167  if (src[i] == '"' || src[i] == '\\') {
168  result += '\\';
169  }
170  result += src[i];
171  }
172  result.squeeze();
173  return result;
174 }
175 
177 {
178  uint len = src.length();
179  QString result;
180  result.reserve(2 * len);
181  for (unsigned int i = 0; i < len; i++) {
182  if (src[i] == QLatin1Char('"') || src[i] == QLatin1Char('\\')) {
183  result += QLatin1Char('\\');
184  }
185  result += src[i];
186  }
187  //result.squeeze(); - unnecessary and slow
188  return result;
189 }
190 
191 //-----------------------------------------------------------------------------
193 {
195 }
196 
198 {
199  unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag;
200  unsigned int ucs4, bitbuf;
201  QByteArray src = inSrc;
202  QByteArray dst;
203 
204  int srcPtr = 0;
205  utf7mode = 0;
206  utf8total = 0;
207  bitstogo = 0;
208  utf8pos = 0;
209  bitbuf = 0;
210  ucs4 = 0;
211  while (srcPtr < src.length()) {
212  c = (unsigned char)src[srcPtr++];
213  /* normal character? */
214  if (c >= ' ' && c <= '~') {
215  /* switch out of UTF-7 mode */
216  if (utf7mode) {
217  if (bitstogo) {
218  dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
219  bitstogo = 0;
220  }
221  dst += '-';
222  utf7mode = 0;
223  }
224  dst += c;
225  /* encode '&' as '&-' */
226  if (c == '&') {
227  dst += '-';
228  }
229  continue;
230  }
231  /* switch to UTF-7 mode */
232  if (!utf7mode) {
233  dst += '&';
234  utf7mode = 1;
235  }
236  /* Encode US-ASCII characters as themselves */
237  if (c < 0x80) {
238  ucs4 = c;
239  utf8total = 1;
240  } else if (utf8total) {
241  /* save UTF8 bits into UCS4 */
242  ucs4 = (ucs4 << 6) | (c & 0x3FUL);
243  if (++utf8pos < utf8total) {
244  continue;
245  }
246  } else {
247  utf8pos = 1;
248  if (c < 0xE0) {
249  utf8total = 2;
250  ucs4 = c & 0x1F;
251  } else if (c < 0xF0) {
252  utf8total = 3;
253  ucs4 = c & 0x0F;
254  } else {
255  /* NOTE: can't convert UTF8 sequences longer than 4 */
256  utf8total = 4;
257  ucs4 = c & 0x03;
258  }
259  continue;
260  }
261  /* loop to split ucs4 into two utf16 chars if necessary */
262  utf8total = 0;
263  do {
264  if (ucs4 >= UTF16BASE) {
265  ucs4 -= UTF16BASE;
266  bitbuf =
267  (bitbuf << 16) | ((ucs4 >> UTF16SHIFT) + UTF16HIGHSTART);
268  ucs4 = (ucs4 & UTF16MASK) + UTF16LOSTART;
269  utf16flag = 1;
270  } else {
271  bitbuf = (bitbuf << 16) | ucs4;
272  utf16flag = 0;
273  }
274  bitstogo += 16;
275  /* spew out base64 */
276  while (bitstogo >= 6) {
277  bitstogo -= 6;
278  dst +=
279  base64chars[(bitstogo ? (bitbuf >> bitstogo) : bitbuf) & 0x3F];
280  }
281  } while (utf16flag);
282  }
283  /* if in UTF-7 mode, finish in ASCII */
284  if (utf7mode) {
285  if (bitstogo) {
286  dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
287  }
288  dst += '-';
289  }
290  return quoteIMAP(dst);
291 }
292 
293 //-----------------------------------------------------------------------------
295 {
296  if (str.isEmpty()) {
297  return Q_NULLPTR;
298  }
299  return QTextCodec::codecForName(str.toLower().
300  replace(QStringLiteral("windows"), QStringLiteral("cp")).toLatin1());
301 }
302 
303 //-----------------------------------------------------------------------------
305 {
306  QString throw_away;
307 
308  return decodeRFC2047String(str, throw_away);
309 }
310 
311 //-----------------------------------------------------------------------------
313  QString &charset)
314 {
315  QString throw_away;
316 
317  return decodeRFC2047String(str, charset, throw_away);
318 }
319 
320 //-----------------------------------------------------------------------------
322  QString &charset,
323  QString &language)
324 {
325  //do we have a rfc string
326  if (!str.contains(QStringLiteral("=?"))) {
327  return str;
328  }
329 
330  // FIXME get rid of the conversion?
331  QByteArray aStr = str.toLatin1(); // QString.length() means Unicode chars
332  QByteArray result;
333  char *pos, *beg, *end, *mid = Q_NULLPTR;
334  QByteArray cstr;
335  char encoding = 0, ch;
336  bool valid;
337  const int maxLen = 200;
338  int i;
339 
340 // result.truncate(aStr.length());
341  for (pos = aStr.data(); *pos; pos++) {
342  if (pos[0] != '=' || pos[1] != '?') {
343  result += *pos;
344  continue;
345  }
346  beg = pos + 2;
347  end = beg;
348  valid = true;
349  // parse charset name
350  for (i = 2, pos += 2;
351  i < maxLen &&
352  (*pos != '?' && (ispunct(*pos) || isalnum(*pos)));
353  i++) {
354  pos++;
355  }
356  if (*pos != '?' || i < 4 || i >= maxLen) {
357  valid = false;
358  } else {
359  charset = QLatin1String(QByteArray(beg, i - 1)); // -2 + 1 for the zero
360  int pt = charset.lastIndexOf(QLatin1Char('*'));
361  if (pt != -1) {
362  // save language for later usage
363  language = charset.right(charset.length() - pt - 1);
364 
365  // tie off language as defined in rfc2047
366  charset.truncate(pt);
367  }
368  // get encoding and check delimiting question marks
369  encoding = toupper(pos[1]);
370  if (pos[2] != '?' ||
371  (encoding != 'Q' && encoding != 'B' &&
372  encoding != 'q' && encoding != 'b')) {
373  valid = false;
374  }
375  pos += 3;
376  i += 3;
377 // qCDebug(KIMAP2_LOG) << "Charset:" << charset << "- Language:" << language << "-'" << pos << "'";
378  }
379  if (valid) {
380  mid = pos;
381  // search for end of encoded part
382  while (i < maxLen && *pos && !(*pos == '?' && *(pos + 1) == '=')) {
383  i++;
384  pos++;
385  }
386  end = pos + 2;//end now points to the first char after the encoded string
387  if (i >= maxLen || !*pos) {
388  valid = false;
389  }
390  }
391  if (valid) {
392  ch = *pos;
393  *pos = '\0';
394  cstr = QByteArray(mid).left((int)(mid - pos - 1));
395  if (encoding == 'Q') {
396  // decode quoted printable text
397  for (i = cstr.length() - 1; i >= 0; --i) {
398  if (cstr[i] == '_') {
399  cstr[i] = ' ';
400  }
401  }
402 // qCDebug(KIMAP2_LOG) << "before QP '"
403 // << cstr << "'";
404  cstr = KCodecs::quotedPrintableDecode(cstr);
405 // qCDebug(KIMAP2_LOG) << "after QP '"
406 // << cstr << "'";
407  } else {
408  // decode base64 text
409  cstr = QByteArray::fromBase64(cstr);
410  }
411  *pos = ch;
412  int len = cstr.length();
413  for (i = 0; i < len; ++i) {
414  result += cstr[i];
415  }
416 
417  pos = end - 1;
418  } else {
419 // qCDebug(KIMAP2_LOG) << "invalid";
420  //result += "=?";
421  //pos = beg -1; // because pos gets increased shortly afterwards
422  pos = beg - 2;
423  result += *pos++;
424  result += *pos;
425  }
426  }
427  if (!charset.isEmpty()) {
428  QTextCodec *aCodec = codecForName(QLatin1String(charset.toLatin1()));
429  if (aCodec) {
430 // qCDebug(KIMAP2_LOG) << "Codec is" << aCodec->name();
431  return aCodec->toUnicode(result);
432  }
433  }
434  return QLatin1String(result);
435 }
436 
437 //-----------------------------------------------------------------------------
439 {
440  return QLatin1String(encodeRFC2047String(str.toLatin1()));
441 }
442 
443 //-----------------------------------------------------------------------------
445 {
446  if (str.isEmpty()) {
447  return str;
448  }
449 
450  const signed char *latin =
451  reinterpret_cast<const signed char *>
452  (str.data()), *l, *start, *stop;
453  char hexcode;
454  int numQuotes, i;
455  int rptr = 0;
456  // My stats show this number results in 12 resize() out of 73,000
457  int resultLen = 3 * str.length() / 2;
458  QByteArray result(resultLen, '\0');
459 
460  while (*latin) {
461  l = latin;
462  start = latin;
463  while (*l) {
464  if (*l == 32) {
465  start = l + 1;
466  }
467  if (*l < 0) {
468  break;
469  }
470  l++;
471  }
472  if (*l) {
473  numQuotes = 1;
474  while (*l) {
475  /* The encoded word must be limited to 75 character */
476  for (i = 0; i < 16; ++i) {
477  if (*l == especials[i]) {
478  numQuotes++;
479  }
480  }
481  if (*l < 0) {
482  numQuotes++;
483  }
484  /* Stop after 58 = 75 - 17 characters or at "<[email protected]" */
485  if (l - start + 2 * numQuotes >= 58 || *l == 60) {
486  break;
487  }
488  l++;
489  }
490  if (*l) {
491  stop = l - 1;
492  while (stop >= start && *stop != 32) {
493  stop--;
494  }
495  if (stop <= start) {
496  stop = l;
497  }
498  } else {
499  stop = l;
500  }
501  if (resultLen - rptr - 1 <= start - latin + 1 + 16) {
502  // =?iso-88...
503  resultLen += (start - latin + 1) * 2 + 20; // more space
504  result.resize(resultLen);
505  }
506  while (latin < start) {
507  result[rptr++] = *latin;
508  latin++;
509  }
510  result.replace(rptr, 15, "=?iso-8859-1?q?");
511  rptr += 15;
512  if (resultLen - rptr - 1 <= 3 * (stop - latin + 1)) {
513  resultLen += (stop - latin + 1) * 4 + 20; // more space
514  result.resize(resultLen);
515  }
516  while (latin < stop) {
517  // can add up to 3 chars/iteration
518  numQuotes = 0;
519  for (i = 0; i < 16; ++i) {
520  if (*latin == especials[i]) {
521  numQuotes = 1;
522  }
523  }
524  if (*latin < 0) {
525  numQuotes = 1;
526  }
527  if (numQuotes) {
528  result[rptr++] = '=';
529  hexcode = ((*latin & 0xF0) >> 4) + 48;
530  if (hexcode >= 58) {
531  hexcode += 7;
532  }
533  result[rptr++] = hexcode;
534  hexcode = (*latin & 0x0F) + 48;
535  if (hexcode >= 58) {
536  hexcode += 7;
537  }
538  result[rptr++] = hexcode;
539  } else {
540  result[rptr++] = *latin;
541  }
542  latin++;
543  }
544  result[rptr++] = '?';
545  result[rptr++] = '=';
546  } else {
547  while (*latin) {
548  if (rptr == resultLen - 1) {
549  resultLen += 30;
550  result.resize(resultLen);
551  }
552  result[rptr++] = *latin;
553  latin++;
554  }
555  }
556  }
557  result[rptr] = 0;
558  return result;
559 }
560 
561 //-----------------------------------------------------------------------------
563 {
564  if (str.isEmpty()) {
565  return str;
566  }
567 
568  signed char *latin = (signed char *)calloc(1, str.length() + 1);
569  char *latin_us = (char *)latin;
570  strcpy(latin_us, str.toLatin1());
571  signed char *l = latin;
572  char hexcode;
573  int i;
574  bool quote;
575  while (*l) {
576  if (*l < 0) {
577  break;
578  }
579  l++;
580  }
581  if (!*l) {
582  free(latin);
583  return str;
584  }
585  QByteArray result;
586  l = latin;
587  while (*l) {
588  quote = *l < 0;
589  for (i = 0; i < 16; ++i) {
590  if (*l == especials[i]) {
591  quote = true;
592  }
593  }
594  if (quote) {
595  result += '%';
596  hexcode = ((*l & 0xF0) >> 4) + 48;
597  if (hexcode >= 58) {
598  hexcode += 7;
599  }
600  result += hexcode;
601  hexcode = (*l & 0x0F) + 48;
602  if (hexcode >= 58) {
603  hexcode += 7;
604  }
605  result += hexcode;
606  } else {
607  result += *l;
608  }
609  l++;
610  }
611  free(latin);
612  return QLatin1String(result);
613 }
614 
615 //-----------------------------------------------------------------------------
617 {
618  int p = str.indexOf(QLatin1Char('\''));
619 
620  //see if it is an rfc string
621  if (p < 0) {
622  return str;
623  }
624 
625  int l = str.lastIndexOf(QLatin1Char('\''));
626 
627  //second is language
628  if (p >= l) {
629  return str;
630  }
631 
632  //first is charset or empty
633  //QString charset = str.left ( p );
634  QString st = str.mid(l + 1);
635  //QString language = str.mid ( p + 1, l - p - 1 );
636 
637  //qCDebug(KIMAP2_LOG) << "Charset:" << charset << "Language:" << language;
638 
639  char ch, ch2;
640  p = 0;
641  while (p < (int) st.length()) {
642  if (st.at(p) == 37) {
643  ch = st.at(p + 1).toLatin1() - 48;
644  if (ch > 16) {
645  ch -= 7;
646  }
647  ch2 = st.at(p + 2).toLatin1() - 48;
648  if (ch2 > 16) {
649  ch2 -= 7;
650  }
651  st.replace(p, 1, ch * 16 + ch2);
652  st.remove(p + 1, 2);
653  }
654  p++;
655  }
656  return st;
657 }
KIMAP2_EXPORT QTextCodec * codecForName(const QString &name)
Fetches a Codec by name.
Definition: rfccodecs.cpp:294
void truncate(int position)
KIMAP2_EXPORT QString encodeImapFolderName(const QString &src)
Converts a Unicode IMAP mailbox to a QString which can be used in IMAP communication.
Definition: rfccodecs.cpp:192
const KIMAP2_EXPORT QString decodeRFC2231String(const QString &str)
Decodes a RFC2231 string str.
Definition: rfccodecs.cpp:616
QString fromUtf8(const char *str, int size)
void stop(Ekos::AlignState mode)
void squeeze()
KIMAP_EXPORT QByteArray quoteIMAP(const QByteArray &src)
Replaces " with \" and \ with \\, i.e. backslash-escapes the " and \ characters.
Definition: rfccodecs.cpp:161
Q_SCRIPTABLE Q_NOREPLY void start()
QByteArray toLatin1() const const
void reserve(int size)
const KIMAP2_EXPORT QByteArray encodeRFC2047String(const QByteArray &str)
Encodes a RFC2047 string str.
Definition: rfccodecs.cpp:444
int lastIndexOf(QChar ch, int from, Qt::CaseSensitivity cs) const const
KIMAP2_EXPORT QString decodeImapFolderName(const QString &inSrc)
Converts a UTF-7 encoded IMAP mailbox to a Unicode QString.
Definition: rfccodecs.cpp:154
bool isEmpty() const const
const KIMAP2_EXPORT QString decodeRFC2047String(const QString &str)
Decodes a RFC2047 string str.
Definition: rfccodecs.cpp:304
QByteArray toUtf8() const const
int length() const const
QTextCodec * codecForName(const QByteArray &name)
QByteArray fromBase64(const QByteArray &base64, QByteArray::Base64Options options)
int indexOf(QChar ch, int from, Qt::CaseSensitivity cs) const const
QByteArray & replace(int pos, int len, const char *after)
QString & replace(int position, int n, QChar after)
QString & remove(int position, int n)
QString toLower() const const
QByteArray left(int len) const const
bool isEmpty() const const
void resize(int size)
const char * constData() const const
QString right(int n) const const
void reserve(int size)
const QChar at(int position) const const
KIMAP_EXPORT QByteArray decodeImapFolderName(const QByteArray &inSrc)
Converts a UTF-7 encoded IMAP mailbox to a QByteArray.
Definition: rfccodecs.cpp:70
int length() const const
bool contains(QChar ch, Qt::CaseSensitivity cs) const const
KIMAP2_EXPORT QByteArray quoteIMAP(const QByteArray &src)
Replaces " with \" and \ with \\, i.e. backslash-escapes the " and \ characters.
Definition: rfccodecs.cpp:161
QString toUnicode(const QByteArray &a) const const
KCODECS_EXPORT QByteArray quotedPrintableDecode(const QByteArray &in)
char toLatin1() const const
Provides handlers for various RFC/MIME encodings.
QString mid(int position, int n) const const
const KIMAP2_EXPORT QString encodeRFC2231String(const QString &str)
Encodes a RFC2231 string str.
Definition: rfccodecs.cpp:562
char * data()
KIMAP_EXPORT QByteArray encodeImapFolderName(const QByteArray &src)
Converts a Unicode IMAP mailbox to a QByteArray which can be used in IMAP communication.
Definition: rfccodecs.cpp:197
This file is part of the KDE documentation.
Documentation copyright © 1996-2022 The KDE developers.
Generated on Sun Aug 14 2022 04:16:09 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.