00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "rfcdecoder.h"
00024
00025 #include <ctype.h>
00026 #include <sys/types.h>
00027
00028 #include <stdio.h>
00029 #include <stdlib.h>
00030
00031 #include <qtextcodec.h>
00032 #include <qbuffer.h>
00033 #include <qregexp.h>
00034 #include <kmdcodec.h>
00035
00036
00037
00038
/*
 * Alphabet of the "modified BASE64" used by IMAP's modified UTF-7 mailbox
 * encoding: identical to standard base64 except that ',' replaces '/'.
 * The table is read-only, hence const. Note that sizeof() of this array is
 * 65 because of the terminating NUL of the string literal.
 */
static const unsigned char base64chars[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
/* Marker stored in reverse lookup tables for bytes outside the alphabet. */
#define UNDEFINED 64
#define MAXLINE 76


/* Constants for splitting/combining UTF-16 surrogate pairs. */
#define UTF16MASK 0x03FFUL         /* low ten bits carried by one surrogate */
#define UTF16SHIFT 10              /* bits contributed by the low surrogate */
#define UTF16BASE 0x10000UL        /* first code point needing a surrogate pair */
#define UTF16HIGHSTART 0xD800UL    /* high (leading) surrogate range */
#define UTF16HIGHEND 0xDBFFUL
#define UTF16LOSTART 0xDC00UL      /* low (trailing) surrogate range */
#define UTF16LOEND 0xDFFFUL
00052
00053
00054
00055 QString rfcDecoder::fromIMAP (const QString & inSrc)
00056 {
00057 unsigned char c, i, bitcount;
00058 unsigned long ucs4, utf16, bitbuf;
00059 unsigned char base64[256], utf8[6];
00060 unsigned long srcPtr = 0;
00061 QCString dst;
00062 QCString src = inSrc.ascii ();
00063 uint srcLen = inSrc.length();
00064
00065
00066 memset (base64, UNDEFINED, sizeof (base64));
00067 for (i = 0; i < sizeof (base64chars); ++i)
00068 {
00069 base64[(int)base64chars[i]] = i;
00070 }
00071
00072
00073 while (srcPtr < srcLen)
00074 {
00075 c = src[srcPtr++];
00076
00077 if (c != '&' || src[srcPtr] == '-')
00078 {
00079
00080 dst += c;
00081
00082 if (c == '&')
00083 srcPtr++;
00084 }
00085 else
00086 {
00087
00088 bitbuf = 0;
00089 bitcount = 0;
00090 ucs4 = 0;
00091 while ((c = base64[(unsigned char) src[srcPtr]]) != UNDEFINED)
00092 {
00093 ++srcPtr;
00094 bitbuf = (bitbuf << 6) | c;
00095 bitcount += 6;
00096
00097 if (bitcount >= 16)
00098 {
00099 bitcount -= 16;
00100 utf16 = (bitcount ? bitbuf >> bitcount : bitbuf) & 0xffff;
00101
00102 if (utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND)
00103 {
00104 ucs4 = (utf16 - UTF16HIGHSTART) << UTF16SHIFT;
00105 continue;
00106 }
00107 else if (utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND)
00108 {
00109 ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
00110 }
00111 else
00112 {
00113 ucs4 = utf16;
00114 }
00115
00116 if (ucs4 <= 0x7fUL)
00117 {
00118 utf8[0] = ucs4;
00119 i = 1;
00120 }
00121 else if (ucs4 <= 0x7ffUL)
00122 {
00123 utf8[0] = 0xc0 | (ucs4 >> 6);
00124 utf8[1] = 0x80 | (ucs4 & 0x3f);
00125 i = 2;
00126 }
00127 else if (ucs4 <= 0xffffUL)
00128 {
00129 utf8[0] = 0xe0 | (ucs4 >> 12);
00130 utf8[1] = 0x80 | ((ucs4 >> 6) & 0x3f);
00131 utf8[2] = 0x80 | (ucs4 & 0x3f);
00132 i = 3;
00133 }
00134 else
00135 {
00136 utf8[0] = 0xf0 | (ucs4 >> 18);
00137 utf8[1] = 0x80 | ((ucs4 >> 12) & 0x3f);
00138 utf8[2] = 0x80 | ((ucs4 >> 6) & 0x3f);
00139 utf8[3] = 0x80 | (ucs4 & 0x3f);
00140 i = 4;
00141 }
00142
00143 for (c = 0; c < i; ++c)
00144 {
00145 dst += utf8[c];
00146 }
00147 }
00148 }
00149
00150 if (src[srcPtr] == '-')
00151 ++srcPtr;
00152 }
00153 }
00154 return QString::fromUtf8 (dst.data ());
00155 }
00156
00157
00158 QString rfcDecoder::quoteIMAP(const QString &src)
00159 {
00160 uint len = src.length();
00161 QString result;
00162 result.reserve(2 * len);
00163 for (unsigned int i = 0; i < len; i++)
00164 {
00165 if (src[i] == '"' || src[i] == '\\')
00166 result += '\\';
00167 result += src[i];
00168 }
00169
00170 return result;
00171 }
00172
00173
00174
00175 QString rfcDecoder::toIMAP (const QString & inSrc)
00176 {
00177 unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag;
00178 unsigned long ucs4, bitbuf;
00179 QCString src = inSrc.utf8 ();
00180 QString dst;
00181
00182 ulong srcPtr = 0;
00183 utf7mode = 0;
00184 utf8total = 0;
00185 bitstogo = 0;
00186 utf8pos = 0;
00187 bitbuf = 0;
00188 ucs4 = 0;
00189 while (srcPtr < src.length ())
00190 {
00191 c = (unsigned char) src[srcPtr++];
00192
00193 if (c >= ' ' && c <= '~')
00194 {
00195
00196 if (utf7mode)
00197 {
00198 if (bitstogo)
00199 {
00200 dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
00201 bitstogo = 0;
00202 }
00203 dst += '-';
00204 utf7mode = 0;
00205 }
00206 dst += c;
00207
00208 if (c == '&')
00209 {
00210 dst += '-';
00211 }
00212 continue;
00213 }
00214
00215 if (!utf7mode)
00216 {
00217 dst += '&';
00218 utf7mode = 1;
00219 }
00220
00221 if (c < 0x80)
00222 {
00223 ucs4 = c;
00224 utf8total = 1;
00225 }
00226 else if (utf8total)
00227 {
00228
00229 ucs4 = (ucs4 << 6) | (c & 0x3FUL);
00230 if (++utf8pos < utf8total)
00231 {
00232 continue;
00233 }
00234 }
00235 else
00236 {
00237 utf8pos = 1;
00238 if (c < 0xE0)
00239 {
00240 utf8total = 2;
00241 ucs4 = c & 0x1F;
00242 }
00243 else if (c < 0xF0)
00244 {
00245 utf8total = 3;
00246 ucs4 = c & 0x0F;
00247 }
00248 else
00249 {
00250
00251 utf8total = 4;
00252 ucs4 = c & 0x03;
00253 }
00254 continue;
00255 }
00256
00257 utf8total = 0;
00258 do
00259 {
00260 if (ucs4 >= UTF16BASE)
00261 {
00262 ucs4 -= UTF16BASE;
00263 bitbuf = (bitbuf << 16) | ((ucs4 >> UTF16SHIFT) + UTF16HIGHSTART);
00264 ucs4 = (ucs4 & UTF16MASK) + UTF16LOSTART;
00265 utf16flag = 1;
00266 }
00267 else
00268 {
00269 bitbuf = (bitbuf << 16) | ucs4;
00270 utf16flag = 0;
00271 }
00272 bitstogo += 16;
00273
00274 while (bitstogo >= 6)
00275 {
00276 bitstogo -= 6;
00277 dst += base64chars[(bitstogo ? (bitbuf >> bitstogo) : bitbuf) & 0x3F];
00278 }
00279 }
00280 while (utf16flag);
00281 }
00282
00283 if (utf7mode)
00284 {
00285 if (bitstogo)
00286 {
00287 dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
00288 }
00289 dst += '-';
00290 }
00291 return quoteIMAP(dst);
00292 }
00293
00294
00295 QString rfcDecoder::decodeQuoting(const QString &aStr)
00296 {
00297 QString result;
00298 unsigned int strLength(aStr.length());
00299 for (unsigned int i = 0; i < strLength ; i++)
00300 {
00301 if (aStr[i] == "\\") i++;
00302 result += aStr[i];
00303 }
00304 return result;
00305 }
00306
00307
00308 QTextCodec *
00309 rfcDecoder::codecForName (const QString & _str)
00310 {
00311 if (_str.isEmpty ())
00312 return NULL;
00313 return QTextCodec::codecForName (_str.lower ().
00314 replace ("windows", "cp").latin1 ());
00315 }
00316
00317
00318 const QString
00319 rfcDecoder::decodeRFC2047String (const QString & _str)
00320 {
00321 QString throw_away;
00322
00323 return decodeRFC2047String (_str, throw_away);
00324 }
00325
00326
00327 const QString
00328 rfcDecoder::decodeRFC2047String (const QString & _str, QString & charset)
00329 {
00330 QString throw_away;
00331
00332 return decodeRFC2047String (_str, charset, throw_away);
00333 }
00334
00335
00336 const QString
00337 rfcDecoder::decodeRFC2047String (const QString & _str, QString & charset,
00338 QString & language)
00339 {
00340
00341 if (_str.find("=?") < 0)
00342 return _str;
00343
00344 QCString aStr = _str.ascii ();
00345 QCString result;
00346 char *pos, *beg, *end, *mid = NULL;
00347 QCString str;
00348 char encoding = 0, ch;
00349 bool valid;
00350 const int maxLen = 200;
00351 int i;
00352
00353
00354 for (pos = aStr.data (); *pos; pos++)
00355 {
00356 if (pos[0] != '=' || pos[1] != '?')
00357 {
00358 result += *pos;
00359 continue;
00360 }
00361 beg = pos + 2;
00362 end = beg;
00363 valid = TRUE;
00364
00365 for (i = 2, pos += 2;
00366 i < maxLen && (*pos != '?' && (ispunct (*pos) || isalnum (*pos)));
00367 i++)
00368 pos++;
00369 if (*pos != '?' || i < 4 || i >= maxLen)
00370 valid = FALSE;
00371 else
00372 {
00373 charset = QCString (beg, i - 1);
00374 int pt = charset.findRev('*');
00375 if (pt != -1)
00376 {
00377
00378 language = charset.right (charset.length () - pt - 1);
00379
00380
00381 charset.truncate(pt);
00382 }
00383
00384 encoding = toupper (pos[1]);
00385 if (pos[2] != '?'
00386 || (encoding != 'Q' && encoding != 'B' && encoding != 'q'
00387 && encoding != 'b'))
00388 valid = FALSE;
00389 pos += 3;
00390 i += 3;
00391
00392 }
00393 if (valid)
00394 {
00395 mid = pos;
00396
00397 while (i < maxLen && *pos && !(*pos == '?' && *(pos + 1) == '='))
00398 {
00399 i++;
00400 pos++;
00401 }
00402 end = pos + 2;
00403 if (i >= maxLen || !*pos)
00404 valid = FALSE;
00405 }
00406 if (valid)
00407 {
00408 ch = *pos;
00409 *pos = '\0';
00410 str = QCString (mid).left ((int) (mid - pos - 1));
00411 if (encoding == 'Q')
00412 {
00413
00414 for (i = str.length () - 1; i >= 0; i--)
00415 if (str[i] == '_')
00416 str[i] = ' ';
00417
00418
00419 str = KCodecs::quotedPrintableDecode(str);
00420
00421 }
00422 else
00423 {
00424
00425 str = KCodecs::base64Decode(str);
00426 }
00427 *pos = ch;
00428 int len = str.length();
00429 for (i = 0; i < len; i++)
00430 result += (char) (QChar) str[i];
00431
00432 pos = end - 1;
00433 }
00434 else
00435 {
00436
00437
00438
00439 pos = beg - 2;
00440 result += *pos++;
00441 result += *pos;
00442 }
00443 }
00444 if (!charset.isEmpty ())
00445 {
00446 QTextCodec *aCodec = codecForName (charset.ascii ());
00447 if (aCodec)
00448 {
00449
00450 return aCodec->toUnicode (result);
00451 }
00452 }
00453 return result;
00454 }
00455
00456
00457
00458 const char especials[17] = "()<>@,;:\"/[]?.= ";
00459
00460 const QString
00461 rfcDecoder::encodeRFC2047String (const QString & _str)
00462 {
00463 if (_str.isEmpty ())
00464 return _str;
00465 const signed char *latin = reinterpret_cast<const signed char *>(_str.latin1()), *l, *start, *stop;
00466 char hexcode;
00467 int numQuotes, i;
00468 int rptr = 0;
00469
00470 int resultLen = 3 * _str.length() / 2;
00471 QCString result(resultLen);
00472
00473 while (*latin)
00474 {
00475 l = latin;
00476 start = latin;
00477 while (*l)
00478 {
00479 if (*l == 32)
00480 start = l + 1;
00481 if (*l < 0)
00482 break;
00483 l++;
00484 }
00485 if (*l)
00486 {
00487 numQuotes = 1;
00488 while (*l)
00489 {
00490
00491 for (i = 0; i < 16; i++)
00492 if (*l == especials[i])
00493 numQuotes++;
00494 if (*l < 0)
00495 numQuotes++;
00496
00497 if (l - start + 2 * numQuotes >= 58 || *l == 60)
00498 break;
00499 l++;
00500 }
00501 if (*l)
00502 {
00503 stop = l - 1;
00504 while (stop >= start && *stop != 32)
00505 stop--;
00506 if (stop <= start)
00507 stop = l;
00508 }
00509 else
00510 stop = l;
00511 if (resultLen - rptr - 1 <= start - latin + 1 + 16 ) {
00512 resultLen += (start - latin + 1) * 2 + 20;
00513 result.resize(resultLen);
00514 }
00515 while (latin < start)
00516 {
00517 result[rptr++] = *latin;
00518 latin++;
00519 }
00520 strcpy(&result[rptr], "=?iso-8859-1?q?"); rptr += 15;
00521 if (resultLen - rptr - 1 <= 3*(stop - latin + 1)) {
00522 resultLen += (stop - latin + 1) * 4 + 20;
00523 result.resize(resultLen);
00524 }
00525 while (latin < stop)
00526 {
00527 numQuotes = 0;
00528 for (i = 0; i < 16; i++)
00529 if (*latin == especials[i])
00530 numQuotes = 1;
00531 if (*latin < 0)
00532 numQuotes = 1;
00533 if (numQuotes)
00534 {
00535 result[rptr++] = '=';
00536 hexcode = ((*latin & 0xF0) >> 4) + 48;
00537 if (hexcode >= 58)
00538 hexcode += 7;
00539 result[rptr++] = hexcode;
00540 hexcode = (*latin & 0x0F) + 48;
00541 if (hexcode >= 58)
00542 hexcode += 7;
00543 result[rptr++] = hexcode;
00544 }
00545 else
00546 {
00547 result[rptr++] = *latin;
00548 }
00549 latin++;
00550 }
00551 result[rptr++] = '?';
00552 result[rptr++] = '=';
00553 }
00554 else
00555 {
00556 while (*latin)
00557 {
00558 if (rptr == resultLen - 1) {
00559 resultLen += 30;
00560 result.resize(resultLen);
00561 }
00562 result[rptr++] = *latin;
00563 latin++;
00564 }
00565 }
00566 }
00567 result[rptr] = 0;
00568
00569 return result;
00570 }
00571
00572
00573
00574 const QString
00575 rfcDecoder::encodeRFC2231String (const QString & _str)
00576 {
00577 if (_str.isEmpty ())
00578 return _str;
00579 signed char *latin = (signed char *) calloc (1, _str.length () + 1);
00580 char *latin_us = (char *) latin;
00581 strcpy (latin_us, _str.latin1 ());
00582 signed char *l = latin;
00583 char hexcode;
00584 int i;
00585 bool quote;
00586 while (*l)
00587 {
00588 if (*l < 0)
00589 break;
00590 l++;
00591 }
00592 if (!*l) {
00593 free(latin);
00594 return _str.ascii ();
00595 }
00596 QCString result;
00597 l = latin;
00598 while (*l)
00599 {
00600 quote = *l < 0;
00601 for (i = 0; i < 16; i++)
00602 if (*l == especials[i])
00603 quote = true;
00604 if (quote)
00605 {
00606 result += "%";
00607 hexcode = ((*l & 0xF0) >> 4) + 48;
00608 if (hexcode >= 58)
00609 hexcode += 7;
00610 result += hexcode;
00611 hexcode = (*l & 0x0F) + 48;
00612 if (hexcode >= 58)
00613 hexcode += 7;
00614 result += hexcode;
00615 }
00616 else
00617 {
00618 result += *l;
00619 }
00620 l++;
00621 }
00622 free (latin);
00623 return result;
00624 }
00625
00626
00627
00628 const QString
00629 rfcDecoder::decodeRFC2231String (const QString & _str)
00630 {
00631 int p = _str.find ('\'');
00632
00633
00634 if (p < 0)
00635 return _str;
00636
00637 int l = _str.findRev ('\'');
00638
00639
00640 if (p >= l)
00641 return _str;
00642
00643
00644 QString charset = _str.left (p);
00645 QString st = _str.mid (l + 1);
00646 QString language = _str.mid (p + 1, l - p - 1);
00647
00648
00649
00650 char ch, ch2;
00651 p = 0;
00652 while (p < (int) st.length ())
00653 {
00654 if (st.at (p) == 37)
00655 {
00656 ch = st.at (p + 1).latin1 () - 48;
00657 if (ch > 16)
00658 ch -= 7;
00659 ch2 = st.at (p + 2).latin1 () - 48;
00660 if (ch2 > 16)
00661 ch2 -= 7;
00662 st.at (p) = ch * 16 + ch2;
00663 st.remove (p + 1, 2);
00664 }
00665 p++;
00666 }
00667 return st;
00668 }