23 #include "kmime_header_parsing.h"
28 #include "kmime_util.h"
29 #include "kmime_util_p.h"
31 #include "kmime_warning.h"
34 #include <kcharsets.h>
36 #include <QtCore/QTextCodec>
37 #include <QtCore/QMap>
38 #include <QtCore/QStringList>
39 #include <QtCore/QUrl>
44 using namespace KMime;
45 using namespace KMime::Types;
53 static inline QString QUrl_fromAce_wrapper(
const QString & domain )
55 if ( domain.contains( QLatin1String(
"xn--" ) ) ) {
56 return QUrl::fromAce( domain.toLatin1() );
62 static QString addr_spec_as_string(
const AddrSpec & as,
bool pretty )
68 static QChar dotChar = QLatin1Char(
'.' );
69 static QChar backslashChar = QLatin1Char(
'\\' );
70 static QChar quoteChar = QLatin1Char(
'"' );
72 bool needsQuotes =
false;
74 result.reserve( as.localPart.length() + as.domain.length() + 1 );
75 for (
int i = 0 ; i < as.localPart.length() ; ++i ) {
76 const QChar ch = as.localPart.at( i );
77 if ( ch == dotChar || isAText( ch.toLatin1() ) ) {
81 if ( ch == backslashChar || ch == quoteChar ) {
82 result += backslashChar;
87 const QString dom = pretty ? QUrl_fromAce_wrapper( as.domain ) : as.domain ;
89 result = quoteChar + result + quoteChar;
91 if ( dom.isEmpty() ) {
94 result += QLatin1Char(
'@' );
100 QString AddrSpec::asString()
const
102 return addr_spec_as_string( *
this,
false );
105 QString AddrSpec::asPrettyString()
const
107 return addr_spec_as_string( *
this,
true );
110 bool AddrSpec::isEmpty()
const
112 return localPart.isEmpty() && domain.isEmpty();
118 const QString asString = addr_spec_as_string( mAddrSpec,
false );
119 if ( !asString.isEmpty() ) {
120 result = asString.toLatin1();
126 AddrSpec Mailbox::addrSpec()
const
143 const char *cursor = addr.constData();
144 if ( !HeaderParsing::parseAngleAddr( cursor,
145 cursor + addr.length(), mAddrSpec ) ) {
146 if ( !HeaderParsing::parseAddrSpec( cursor, cursor + addr.length(),
148 kWarning() <<
"Invalid address";
156 mDisplayName = removeBidiControlChars( name );
160 const QByteArray &defaultCharset )
163 setName( decodeRFC2047String( name, cs, defaultCharset,
false ) );
168 return !mAddrSpec.isEmpty();
173 return !mDisplayName.isEmpty();
178 return prettyAddress( QuoteNever );
184 return QLatin1String( address() );
187 if ( quoting != QuoteNever ) {
188 addQuotes( s, quoting == QuoteAlways );
191 if ( hasAddress() ) {
192 s += QLatin1String(
" <" ) + QLatin1String( address() ) + QLatin1Char(
'>' );
199 from7BitString( encodeRFC2047Sentence( s,
"utf-8" ) );
204 const char *cursor = s.constData();
205 HeaderParsing::parseMailbox( cursor, cursor + s.length(), *this );
214 if ( isUsAscii( name() ) ) {
215 QByteArray tmp = name().toLatin1();
216 addQuotes( tmp,
false );
219 rv += encodeRFC2047String( name(), encCharset,
true );
221 if ( hasAddress() ) {
222 rv +=
" <" + address() +
'>';
229 namespace HeaderParsing {
232 bool parseEncodedWord(
const char* &scursor,
const char *
const send,
233 QString &result, QByteArray &language,
234 QByteArray &usedCS,
const QByteArray &defaultCS,
238 assert( *( scursor - 1 ) ==
'=' );
245 char ch = *scursor++;
255 const char * charsetStart = scursor;
256 const char * languageStart = 0;
260 for ( ; scursor != send ; scursor++ ) {
261 if ( *scursor ==
'?' ) {
263 }
else if ( *scursor ==
'*' && languageStart == 0 ) {
264 languageStart = scursor + 1;
269 if ( scursor == send || *scursor !=
'?' ) {
271 KMIME_WARN_PREMATURE_END_OF( EncodedWord );
277 QByteArray maybeLanguage( languageStart, scursor - languageStart );
280 QByteArray maybeCharset( charsetStart,
281 ( languageStart ? languageStart - 1 : scursor ) - charsetStart );
290 const char * encodingStart = scursor;
293 for ( ; scursor != send ; scursor++ ) {
294 if ( *scursor ==
'?' ) {
300 if ( scursor == send || *scursor !=
'?' ) {
302 KMIME_WARN_PREMATURE_END_OF( EncodedWord );
307 QByteArray maybeEncoding( encodingStart, scursor - encodingStart );
320 const char * encodedTextStart = scursor;
323 for ( ; scursor != send ; scursor++ ) {
324 if ( *scursor ==
'?' ) {
325 if ( scursor + 1 != send ) {
326 if ( *( scursor + 1 ) !=
'=' ) {
327 KMIME_WARN <<
"Stray '?' in q-encoded word, ignoring this.";
336 KMIME_WARN_PREMATURE_END_OF( EncodedWord );
342 if ( *( scursor - 2 ) !=
'?' || *( scursor - 1 ) !=
'=' ||
343 scursor < encodedTextStart + 2 ) {
344 KMIME_WARN_PREMATURE_END_OF( EncodedWord );
349 const char *
const encodedTextEnd = scursor - 2;
359 KMIME_WARN_UNKNOWN( Encoding, maybeEncoding );
368 bool matchOK =
false;
369 QTextCodec *textCodec = 0;
370 if ( forceCS || maybeCharset.isEmpty() ) {
371 textCodec = KGlobal::charsets()->codecForName( QLatin1String( defaultCS ), matchOK );
372 usedCS = cachedCharset( defaultCS );
374 textCodec = KGlobal::charsets()->codecForName( QLatin1String( maybeCharset ), matchOK );
376 textCodec = KGlobal::charsets()->codecForName( QLatin1String( defaultCS ), matchOK );
377 usedCS = cachedCharset( defaultCS );
379 usedCS = cachedCharset( maybeCharset );
383 if ( !matchOK || !textCodec ) {
384 KMIME_WARN_UNKNOWN( Charset, maybeCharset );
392 int encodedTextLength = encodedTextEnd - encodedTextStart;
395 char *bbegin = buffer.data();
396 char *bend = bbegin + buffer.length();
403 if ( !dec->
decode( encodedTextStart, encodedTextEnd, bbegin, bend ) ) {
404 KMIME_WARN << codec->
name() <<
"codec lies about its maxDecodedSizeFor("
405 << encodedTextLength <<
")\nresult may be truncated";
408 result = textCodec->toUnicode( buffer.data(), bbegin - buffer.data() );
413 language = maybeLanguage;
418 static inline void eatWhiteSpace(
const char* &scursor,
const char *
const send )
420 while ( scursor != send &&
421 ( *scursor ==
' ' || *scursor ==
'\n' ||
422 *scursor ==
'\t' || *scursor ==
'\r' ) )
426 bool parseAtom(
const char * &scursor,
const char *
const send,
427 QString &result,
bool allow8Bit )
429 QPair<const char*, int> maybeResult;
431 if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) {
432 result += QString::fromLatin1( maybeResult.first, maybeResult.second );
439 bool parseAtom(
const char * &scursor,
const char *
const send,
440 QPair<const char*,int> &result,
bool allow8Bit )
442 bool success =
false;
443 const char *start = scursor;
445 while ( scursor != send ) {
446 signed char ch = *scursor++;
447 if ( ch > 0 && isAText( ch ) ) {
450 }
else if ( allow8Bit && ch < 0 ) {
452 KMIME_WARN_8BIT( ch );
462 result.first = start;
463 result.second = scursor - start;
469 bool parseToken(
const char * &scursor,
const char *
const send,
470 QString &result,
bool allow8Bit )
472 QPair<const char*, int> maybeResult;
474 if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) {
475 result += QString::fromLatin1( maybeResult.first, maybeResult.second );
482 bool parseToken(
const char * &scursor,
const char *
const send,
483 QPair<const char*,int> &result,
bool allow8Bit )
485 bool success =
false;
486 const char * start = scursor;
488 while ( scursor != send ) {
489 signed char ch = *scursor++;
490 if ( ch > 0 && isTText( ch ) ) {
493 }
else if ( allow8Bit && ch < 0 ) {
495 KMIME_WARN_8BIT( ch );
505 result.first = start;
506 result.second = scursor - start;
510 #define READ_ch_OR_FAIL if ( scursor == send ) { \
511 KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \
523 bool parseGenericQuotedString(
const char* &scursor,
const char *
const send,
524 QString &result,
bool isCRLF,
525 const char openChar,
const char closeChar )
534 assert( *( scursor - 1 ) == openChar || *( scursor - 1 ) == closeChar );
536 while ( scursor != send ) {
539 if ( ch == closeChar || ch == openChar ) {
549 KMIME_WARN_IF_8BIT( ch );
550 result += QLatin1Char( ch );
563 KMIME_WARN_LONE( CR );
564 result += QLatin1Char(
'\r' );
570 if ( ch ==
' ' || ch ==
'\t' ) {
574 result += QLatin1Char( ch );
579 KMIME_WARN_NON_FOLDING( CRLF );
580 result += QLatin1String(
"\r\n" );
597 if ( !isCRLF && ( ch ==
' ' || ch ==
'\t' ) ) {
600 result += QLatin1Char( ch );
603 KMIME_WARN_LONE( LF );
604 result += QLatin1Char(
'\n' );
614 if ( scursor == send ) {
618 const char *oldscursor = scursor;
620 QByteArray lang, charset;
621 if ( *scursor++ ==
'?' ) {
623 if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) {
627 scursor = oldscursor;
630 scursor = oldscursor;
635 KMIME_WARN_IF_8BIT( ch );
636 result += QLatin1Char( ch );
647 bool parseComment(
const char* &scursor,
const char *
const send,
648 QString &result,
bool isCRLF,
bool reallySave )
650 int commentNestingDepth = 1;
651 const char *afterLastClosingParenPos = 0;
653 const char *oldscursor = scursor;
655 assert( *( scursor - 1 ) ==
'(' );
657 while ( commentNestingDepth ) {
659 if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF,
'(',
')' ) ) {
660 assert( *( scursor - 1 ) ==
')' || *( scursor - 1 ) ==
'(' );
663 switch ( *( scursor - 1 ) ) {
669 if ( commentNestingDepth > 1 ) {
671 result += QLatin1Char(
')' );
675 afterLastClosingParenPos = scursor;
676 --commentNestingDepth;
682 maybeCmnt += cmntPart;
683 maybeCmnt += QLatin1Char(
'(' );
685 ++commentNestingDepth;
687 default: assert( 0 );
691 if ( afterLastClosingParenPos ) {
692 scursor = afterLastClosingParenPos;
694 scursor = oldscursor;
705 bool parsePhrase(
const char* &scursor,
const char *
const send,
706 QString &result,
bool isCRLF )
709 None, Phrase, Atom, EncodedWord, QuotedString
713 QByteArray lang, charset;
714 const char *successfullyParsed = 0;
716 const char *oldscursor;
719 bool lastWasEncodedWord =
false;
721 while ( scursor != send ) {
722 char ch = *scursor++;
725 if ( found == None ) {
729 if ( scursor != send && ( *scursor ==
' ' || *scursor ==
'\t' ) ) {
730 result += QLatin1String(
". " );
732 result += QLatin1Char(
'.' );
734 successfullyParsed = scursor;
739 if ( parseGenericQuotedString( scursor, send, tmp, isCRLF,
'"',
'"' ) ) {
740 successfullyParsed = scursor;
741 assert( *( scursor - 1 ) ==
'"' );
744 found = QuotedString;
751 result += QLatin1Char(
' ' );
756 lastWasEncodedWord =
false;
762 if ( found == None ) {
765 result += QLatin1Char(
' ' );
774 if ( parseComment( scursor, send, tmp, isCRLF,
776 successfullyParsed = scursor;
777 lastWasEncodedWord =
false;
779 if ( found == None ) {
782 scursor = successfullyParsed;
789 oldscursor = scursor;
792 if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) {
793 successfullyParsed = scursor;
802 if ( !lastWasEncodedWord ) {
803 result += QLatin1Char(
' ' );
807 default: assert( 0 );
809 lastWasEncodedWord =
true;
814 scursor = oldscursor;
821 if ( parseAtom( scursor, send, tmp,
true ) ) {
822 successfullyParsed = scursor;
832 result += QLatin1Char(
' ' );
837 lastWasEncodedWord =
false;
840 if ( found == None ) {
843 scursor = successfullyParsed;
848 eatWhiteSpace( scursor, send );
851 return found != None;
855 bool parseDotAtom(
const char* &scursor,
const char *
const send,
856 QString &result,
bool isCRLF )
858 eatCFWS( scursor, send, isCRLF );
861 const char *successfullyParsed;
864 if ( !parseAtom( scursor, send, tmp,
false ) ) {
868 successfullyParsed = scursor;
870 while ( scursor != send ) {
873 if ( scursor == send || *scursor !=
'.' ) {
878 if ( scursor == send || !isAText( *scursor ) ) {
882 scursor = successfullyParsed;
888 if ( !parseAtom( scursor, send, maybeAtom,
false ) ) {
889 scursor = successfullyParsed;
893 result += QLatin1Char(
'.' );
895 successfullyParsed = scursor;
898 scursor = successfullyParsed;
902 void eatCFWS(
const char* &scursor,
const char *
const send,
bool isCRLF )
906 while ( scursor != send ) {
907 const char *oldscursor = scursor;
909 char ch = *scursor++;
919 if ( parseComment( scursor, send, dummy, isCRLF,
false ) ) {
922 scursor = oldscursor;
926 scursor = oldscursor;
932 bool parseDomain(
const char* &scursor,
const char *
const send,
933 QString &result,
bool isCRLF )
935 eatCFWS( scursor, send, isCRLF );
936 if ( scursor == send ) {
946 if ( *scursor ==
'[' ) {
948 QString maybeDomainLiteral;
951 while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral,
952 isCRLF,
'[',
']' ) ) {
953 if ( scursor == send ) {
955 if ( *( scursor - 1 ) ==
']' ) {
957 result = maybeDomainLiteral;
966 if ( *( scursor - 1 ) ==
'[' ) {
967 maybeDomainLiteral += QLatin1Char(
'[' );
971 result = maybeDomainLiteral;
976 QString maybeDotAtom;
977 if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) {
978 result = maybeDotAtom;
980 if ( scursor != send && *scursor ==
'.' ) {
981 result += QLatin1Char(
'.' );
990 bool parseObsRoute(
const char* &scursor,
const char*
const send,
991 QStringList &result,
bool isCRLF,
bool save )
993 while ( scursor != send ) {
994 eatCFWS( scursor, send, isCRLF );
995 if ( scursor == send ) {
1000 if ( *scursor ==
',' ) {
1003 result.append( QString() );
1009 if ( *scursor ==
':' ) {
1012 result.append( QString() );
1018 if ( *scursor !=
'@' ) {
1024 QString maybeDomain;
1025 if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) {
1029 result.append( maybeDomain );
1033 eatCFWS( scursor, send, isCRLF );
1034 if ( scursor == send ) {
1037 if ( *scursor ==
':' ) {
1041 if ( *scursor ==
',' ) {
1049 bool parseAddrSpec(
const char* &scursor,
const char *
const send,
1050 AddrSpec &result,
bool isCRLF )
1059 QString maybeLocalPart;
1062 while ( scursor != send ) {
1064 eatCFWS( scursor, send, isCRLF );
1066 char ch = *scursor++;
1069 maybeLocalPart += QLatin1Char(
'.' );
1078 if ( parseGenericQuotedString( scursor, send, tmp, isCRLF,
'"',
'"' ) ) {
1079 maybeLocalPart += tmp;
1088 if ( parseAtom( scursor, send, tmp,
false ) ) {
1089 maybeLocalPart += tmp;
1106 assert( *( scursor - 1 ) ==
'@' );
1108 QString maybeDomain;
1109 if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) {
1113 result.localPart = maybeLocalPart;
1114 result.domain = maybeDomain;
1119 bool parseAngleAddr(
const char* &scursor,
const char *
const send,
1120 AddrSpec &result,
bool isCRLF )
1123 eatCFWS( scursor, send, isCRLF );
1124 if ( scursor == send || *scursor !=
'<' ) {
1129 eatCFWS( scursor, send, isCRLF );
1130 if ( scursor == send ) {
1134 if ( *scursor ==
'@' || *scursor ==
',' ) {
1136 KMIME_WARN <<
"obsolete source route found! ignoring.";
1138 if ( !parseObsRoute( scursor, send, dummy,
1143 if ( scursor == send ) {
1149 AddrSpec maybeAddrSpec;
1150 if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
1154 eatCFWS( scursor, send, isCRLF );
1155 if ( scursor == send || *scursor !=
'>' ) {
1160 result = maybeAddrSpec;
1165 static QString stripQuotes(
const QString &input )
1167 const QLatin1Char quotes(
'"' );
1168 if ( input.startsWith( quotes ) && input.endsWith( quotes ) ) {
1169 QString stripped( input.mid( 1, input.size() - 2 ) );
1176 bool parseMailbox(
const char* &scursor,
const char *
const send,
1177 Mailbox &result,
bool isCRLF )
1179 eatCFWS( scursor, send, isCRLF );
1180 if ( scursor == send ) {
1184 AddrSpec maybeAddrSpec;
1185 QString maybeDisplayName;
1188 const char * oldscursor = scursor;
1189 if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
1192 eatWhiteSpace( scursor, send );
1193 if ( scursor != send && *scursor ==
'(' ) {
1195 if ( !parseComment( scursor, send, maybeDisplayName, isCRLF,
true ) ) {
1199 result.
setName( stripQuotes( maybeDisplayName ) );
1202 scursor = oldscursor;
1205 if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
1207 maybeDisplayName.clear();
1208 scursor = oldscursor;
1211 eatCFWS( scursor, send, isCRLF );
1212 if ( scursor == send ) {
1218 if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) ) {
1222 if ( maybeDisplayName.isNull() ) {
1224 eatWhiteSpace( scursor, send );
1225 if ( scursor != send && *scursor ==
'(' ) {
1227 if ( !parseComment( scursor, send, maybeDisplayName, isCRLF,
true ) ) {
1233 result.
setName( stripQuotes( maybeDisplayName ) );
1238 bool parseGroup(
const char* &scursor,
const char *
const send,
1239 Address &result,
bool isCRLF )
1246 eatCFWS( scursor, send, isCRLF );
1247 if ( scursor == send ) {
1252 QString maybeDisplayName;
1253 if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
1258 eatCFWS( scursor, send, isCRLF );
1259 if ( scursor == send || *scursor !=
':' ) {
1265 result.displayName = removeBidiControlChars( maybeDisplayName );
1269 while ( scursor != send ) {
1270 eatCFWS( scursor, send, isCRLF );
1271 if ( scursor == send ) {
1276 if ( *scursor ==
',' ) {
1282 if ( *scursor ==
';' ) {
1288 if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
1291 result.mailboxList.append( maybeMailbox );
1293 eatCFWS( scursor, send, isCRLF );
1295 if ( scursor == send ) {
1299 if ( *scursor ==
';' ) {
1304 if ( *scursor ==
',' ) {
1311 bool parseAddress(
const char* &scursor,
const char *
const send,
1312 Address &result,
bool isCRLF )
1316 eatCFWS( scursor, send, isCRLF );
1317 if ( scursor == send ) {
1323 const char * oldscursor = scursor;
1324 if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
1326 result.displayName.clear();
1327 result.mailboxList.append( maybeMailbox );
1330 scursor = oldscursor;
1332 Address maybeAddress;
1335 if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) ) {
1339 result = maybeAddress;
1343 bool parseAddressList(
const char* &scursor,
const char *
const send,
1344 AddressList &result,
bool isCRLF )
1346 while ( scursor != send ) {
1347 eatCFWS( scursor, send, isCRLF );
1349 if ( scursor == send ) {
1353 if ( *scursor ==
',' ) {
1358 if ( *scursor ==
';' ) {
1364 Address maybeAddress;
1365 if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) {
1368 result.append( maybeAddress );
1370 eatCFWS( scursor, send, isCRLF );
1372 if ( scursor == send ) {
1376 if ( *scursor ==
',' ) {
1383 static QString asterisk = QString::fromLatin1(
"*0*", 1 );
1384 static QString asteriskZero = QString::fromLatin1(
"*0*", 2 );
1390 bool parseParameter(
const char* &scursor,
const char *
const send,
1391 QPair<QString,QStringOrQPair> &result,
bool isCRLF )
1403 eatCFWS( scursor, send, isCRLF );
1404 if ( scursor == send ) {
1412 QString maybeAttribute;
1413 if ( !parseToken( scursor, send, maybeAttribute,
false ) ) {
1417 eatCFWS( scursor, send, isCRLF );
1419 if ( scursor == send || *scursor !=
'=' ) {
1424 eatCFWS( scursor, send, isCRLF );
1425 if ( scursor == send ) {
1427 if ( maybeAttribute.endsWith( asterisk ) ) {
1428 KMIME_WARN <<
"attribute ends with \"*\", but value is empty!"
1429 "Chopping away \"*\".";
1430 maybeAttribute.truncate( maybeAttribute.length() - 1 );
1432 result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
1436 const char * oldscursor = scursor;
1441 QStringOrQPair maybeValue;
1442 if ( *scursor ==
'"' ) {
1445 if ( maybeAttribute.endsWith( asterisk ) ) {
1449 KMIME_WARN <<
"attribute ends with \"*\", but value is a quoted-string!"
1450 "Chopping away \"*\".";
1451 maybeAttribute.truncate( maybeAttribute.length() - 1 );
1454 if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) {
1455 scursor = oldscursor;
1456 result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
1461 if ( !parseToken( scursor, send, maybeValue.qpair,
false ) ) {
1462 scursor = oldscursor;
1463 result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
1468 result = qMakePair( maybeAttribute.toLower(), maybeValue );
1474 bool parseRawParameterList(
const char* &scursor,
const char *
const send,
1475 QMap<QString,QStringOrQPair> &result,
1488 while ( scursor != send ) {
1489 eatCFWS( scursor, send, isCRLF );
1491 if ( scursor == send ) {
1495 if ( *scursor ==
';' ) {
1500 QPair<QString, QStringOrQPair> maybeParameter;
1501 if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) {
1509 if ( maybeParameter.first.isNull() ) {
1512 while ( scursor != send ) {
1513 if ( *scursor++ ==
';' ) {
1524 result.insert( maybeParameter.first, maybeParameter.second );
1526 eatCFWS( scursor, send, isCRLF );
1528 if ( scursor == send ) {
1532 if ( *scursor ==
';' ) {
1539 static void decodeRFC2231Value(
Codec* &rfc2231Codec,
1540 QTextCodec* &textcodec,
1541 bool isContinuation, QString &value,
1542 QPair<const char*,int> &source, QByteArray& charset )
1548 const char * decBegin = source.first;
1549 const char * decCursor = decBegin;
1550 const char * decEnd = decCursor + source.second;
1552 if ( !isContinuation ) {
1554 while ( decCursor != decEnd ) {
1555 if ( *decCursor ==
'\'' ) {
1562 if ( decCursor == decEnd ) {
1565 KMIME_WARN <<
"No charset in extended-initial-value."
1566 "Assuming \"iso-8859-1\".";
1567 value += QString::fromLatin1( decBegin, source.second );
1571 charset = QByteArray( decBegin, decCursor - decBegin );
1573 const char * oldDecCursor = ++decCursor;
1575 while ( decCursor != decEnd ) {
1576 if ( *decCursor ==
'\'' ) {
1582 if ( decCursor == decEnd ) {
1583 KMIME_WARN <<
"No language in extended-initial-value."
1584 "Trying to recover.";
1585 decCursor = oldDecCursor;
1597 bool matchOK =
false;
1598 textcodec = KGlobal::charsets()->codecForName( QLatin1String( charset ), matchOK );
1601 KMIME_WARN_UNKNOWN( Charset, charset );
1605 if ( !rfc2231Codec ) {
1607 assert( rfc2231Codec );
1611 value += QString::fromLatin1( decCursor, decEnd - decCursor );
1624 QByteArray::Iterator bit = buffer.begin();
1625 QByteArray::ConstIterator bend = buffer.end();
1627 if ( !dec->
decode( decCursor, decEnd, bit, bend ) ) {
1628 KMIME_WARN << rfc2231Codec->
name()
1629 <<
"codec lies about its maxDecodedSizeFor()" << endl
1630 <<
"result may be truncated";
1633 value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() );
1644 bool parseParameterListWithCharset(
const char* &scursor,
1645 const char *
const send,
1646 QMap<QString,QString> &result,
1647 QByteArray& charset,
bool isCRLF )
1650 QMap<QString, QStringOrQPair> rawParameterList;
1651 if ( !parseRawParameterList( scursor, send, rawParameterList, isCRLF ) ) {
1655 if ( rawParameterList.isEmpty() ) {
1664 Codec * rfc2231Codec = 0;
1665 QTextCodec * textcodec = 0;
1669 NoMode = 0x0, Continued = 0x1, Encoded = 0x2
1678 QMap<QString, QStringOrQPair>::Iterator it, end = rawParameterList.end();
1680 for ( it = rawParameterList.begin() ; it != end ; ++it ) {
1681 if ( attribute.isNull() || !it.key().startsWith( attribute ) ) {
1687 if ( !attribute.isNull() ) {
1688 result.insert( attribute, value );
1692 attribute = it.key();
1694 EncodingMode encodingMode = NoEncoding;
1697 if ( attribute.endsWith( asterisk ) ) {
1698 attribute.truncate( attribute.length() - 1 );
1700 encodingMode = RFC2231;
1703 if ( !( *it ).qstring.isNull() && ( *it ).qstring.contains( QLatin1String(
"=?" ) ) ) {
1705 encodingMode = RFC2047;
1708 if ( attribute.endsWith( asteriskZero ) ) {
1709 attribute.truncate( attribute.length() - 2 );
1715 if ( mode & Encoded ) {
1716 if ( encodingMode == RFC2231 ) {
1717 decodeRFC2231Value( rfc2231Codec, textcodec,
1719 value, ( *it ).qpair, charset );
1721 else if ( encodingMode == RFC2047 ) {
1722 value += decodeRFC2047String( ( *it ).qstring.toLatin1(), charset );
1726 if ( ( *it ).qpair.first ) {
1727 value += QString::fromLatin1( ( *it ).qpair.first, ( *it ).qpair.second );
1729 value += ( *it ).qstring;
1737 if ( !( mode & Continued ) ) {
1739 result.insert( attribute, value );
1749 if ( it.key().endsWith( asterisk ) ) {
1751 decodeRFC2231Value( rfc2231Codec, textcodec,
1753 value, ( *it ).qpair, charset );
1756 if ( ( *it ).qpair.first ) {
1757 value += QString::fromLatin1( ( *it ).qpair.first, ( *it ).qpair.second );
1759 value += ( *it ).qstring;
1766 if ( !attribute.isNull() ) {
1767 result.insert( attribute, value );
1774 bool parseParameterList(
const char* &scursor,
const char *
const send,
1775 QMap<QString,QString> &result,
bool isCRLF )
1778 return parseParameterListWithCharset( scursor, send, result, charset, isCRLF );
1781 static const char *
const stdDayNames[] = {
1782 "Sun",
"Mon",
"Tue",
"Wed",
"Thu",
"Fri",
"Sat"
1784 static const int stdDayNamesLen =
sizeof stdDayNames /
sizeof *stdDayNames;
1786 static bool parseDayName(
const char* &scursor,
const char *
const send )
1789 if ( send - scursor < 3 ) {
1793 for (
int i = 0 ; i < stdDayNamesLen ; ++i ) {
1794 if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) {
1804 static const char *
const stdMonthNames[] = {
1805 "Jan",
"Feb",
"Mar",
"Apr",
"May",
"Jun",
1806 "Jul",
"Aug",
"Sep",
"Oct",
"Nov",
"Dec"
1808 static const int stdMonthNamesLen =
1809 sizeof stdMonthNames /
sizeof *stdMonthNames;
1811 static bool parseMonthName(
const char* &scursor,
const char *
const send,
1815 if ( send - scursor < 3 ) {
1819 for ( result = 0 ; result < stdMonthNamesLen ; ++result ) {
1820 if ( qstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) {
1830 static const struct {
1831 const char * tzName;
1832 long int secsEastOfGMT;
1879 static const int timeZonesLen =
sizeof timeZones /
sizeof *timeZones;
1881 static bool parseAlphaNumericTimeZone(
const char* &scursor,
1882 const char *
const send,
1883 long int &secsEastOfGMT,
1884 bool &timeZoneKnown )
1887 if ( *scursor ==
'"' ) {
1890 if ( scursor == send ) {
1895 QPair<const char*, int> maybeTimeZone( 0, 0 );
1896 if ( !parseToken( scursor, send, maybeTimeZone,
false ) ) {
1899 for (
int i = 0 ; i < timeZonesLen ; ++i ) {
1900 if ( qstrnicmp( timeZones[i].tzName,
1901 maybeTimeZone.first, maybeTimeZone.second ) == 0 ) {
1902 scursor += maybeTimeZone.second;
1903 secsEastOfGMT = timeZones[i].secsEastOfGMT;
1904 timeZoneKnown =
true;
1906 if ( *scursor ==
'"' ) {
1915 KMIME_WARN_UNKNOWN( time zone,
1916 QByteArray( maybeTimeZone.first, maybeTimeZone.second ) );
1918 timeZoneKnown =
false;
1923 int parseDigits(
const char* &scursor,
const char *
const send,
int &result )
1927 for ( ; scursor != send && isdigit( *scursor ) ; scursor++, digits++ ) {
1929 result += int( *scursor -
'0' );
1934 static bool parseTimeOfDay(
const char* &scursor,
const char *
const send,
1935 int &hour,
int &min,
int &sec,
bool isCRLF=
false )
1942 if ( !parseDigits( scursor, send, hour ) ) {
1946 eatCFWS( scursor, send, isCRLF );
1947 if ( scursor == send || *scursor !=
':' ) {
1952 eatCFWS( scursor, send, isCRLF );
1953 if ( scursor == send ) {
1960 if ( !parseDigits( scursor, send, min ) ) {
1964 eatCFWS( scursor, send, isCRLF );
1965 if ( scursor == send ) {
1972 if ( *scursor ==
':' ) {
1975 eatCFWS( scursor, send, isCRLF );
1976 if ( scursor == send ) {
1980 if ( !parseDigits( scursor, send, sec ) ) {
1990 bool parseTime(
const char* &scursor,
const char * send,
1991 int &hour,
int &min,
int &sec,
long int &secsEastOfGMT,
1992 bool &timeZoneKnown,
bool isCRLF )
2004 eatCFWS( scursor, send, isCRLF );
2005 if ( scursor == send ) {
2009 if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) ) {
2013 eatCFWS( scursor, send, isCRLF );
2015 if ( ( scursor == send ) || isdigit( *scursor ) ) {
2016 timeZoneKnown =
false;
2021 timeZoneKnown =
true;
2022 if ( *scursor ==
'+' || *scursor ==
'-' ) {
2024 const char sign = *scursor++;
2027 if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) {
2030 secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 );
2031 if ( sign ==
'-' ) {
2032 secsEastOfGMT *= -1;
2033 if ( secsEastOfGMT == 0 ) {
2034 timeZoneKnown =
false;
2039 if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) ) {
2046 bool parseDateTime(
const char* &scursor,
const char *
const send,
2047 KDateTime &result,
bool isCRLF )
2059 result = KDateTime();
2060 QDateTime maybeDateTime;
2062 eatCFWS( scursor, send, isCRLF );
2063 if ( scursor == send ) {
2070 if ( parseDayName( scursor, send ) ) {
2071 eatCFWS( scursor, send, isCRLF );
2072 if ( scursor == send ) {
2076 if ( *scursor ==
',' ) {
2078 eatCFWS( scursor, send, isCRLF );
2082 int maybeMonth = -1;
2083 bool asctimeFormat =
false;
2086 if ( !isdigit( *scursor ) && parseMonthName( scursor, send, maybeMonth ) ) {
2087 asctimeFormat =
true;
2088 eatCFWS( scursor, send, isCRLF );
2095 if ( !parseDigits( scursor, send, maybeDay ) ) {
2099 eatCFWS( scursor, send, isCRLF );
2100 if ( scursor == send ) {
2105 if ( *scursor ==
',' ) {
2112 if ( !asctimeFormat && !parseMonthName( scursor, send, maybeMonth ) ) {
2115 if ( scursor == send ) {
2118 assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 );
2121 eatCFWS( scursor, send, isCRLF );
2122 if ( scursor == send ) {
2127 bool timeAfterYear =
true;
2128 if ( ( send - scursor > 3 ) && ( ( scursor[1] ==
':' ) || ( scursor[2] ==
':' ) ) ) {
2129 timeAfterYear =
false;
2137 if ( timeAfterYear && !parseDigits( scursor, send, maybeYear ) ) {
2141 eatCFWS( scursor, send, isCRLF );
2142 if ( scursor == send ) {
2149 int maybeHour, maybeMinute, maybeSecond;
2150 long int secsEastOfGMT;
2151 bool timeZoneKnown =
true;
2153 if ( !parseTime( scursor, send,
2154 maybeHour, maybeMinute, maybeSecond,
2155 secsEastOfGMT, timeZoneKnown, isCRLF ) ) {
2160 if ( !timeAfterYear ) {
2161 eatCFWS( scursor, send, isCRLF );
2162 if ( scursor == send ) {
2166 if ( !parseDigits( scursor, send, maybeYear ) ) {
2172 if ( maybeYear < 50 ) {
2174 }
else if ( maybeYear < 1000 ) {
2178 if ( maybeYear < 1900 ) {
2182 maybeDateTime.setDate( QDate( maybeYear, maybeMonth, maybeDay ) );
2183 maybeDateTime.setTime( QTime( maybeHour, maybeMinute, maybeSecond ) );
2185 if ( !maybeDateTime.isValid() ) {
2189 result = KDateTime( maybeDateTime, KDateTime::Spec( KDateTime::OffsetFromUTC, secsEastOfGMT ) );
2190 if ( !result.isValid() ) {
2198 int endOfFieldBody = 0;
2199 bool folded =
false;
2202 int startOfFieldBody = head.indexOf(
':' );
2203 const int endOfFieldHeader = startOfFieldBody;
2205 if ( startOfFieldBody > -1 ) {
2207 if ( head[startOfFieldBody] ==
' ' ) {
2210 endOfFieldBody = findHeaderLineEnd( head, startOfFieldBody, &folded );
2212 QByteArray rawType = head.left( endOfFieldHeader );
2213 QByteArray rawFieldBody = head.mid( startOfFieldBody, endOfFieldBody - startOfFieldBody );
2215 rawFieldBody = unfoldHeader( rawFieldBody );
2218 if ( !rawType.isEmpty() ) {
2219 header = HeaderFactory::self()->createHeader( rawType );
2227 head.remove( 0, endOfFieldBody + 1 );
2235 void extractHeaderAndBody(
const QByteArray &content, QByteArray &header, QByteArray &body )
2241 if ( content.startsWith(
'\n' ) ) {
2242 body = content.right( content.length() - 1 );
2246 int pos = content.indexOf(
"\n\n", 0 );
2248 header = content.left( ++pos );
2249 body = content.mid( pos + 1, content.length() - pos - 1 );
2260 QByteArray copy = head;
2261 while ( ( h = extractFirstHeader( copy ) ) ) {
This file is part of the API for handling MIME data and defines the Codec class.
QByteArray address() const
Returns a string representation of the email address, without the angle brackets. ...
Represents an (email address, display name) pair according to RFC 2822, section 3.4. ...
QString prettyAddress() const
Returns an assembled display name / address string of the following form: "Display Name <address>"...
QByteArray as7BitString(const QByteArray &encCharset) const
Returns a 7bit transport encoded representation of this mailbox.
virtual int maxDecodedSizeFor(int insize, bool withCRLF=false) const =0
Computes the maximum size, in characters, needed for the decoding.
void from7BitString(const QByteArray &s)
Parses the given 7bit encoded string.
void setAddress(const AddrSpec &addr)
Sets the email address.
An abstract base class of codecs for common mail transfer encodings.
Stateful CTE decoder class.
void fromUnicodeString(const QString &s)
Parses the given unicode string.
static Codec * codecForName(const char *name)
Returns a codec associated with the specified name.
virtual Decoder * makeDecoder(bool withCRLF=false) const =0
Creates the decoder for the codec.
virtual const char * name() const =0
Returns the name of the encoding.
virtual bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend)=0
Decodes a chunk of data, maintaining state information between calls.
void setNameFrom7Bit(const QByteArray &name, const QByteArray &defaultCharset=QByteArray())
Sets the name based on a 7bit encoded string.
Quoting
Describes how display names should be quoted.
bool hasAddress() const
Returns true if this mailbox has an address.
void setName(const QString &name)
Sets the name.
bool hasName() const
Returns true if this mailbox has a display name.
QString name() const
Returns the display name.