24 #include "kmime_util.h"
25 #include "kmime_util_p.h"
29 #include "kmime_header_parsing.h"
30 #include "kmime_message.h"
31 #include "kmime_warning.h"
33 #include <config-kmime.h>
37 #include <klocalizedstring.h>
38 #include <kcharsets.h>
42 #include <QtCore/QList>
43 #include <QtCore/QString>
44 #include <QtCore/QTextCodec>
50 #include <boost/concept_check.hpp>
52 using namespace KMime;
// File-level state used by the helpers in this file.
// c_harsetCache and l_anguageCache are the lists that cachedCharset() and
// cachedLanguage() search and append to.
56 QList<QByteArray> c_harsetCache;
57 QList<QByteArray> l_anguageCache;
// Charset name stored by setFallbackCharEncoding(); decodeRFC2047String()
// consults it when its UTF-8 decoding attempt contains U+FFFD.
58 QString f_allbackCharEnc;
// Flag stored by setUseOutlookAttachmentEncoding(); starts out false.
59 bool u_seOutlookEncoding =
false;
// Searches c_harsetCache for an entry equal to name, ignoring case (qstricmp).
// The visible tail appends the upper-cased name to the cache and returns that
// newly appended entry.
// NOTE(review): the statement run on a match is not visible here; presumably
// it returns the already cached entry -- confirm.
61 QByteArray cachedCharset(
const QByteArray &name )
63 foreach (
const QByteArray& charset, c_harsetCache ) {
64 if ( qstricmp( name.data(), charset.data() ) == 0 ) {
69 c_harsetCache.append( name.toUpper() );
71 return c_harsetCache.last();
// Searches l_anguageCache for an entry equal to name, ignoring case
// (qstricmp). The visible tail appends the upper-cased name to the cache and
// returns that newly appended entry.
// NOTE(review): the statement run on a match is not visible here; presumably
// it returns the already cached entry -- confirm.
74 QByteArray cachedLanguage(
const QByteArray &name )
76 foreach (
const QByteArray& language, l_anguageCache ) {
77 if ( qstricmp( name.data(), language.data() ) == 0 ) {
82 l_anguageCache.append( name.toUpper() );
84 return l_anguageCache.last();
// Walks every character of s and tests its Latin-1 conversion.
// The test `toLatin1() <= 0` fires for characters without a Latin-1
// equivalent (QChar::toLatin1() yields 0 for those) and, where plain char is
// signed, for Latin-1 values above 0x7F.
// NOTE(review): the body of that branch and the final return are not visible;
// presumably the function reports false on the first such character.
87 bool isUsAscii(
const QString &s )
89 uint sLength = s.length();
90 for ( uint i=0; i<sLength; i++ ) {
91 if ( s.at( i ).toLatin1() <= 0 ) {
// Maps a Headers::contentEncoding value to its textual name:
// CE7Bit -> "7bit", CE8Bit -> "8bit", CEquPr -> "quoted-printable",
// CEbase64 -> "base64", CEuuenc -> "uuencode", CEbinary -> "binary".
// Any other value yields "unknown".
98 QString nameForEncoding( Headers::contentEncoding enc )
101 case Headers::CE7Bit:
return QString::fromLatin1(
"7bit" );
102 case Headers::CE8Bit:
return QString::fromLatin1(
"8bit" );
103 case Headers::CEquPr:
return QString::fromLatin1(
"quoted-printable" );
104 case Headers::CEbase64:
return QString::fromLatin1(
"base64" );
105 case Headers::CEuuenc:
return QString::fromLatin1(
"uuencode" );
106 case Headers::CEbinary:
return QString::fromLatin1(
"binary" );
107 default:
return QString::fromLatin1(
"unknown" );
// Builds the list of content transfer encodings considered usable for data.
// The choice is driven by cf.type() and, in one branch, by whether
// cf.printableRatio() exceeds 5/6; that ratio decides whether
// quoted-printable or base64 is listed first.
// NOTE(review): the declaration of cf and the case labels are not visible
// here, so which append belongs to which data type cannot be told from this
// view -- presumably cf is a CharFreq built from data; confirm.
111 QList<Headers::contentEncoding> encodingsForData(
const QByteArray &data )
113 QList<Headers::contentEncoding> allowed;
116 switch ( cf.type() ) {
118 allowed << Headers::CE7Bit;
120 allowed << Headers::CE8Bit;
122 if ( cf.printableRatio() > 5.0/6.0 ) {
126 allowed << Headers::CEquPr;
127 allowed << Headers::CEbase64;
129 allowed << Headers::CEbase64;
130 allowed << Headers::CEquPr;
134 allowed << Headers::CEbase64;
// 128-bit table, one bit per 7-bit ASCII code. Reading each byte most
// significant bit first (byte n covers codes 8n .. 8n+7), the set bits are:
//   " ( ) , . : ; < > @ [ \ ]
// NOTE(review): the code that indexes this table is not visible here; the
// MSB-first reading is inferred from the values producing this coherent set.
145 const uchar specialsMap[16] = {
146 0x00, 0x00, 0x00, 0x00,
147 0x20, 0xCA, 0x00, 0x3A,
148 0x80, 0x00, 0x00, 0x1C,
149 0x00, 0x00, 0x00, 0x00
// 128-bit table, one bit per 7-bit ASCII code. Reading each byte most
// significant bit first (byte n covers codes 8n .. 8n+7), the set bits are:
//   " ( ) , / : ; < = > ? @ [ \ ]
// NOTE(review): the code that indexes this table is not visible here; the
// MSB-first reading is inferred from the values producing this coherent set.
153 const uchar tSpecialsMap[16] = {
154 0x00, 0x00, 0x00, 0x00,
155 0x20, 0xC9, 0x00, 0x3F,
156 0x80, 0x00, 0x00, 0x1C,
157 0x00, 0x00, 0x00, 0x00
// 128-bit table, one bit per 7-bit ASCII code. Reading each byte most
// significant bit first (byte n covers codes 8n .. 8n+7), the set bits are
// the letters A-Z and a-z, the digits 0-9, and
//   ! # $ % & ' * + - / = ? ^ _ ` { | } ~
// NOTE(review): the code that indexes this table is not visible here; the
// MSB-first reading is inferred from the values producing this coherent set.
161 const uchar aTextMap[16] = {
162 0x00, 0x00, 0x00, 0x00,
163 0x5F, 0x35, 0xFF, 0xC5,
164 0x7F, 0xFF, 0xFF, 0xE3,
165 0xFF, 0xFF, 0xFF, 0xFE
// 128-bit table, one bit per 7-bit ASCII code. Reading each byte most
// significant bit first (byte n covers codes 8n .. 8n+7), the set bits are
// the letters A-Z and a-z, the digits 0-9, and
//   ! # $ % & ' * + - . ^ _ ` { | } ~
// NOTE(review): the code that indexes this table is not visible here; the
// MSB-first reading is inferred from the values producing this coherent set.
169 const uchar tTextMap[16] = {
170 0x00, 0x00, 0x00, 0x00,
171 0x5F, 0x36, 0xFF, 0xC0,
172 0x7F, 0xFF, 0xFF, 0xE3,
173 0xFF, 0xFF, 0xFF, 0xFE
// 128-bit table, one bit per 7-bit ASCII code. Reading each byte most
// significant bit first (byte n covers codes 8n .. 8n+7), the set bits are
// the letters A-Z and a-z, the digits 0-9, and
//   ! * + - /
// NOTE(review): the code that indexes this table is not visible here; the
// MSB-first reading is inferred from the values producing this coherent set.
177 const uchar eTextMap[16] = {
178 0x00, 0x00, 0x00, 0x00,
179 0x40, 0x35, 0xFF, 0xC0,
180 0x7F, 0xFF, 0xFF, 0xE0,
181 0x7F, 0xFF, 0xFF, 0xE0
// Stores fallbackCharEnc in the file-level f_allbackCharEnc, which
// decodeRFC2047String() reads when its UTF-8 attempt contains U+FFFD.
184 void setFallbackCharEncoding(
const QString& fallbackCharEnc)
186 f_allbackCharEnc = fallbackCharEnc;
// Returns the charset name last stored by setFallbackCharEncoding()
// (a default-constructed QString if it was never set).
189 QString fallbackCharEncoding()
191 return f_allbackCharEnc;
// Stores violateStandard in the file-level u_seOutlookEncoding flag.
// NOTE(review): nothing in this view reads the flag other than
// useOutlookAttachmentEncoding(); its effect on encoding lives elsewhere.
194 void setUseOutlookAttachmentEncoding(
bool violateStandard )
196 u_seOutlookEncoding = violateStandard;
// Returns the flag last stored by setUseOutlookAttachmentEncoding()
// (false by default).
199 bool useOutlookAttachmentEncoding()
201 return u_seOutlookEncoding;
// Decodes a header value that may contain encoded-words.
//
// src is walked byte by byte. At each '=' an encoded-word parse is attempted
// with HeaderParsing::parseEncodedWord(), which receives usedCS, defaultCS
// and forceCS; on success the decoded text is appended to result as UTF-8.
// Whitespace seen directly after an encoded word is held back in spaceBuffer
// and only copied to result by the `result += spaceBuffer` statements below.
// NOTE(review): the branches enclosing those statements are not visible;
// presumably they are the "not an encoded word" paths, so that whitespace
// between two adjacent encoded words is dropped -- confirm.
//
// The accumulated bytes are finally interpreted as UTF-8. If that produces
// U+FFFD and a fallback charset has been configured, the bytes are decoded
// with the fallback codec instead.
205 QString decodeRFC2047String(
const QByteArray &src, QByteArray &usedCS,
206 const QByteArray &defaultCS,
bool forceCS )
209 QByteArray spaceBuffer;
210 const char *scursor = src.constData();
211 const char *send = scursor + src.length();
212 bool onlySpacesSinceLastWord =
false;
214 while ( scursor != send ) {
// NOTE(review): *scursor is a plain char; passing a negative value to
// isspace() is undefined in C/C++ -- consider a cast to unsigned char.
216 if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
217 spaceBuffer += *scursor++;
222 if ( *scursor ==
'=' ) {
// start is recorded before parsing; its use is not visible here
// (presumably to rewind when the parse fails).
226 const char *start = scursor;
227 if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
228 result += decoded.toUtf8();
229 onlySpacesSinceLastWord =
true;
232 if ( onlySpacesSinceLastWord ) {
233 result += spaceBuffer;
234 onlySpacesSinceLastWord =
false;
242 if ( onlySpacesSinceLastWord ) {
243 result += spaceBuffer;
244 onlySpacesSinceLastWord =
false;
// U+FFFD in the UTF-8 interpretation means result held bytes that are not
// valid UTF-8, so the configured fallback charset is tried instead.
252 const QString tryUtf8 = QString::fromUtf8( result );
253 if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) {
254 QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc );
255 return codec->toUnicode( result );
// Convenience overload: forwards to the four-argument decodeRFC2047String()
// with "utf-8" as the default charset and forceCS = false. The charset
// reported through usedCS is a local that is not returned to the caller.
261 QString decodeRFC2047String(
const QByteArray &src )
264 return decodeRFC2047String( src, usedCS,
"utf-8",
false );
// Characters that encodeRFC2047String() treats as needing encoding when
// addressHeader is set, and that encodeRFC2047Sentence() splits words at:
//   " ( ) < > @ , . ; : \ [ ] =
267 static const char *reservedCharacters =
"\"()<>@,.;:\\[]=";
// Encodes src for use in a header, producing an encoded-word of the form
// "=?<charset>?...?..." around the part that needs encoding.
//
// Visible steps:
//  - a codec is looked up for charset; another lookup uses the locale
//    encoding (KGlobal::locale()->encoding()), presumably when the first
//    lookup fails -- the guarding condition is not visible;
//  - src is converted to bytes; if the converter reports invalid characters
//    the conversion is redone with the codec for usedCS (presumably after
//    usedCS was switched on a line not shown);
//  - the bytes are scanned for anything that needs encoding: bytes with the
//    high bit set, ESC (\033) and, when addressHeader is true, the reserved
//    characters;
//  - the span [start, end) is emitted either character by character (the
//    useQEncoding branch) or as "?B?" plus its Base64 form, and the bytes
//    after end are appended unchanged;
//  - otherwise result is just the converted bytes.
// NOTE(review): the declarations of result, usedCS, start, end, c and
// hexcode are not visible in this view.
269 QByteArray encodeRFC2047String(
const QString &src,
const QByteArray &charset,
270 bool addressHeader,
bool allow8BitHeaders )
274 bool nonAscii=
false, ok=
true, useQEncoding=
false;
277 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok );
282 usedCS = KGlobal::locale()->encoding();
283 codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok );
286 if ( charset.isEmpty() ) {
287 usedCS = codec->name();
// IgnoreHeader: do not emit a byte-order mark; invalidChars afterwards tells
// whether src could be represented in the chosen charset.
293 QTextCodec::ConverterState converterState( QTextCodec::IgnoreHeader );
294 QByteArray encoded8Bit = codec->fromUnicode( src.constData(), src.length(), &converterState );
295 if ( converterState.invalidChars > 0 ) {
297 codec = QTextCodec::codecForName( usedCS );
298 encoded8Bit = codec->fromUnicode( src );
301 if ( usedCS.contains(
"8859-" ) ) {
305 if ( allow8BitHeaders ) {
309 uint encoded8BitLength = encoded8Bit.length();
310 for (
unsigned int i=0; i<encoded8BitLength; i++ ) {
311 if ( encoded8Bit[i] ==
' ' ) {
// The literal below is character-for-character the same as
// reservedCharacters, which the second scan uses by name.
316 if ( ( (
signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] ==
'\033' ) ||
317 ( addressHeader && ( strchr(
"\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
// Advance end to the next space (or the end of the data).
325 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] !=
' ' ) ) {
// Look for further characters that need encoding after end and, when one is
// found, advance end to the next space again.
330 for (
int x=end; x<encoded8Bit.length(); x++ ) {
331 if ( ( (
signed char)encoded8Bit[x] < 0 ) || ( encoded8Bit[x] ==
'\033' ) ||
332 ( addressHeader && ( strchr( reservedCharacters, encoded8Bit[x] ) != 0 ) ) ) {
335 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] !=
' ' ) ) {
342 result = encoded8Bit.left( start ) +
"=?" + usedCS;
344 if ( useQEncoding ) {
348 for (
int i=start; i<end; i++ ) {
// ASCII letters and digits are the characters singled out here (presumably
// copied through unescaped; the branch body is not visible).
353 if ( ( ( c >=
'a' ) && ( c <=
'z' ) ) ||
354 ( ( c >=
'A' ) && ( c <=
'Z' ) ) ||
355 ( ( c >=
'0' ) && ( c <=
'9' ) ) ) {
// Hex digit of each nibble: 48 is '0'; a value of 58 or more is past '9'
// (presumably shifted up to 'A'..'F' on a line not shown).
359 hexcode = ( ( c & 0xF0 ) >> 4 ) + 48;
360 if ( hexcode >= 58 ) {
364 hexcode = ( c & 0x0F ) + 48;
365 if ( hexcode >= 58 ) {
373 result +=
"?B?" + encoded8Bit.mid( start, end - start ).toBase64();
377 result += encoded8Bit.right( encoded8Bit.length() - end );
379 result = encoded8Bit;
// Encodes src word by word: the text is cut at every reserved character,
// each piece between two cuts is passed to encodeRFC2047String() with the
// given charset, and the reserved character itself is appended unencoded.
// The piece left after the last cut is encoded once the loop has finished.
// NOTE(review): splitChars is filled but not referenced on any visible line;
// the declarations of result, pos and wordStart and the statements advancing
// ch / pos are also not visible here.
385 QByteArray encodeRFC2047Sentence(
const QString& src,
const QByteArray& charset )
388 QList<QChar> splitChars;
389 splitChars << QLatin1Char(
',' ) << QLatin1Char(
'\"' ) << QLatin1Char(
';' ) << QLatin1Char(
'\\' );
390 const QChar *ch = src.constData();
391 const int length = src.length();
398 while ( pos < length ) {
// Only code points below 127 count as ASCII here; this guard also keeps the
// strchr() result from being trusted for characters that toLatin1() cannot
// represent.
400 const bool isAscii = ch->unicode() < 127;
401 const bool isReserved = ( strchr( reservedCharacters, ch->toLatin1() ) != 0 );
402 if ( isAscii && isReserved ) {
403 const int wordSize = pos - wordStart;
404 if ( wordSize > 0 ) {
405 const QString word = src.mid( wordStart, wordSize );
406 result += encodeRFC2047String( word, charset );
409 result += ch->toLatin1();
// Trailing piece after the last reserved character.
417 const int wordSize = pos - wordStart;
418 if ( wordSize > 0 ) {
419 const QString word = src.mid( wordStart, pos - wordStart );
420 result += encodeRFC2047String( word, charset );
// Produces the extended parameter value form "<charset>''<percent-quoted>"
// for str.
//
// str is first converted to bytes: via toLatin1() when charset is exactly
// "us-ascii", via the codec found for charset when there is one, and via
// toLocal8Bit() otherwise. A first pass looks for bytes below 0x20 or with
// the high bit set. The second pass builds the result, percent-quoting every
// byte that has the high bit set, is '%', or is one of the characters in
// especials.
// NOTE(review): what happens for empty input and after the first pass is not
// visible; presumably the plain bytes are returned when nothing needs
// quoting -- confirm. The declarations of latin and l are not visible.
429 QByteArray encodeRFC2231String(
const QString& str,
const QByteArray& charset )
431 if ( str.isEmpty() ) {
435 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
437 if ( charset ==
"us-ascii" ) {
438 latin = str.toLatin1();
439 }
else if ( codec ) {
440 latin = codec->fromUnicode( str );
442 latin = str.toLocal8Bit();
// ( *l & 0xE0 ) == 0 is true for byte values 0x00..0x1F; *l & 0x80 for bytes
// with the high bit set.
446 for ( l = latin.data(); *l; ++l ) {
447 if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) ) {
456 QByteArray result = charset +
"''";
457 for ( l = latin.data(); *l; ++l ) {
458 bool needsQuoting = ( *l & 0x80 ) || ( *l ==
'%' );
459 if ( !needsQuoting ) {
460 const QByteArray especials =
"()<>@,;:\"/[]?.= \033";
461 int len = especials.length();
462 for (
int i = 0; i < len; i++ ) {
463 if ( *l == especials[i] ) {
469 if ( needsQuoting ) {
// Hex digit of each nibble: 48 is '0'; a value of 58 or more is past '9'
// (presumably shifted up to 'A'..'F' on a line not shown).
471 unsigned char hexcode;
472 hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
473 if ( hexcode >= 58 ) {
477 hexcode = ( *l & 0x0F ) + 48;
478 if ( hexcode >= 58 ) {
// Decodes an extended parameter value of the form
// "<charset>'<language>'<percent-quoted text>".
//
// The charset is the text before the first apostrophe and the payload is
// everything after the last one. "%XY" sequences in the payload are replaced
// in place by the byte they denote. The payload is then converted with the
// codec for the embedded charset, or with the codec for defaultCS when that
// charset is unknown or forceCS is set. usedCS receives the name of the codec
// actually used on that path.
// NOTE(review): the visible early return decodes str with the defaultCS
// codec; presumably it is taken when str contains no apostrophe -- the
// guarding condition is not visible. The declarations of ch / ch2, the
// handling of hex letters and the fourth parameter (used as forceCS below)
// are also not visible in this view.
491 QString decodeRFC2231String(
const QByteArray &str, QByteArray &usedCS,
const QByteArray &defaultCS,
494 int p = str.indexOf(
'\'' );
496 return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) )->toUnicode( str );
500 QByteArray charset = str.left( p );
502 QByteArray st = str.mid( str.lastIndexOf(
'\'' ) + 1 );
506 while ( p < (
int)st.length() ) {
// 37 is '%'; 48 is '0'.
507 if ( st.at( p ) == 37 ) {
// Requires two more bytes after the '%'.
510 if ( p + 2 < st.length() ) {
511 ch = st.at( p + 1 ) - 48;
515 ch2 = st.at( p + 2 ) - 48;
// Store the decoded byte over the '%' and drop the two hex digits.
519 st[p] = ch * 16 + ch2;
520 st.remove( p + 1, 2 );
525 kDebug() <<
"Got pre-decoded:" << st;
527 const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
528 if ( !charsetcodec || forceCS ) {
529 charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) );
532 usedCS = charsetcodec->name();
533 return charsetcodec->toUnicode( st );
// Convenience overload: forwards to the four-argument decodeRFC2231String()
// with "utf-8" as the default charset and forceCS = false. The charset
// reported through usedCS is a local that is not returned to the caller.
536 QString decodeRFC2231String(
const QByteArray &src )
539 return decodeRFC2231String( src, usedCS,
"utf-8",
false );
// Builds a string from a time/pid derived number and ten characters picked
// with rand() from the chars alphabet.
// NOTE(review): the alphabet has 61 characters -- the lowercase run goes
// "...uvxyz", without 'w' -- which matches the 61.0 factor used to pick an
// index in 0..60. Whether the omission is intentional cannot be told here.
// NOTE(review): the declarations of now, ran, pos, ret and the way the two
// parts are combined are not visible; nothing visible seeds rand().
542 QByteArray uniqueString()
544 static char chars[] =
"0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
548 unsigned int timeval;
// ran is in 1..1000, so the division below is never by zero.
552 ran = 1 + (int)( 1000.0 * rand() / ( RAND_MAX + 1.0 ) );
553 timeval = ( now / ran ) + getpid();
555 for (
int i = 0; i < 10; i++ ) {
556 pos = (int) ( 61.0 * rand() / ( RAND_MAX + 1.0 ) );
562 ret.setNum( timeval );
// Returns "nextPart" followed by a fresh uniqueString() value.
569 QByteArray multiPartBoundary()
571 return "nextPart" + uniqueString();
// Joins a folded header value back into a single line.
//
// For every '\n' found in header, foldBegin is moved left and foldEnd right
// from the newline to bracket the fold: foldBegin stops at the first
// preceding byte that is not whitespace, and foldEnd moves past whitespace
// and past a "=09" or "=20" sequence that directly follows a newline (the
// quoted-printable spellings of tab and space). The text before the fold is
// copied to result, and after the last fold the remainder of header is
// appended.
// NOTE(review): the declaration of result, what is inserted in place of a
// fold and the handling of empty input are not visible in this view.
574 QByteArray unfoldHeader(
const QByteArray &header )
577 if ( header.isEmpty() ) {
581 int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
582 while ( ( foldMid = header.indexOf(
'\n', pos ) ) >= 0 ) {
583 foldBegin = foldEnd = foldMid;
// Walk backwards over whitespace preceding the newline.
585 while ( foldBegin > 0 ) {
586 if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) {
// Walk forwards over whitespace (and "=09" / "=20" right after a newline).
592 while ( foldEnd <= header.length() - 1 ) {
593 if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) {
595 }
else if ( foldEnd > 0 && header[foldEnd - 1] ==
'\n' &&
596 header[foldEnd] ==
'=' && foldEnd + 2 < header.length() &&
597 ( ( header[foldEnd + 1] ==
'0' &&
598 header[foldEnd + 2] ==
'9' ) ||
599 ( header[foldEnd + 1] ==
'2' &&
600 header[foldEnd + 2] ==
'0' ) ) ) {
609 result += header.mid( pos, foldBegin - pos );
610 if ( foldEnd < header.length() - 1 ) {
// Copy whatever follows the last fold.
615 const int len = header.length();
617 result += header.mid( pos, len - pos );
// Locates the end of the header line whose data starts at dataBegin in src,
// following continuation lines.
//
// A line counts as continued when the byte after its '\n' is a space or a
// tab, or when it starts with "=09" or "=20" (the quoted-printable spellings
// of tab and space). The search stops at a '\n' that is not followed by such
// a continuation, or when no further '\n' exists / the newline is the last
// byte of src.
// Note that len is the index of the last byte (length - 1), not the length.
// NOTE(review): the declaration of end, the return statements, the handling
// of out-of-range dataBegin and the writes to *folded are not visible here.
622 int findHeaderLineEnd(
const QByteArray &src,
int &dataBegin,
bool *folded )
625 int len = src.length() - 1;
631 if ( dataBegin < 0 ) {
636 if ( dataBegin > len ) {
// Data that starts directly on a newline followed by a space or tab.
644 if ( src.at( end ) ==
'\n' && end + 1 < len &&
645 ( src[end + 1] ==
' ' || src[end + 1] ==
'\t' ) ) {
652 if ( src.at( end ) !=
'\n' ) {
654 end = src.indexOf(
'\n', end + 1 );
655 if ( end == -1 || end == len ) {
658 }
else if ( src[end + 1] ==
' ' || src[end + 1] ==
'\t' ||
659 ( src[end + 1] ==
'=' && end + 3 <= len &&
660 ( ( src[end + 2] ==
'0' && src[end + 3] ==
'9' ) ||
661 ( src[end + 2] ==
'2' && src[end + 3] ==
'0' ) ) ) ) {
// Finds the header called name in src.
//
// The search key n is compared case-insensitively against the very start of
// src (qstrnicmp) and otherwise searched for with strcasestr(). When found,
// dataBegin is set just past the name and the following character, skipping
// one space if present, and end is obtained from findHeaderLineEnd().
// NOTE(review): how n is built from name, the declaration of begin and the
// return values are not visible here; presumably n is name plus ':' (and a
// leading newline for the strcasestr search) -- confirm.
679 int indexOfHeader(
const QByteArray &src,
const QByteArray &name,
int &end,
int &dataBegin,
bool *folded )
685 if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
689 const char *p = strcasestr( src.constData(), n.constData() );
693 begin = p - src.constData();
699 dataBegin = begin + name.length() + 1;
// NOTE(review): no bounds check is visible before this at(); if the header
// name and its separator are the last bytes of src, dataBegin equals
// src.length() and at() is out of range -- confirm.
701 if ( src.at( dataBegin ) ==
' ' ) {
704 end = findHeaderLineEnd( src, dataBegin, folded );
// Returns the value of the first header called name in src.
//
// The header is located with indexOfHeader(); the bytes between the data
// start and the line end are then either taken as they are or passed through
// unfoldHeader().
// NOTE(review): the condition choosing between the two is not visible;
// presumably it is the folded flag reported by indexOfHeader(). What is
// returned when src is empty or the header is missing is not visible either.
714 QByteArray extractHeader(
const QByteArray &src,
const QByteArray &name )
720 if ( src.isEmpty() || indexOfHeader( src, name, end, begin, &folded ) < 0 ) {
726 result = src.mid( begin, end - begin );
729 QByteArray hdrValue = src.mid( begin, end - begin );
730 result = unfoldHeader( hdrValue );
// Returns the values of all headers called name in src.
//
// Works on a copy of src: after each hit the value is appended to result
// (raw, or passed through unfoldHeader()), the copy is cut down to the part
// after the hit, and the search is repeated on what is left.
// NOTE(review): the condition choosing between the raw and the unfolded
// value is not visible (presumably the folded flag), nor are the statements
// run when indexOfHeader() reports no (further) match.
737 QList<QByteArray> extractHeaders(
const QByteArray &src,
const QByteArray &name )
741 QList<QByteArray> result;
742 QByteArray copySrc( src );
744 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
748 while ( begin >= 0 ) {
750 result.append( copySrc.mid( begin, end - begin ) );
752 QByteArray hdrValue = copySrc.mid( begin, end - begin );
753 result.append( unfoldHeader( hdrValue ) );
// Continue behind the header that was just extracted.
757 copySrc = copySrc.mid( end );
758 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
// Removes the header called name from header, in place.
// The span removed runs from the position reported by indexOfHeader()
// through end inclusive (hence the + 1).
// NOTE(review): a guard for "header not found" is not visible between the
// lookup and the remove() call; presumably one exists on the missing line --
// confirm.
765 void removeHeader( QByteArray &header,
const QByteArray &name )
767 int begin, end, dummy;
768 begin = indexOfHeader( header, name, end, dummy );
770 header.remove( begin, end - begin + 1 );
// Replaces every "\r\n" with "\n" in ret.
// NOTE(review): the declaration of ret and the return are not visible;
// presumably ret is a copy of s that is returned -- confirm.
774 QByteArray CRLFtoLF(
const QByteArray &s )
777 ret.replace(
"\r\n",
"\n" );
// C-string overload: forwards to the QByteArray version of CRLFtoLF().
// NOTE(review): the construction of ret from s is not visible here.
781 QByteArray CRLFtoLF(
const char *s )
784 return CRLFtoLF( ret );
// Replaces every '\n' with "\r\n" in ret.
// NOTE(review): as visible here, a '\n' that is already preceded by '\r'
// would be expanded too, yielding "\r\r\n" -- confirm whether input is
// expected to contain bare LF only. The declaration of ret and the return
// are not visible.
787 QByteArray LFtoCRLF(
const QByteArray &s )
790 ret.replace(
'\n',
"\r\n" );
// C-string overload: forwards to the QByteArray version of LFtoCRLF().
// NOTE(review): the construction of ret from s is not visible here.
794 QByteArray LFtoCRLF(
const char *s )
797 return LFtoCRLF( ret );
// Shared implementation behind both removeQuots() overloads.
// StringType is the string class to edit in place; CharType is the type used
// to build the characters that str[i] is compared against.
//
// Walks str tracking whether the position is inside a quoted section: a '"'
// is handled by the first branch, a backslash inside quotes by the second.
// NOTE(review): both branch bodies are not visible; presumably the matched
// character is removed from str and inQuote is toggled on '"' -- confirm.
801 template <
typename StringType,
typename CharType >
void removeQuotesGeneric( StringType & str )
803 bool inQuote =
false;
804 for (
int i = 0; i < str.length(); ++i ) {
805 if ( str[i] == CharType(
'"' ) ) {
810 if ( inQuote && ( str[i] == CharType(
'\\' ) ) ) {
// QByteArray overload: runs removeQuotesGeneric() with plain char
// comparisons. str is modified in place.
818 void removeQuots( QByteArray &str )
820 removeQuotesGeneric<QByteArray, char>( str );
// QString overload: runs removeQuotesGeneric() with QLatin1Char comparisons.
// str is modified in place.
823 void removeQuots( QString &str )
825 removeQuotesGeneric<QString, QLatin1Char>( str );
// Shared implementation behind both addQuotes() overloads.
//
// Template parameters: StringType is the string class edited in place,
// CharType the type of one element of it, CharConverterType /
// StringConverterType build characters / strings of the right kind from
// literals, and ToString converts str into something QString can be
// constructed from for the regular expression test.
//
// Every backslash and double quote in str gets a backslash inserted in front
// of it. If str contains one of the characters matched by the regular
// expression ( " \ = ] [ : ; , . @ < > ) ( ), or forceQuotes is true, the
// whole string is wrapped in double quotes.
// NOTE(review): the regular expression test converts and searches the whole
// string and sits inside the per-character loop although it does not depend
// on i -- a candidate for hoisting. The branch bodies (presumably setting
// needsQuotes and skipping the inserted backslash) are not visible.
828 template<
class StringType,
class CharType,
class CharConverterType,
class StringConverterType,
class ToString>
829 void addQuotes_impl( StringType &str,
bool forceQuotes )
831 bool needsQuotes=
false;
832 for (
int i=0; i < str.length(); i++ ) {
833 const CharType cur = str.at( i );
834 if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String(
"\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(" ) ) ) ) {
// Escape backslashes and double quotes with a leading backslash.
837 if ( cur == CharConverterType(
'\\' ) || cur == CharConverterType(
'\"' ) ) {
838 str.insert( i, CharConverterType(
'\\' ) );
843 if ( needsQuotes || forceQuotes ) {
844 str.insert( 0, CharConverterType(
'\"' ) );
845 str.append( StringConverterType(
"\"" ) );
// QByteArray overload: runs addQuotes_impl() on str in place, using plain
// char / char* for literals and QLatin1String to view the bytes as text.
849 void addQuotes( QByteArray &str,
bool forceQuotes )
851 addQuotes_impl<QByteArray, char, char, char*, QLatin1String>( str, forceQuotes );
// QString overload: runs addQuotes_impl() on str in place, using QLatin1Char
// / QLatin1String for literals.
854 void addQuotes( QString &str,
bool forceQuotes )
856 addQuotes_impl<QString, QChar, QLatin1Char, QLatin1String, QString>( str, forceQuotes );
// Returns a copy of input in which Unicode directional formatting characters
// are balanced.
//
// LRO, RLO, LRE and RLE open a directional run and PDF (U+202C) closes one.
// A PDF with no open run is logged as possible spoofing and removed from the
// result; numPDFsRemoved corrects the index for characters already removed.
// If runs are still open at the end, a warning is logged and one PDF per open
// run is added -- before a trailing '"' if the result ends with one,
// otherwise at the very end.
// NOTE(review): the statements that update openDirChangers and
// numPDFsRemoved and the final return are not visible in this view.
859 KMIME_EXPORT QString balanceBidiState(
const QString &input )
861 const int LRO = 0x202D;
862 const int RLO = 0x202E;
863 const int LRE = 0x202A;
864 const int RLE = 0x202B;
865 const int PDF = 0x202C;
867 QString result = input;
869 int openDirChangers = 0;
870 int numPDFsRemoved = 0;
871 for (
int i = 0; i < input.length(); i++ ) {
872 const ushort &code = input.at( i ).unicode();
873 if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
875 }
else if ( code == PDF ) {
876 if ( openDirChangers > 0 ) {
880 kWarning() <<
"Possible Unicode spoofing (unexpected PDF) detected in" << input;
// i indexes input; result is shorter by the PDFs removed so far.
881 result.remove( i - numPDFsRemoved, 1 );
887 if ( openDirChangers > 0 ) {
888 kWarning() <<
"Possible Unicode spoofing detected in" << input;
// Close every run that is still open.
893 for (
int i = openDirChangers; i > 0; i-- ) {
894 if ( result.endsWith( QLatin1Char(
'"' ) ) ) {
895 result.insert( result.length() - 1, QChar( PDF ) );
897 result += QChar( PDF );
// Works on a copy of input and removes every LRO (U+202D), RLO (U+202E),
// LRE (U+202A) and RLE (U+202B) character from it.
// NOTE(review): PDF (U+202C) is neither declared nor removed on the visible
// lines; the return statement is not visible (presumably result is returned).
905 QString removeBidiControlChars(
const QString &input )
907 const int LRO = 0x202D;
908 const int RLO = 0x202E;
909 const int LRE = 0x202A;
910 const int RLE = 0x202B;
911 QString result = input;
912 result.remove( LRO );
913 result.remove( RLO );
914 result.remove( LRE );
915 result.remove( RLE );
// File-local helper used by hasAttachment() to recognise parts that carry
// signatures or encrypted payloads.
//
// True when the media type is "application" (compared in lower case) and the
// lower-cased subtype is one of pgp-encrypted, pgp-signature, pkcs7-mime,
// pkcs7-signature or x-pkcs7-signature. An "octet-stream" subtype can also
// qualify, subject to a further condition.
// NOTE(review): how contentType is obtained from content and the extra
// condition attached to "octet-stream" are not visible in this view.
919 static bool isCryptoPart(
Content* content )
931 const QByteArray lowerSubType = contentType->
subType().toLower();
932 return ( contentType->
mediaType().toLower() ==
"application" &&
933 ( lowerSubType ==
"pgp-encrypted" ||
934 lowerSubType ==
"pgp-signature" ||
935 lowerSubType ==
"pkcs7-mime" ||
936 lowerSubType ==
"pkcs7-signature" ||
937 lowerSubType ==
"x-pkcs7-signature" ||
938 ( lowerSubType ==
"octet-stream" &&
// Decides whether content, or any part nested in it, is an attachment.
//
// A part is treated as a candidate once emptyFilename has been cleared by
// one of the two checks below, and it must not be a crypto part
// (isCryptoPart()). The function also calls itself on child parts.
// NOTE(review): the conditions that clear emptyFilename, the loop supplying
// child and all return statements are not visible here; presumably the
// checks look for a file name in the Content-Disposition and then in the
// Content-Type header -- confirm.
942 bool hasAttachment(
Content* content )
948 bool emptyFilename =
true;
951 emptyFilename =
false;
954 if ( emptyFilename &&
957 emptyFilename =
false;
961 if ( !emptyFilename && !isCryptoPart( content ) ) {
968 if ( hasAttachment( child ) ) {
// Checks whether message looks signed.
//
// The visible condition is satisfied when the content type's subtype is
// "signed", "pgp-signature", "pkcs7-signature" or "x-pkcs7-signature", or
// when message->mainBodyPart() finds a part of type
// "application/pkcs7-signature" or "application/x-pkcs7-signature".
// NOTE(review): how contentType is obtained, the alternatives on the lines
// missing from the middle of the condition, and the return statements are
// not visible in this view.
976 bool isSigned(
Message *message )
983 if ( contentType->
isSubtype(
"signed" ) ||
984 contentType->
isSubtype(
"pgp-signature" ) ||
985 contentType->
isSubtype(
"pkcs7-signature" ) ||
986 contentType->
isSubtype(
"x-pkcs7-signature" ) ||
989 message->
mainBodyPart(
"application/pkcs7-signature" ) ||
990 message->
mainBodyPart(
"application/x-pkcs7-signature" ) ) {
// Checks whether message looks encrypted.
//
// The visible condition is satisfied when the content type's subtype is
// "encrypted", "pgp-encrypted" or "pkcs7-mime", or when
// message->mainBodyPart() finds a part of type "application/pgp-encrypted".
// NOTE(review): how contentType is obtained, any alternatives on the lines
// missing from the condition, and the return statements are not visible in
// this view.
996 bool isEncrypted(
Message *message )
1003 if ( contentType->
isSubtype(
"encrypted" ) ||
1004 contentType->
isSubtype(
"pgp-encrypted" ) ||
1005 contentType->
isSubtype(
"pkcs7-mime" ) ||
1007 message->
mainBodyPart(
"application/pgp-encrypted" ) ||
// Checks whether content is a calendar part: the visible test requires a
// non-null content type whose media type is "text" and whose subtype is
// "calendar".
// NOTE(review): how contentType is obtained from content and the return
// statements are not visible in this view.
1015 bool isInvitation(
Content *content )
1023 if ( contentType && contentType->
isMediatype(
"text" ) && contentType->
isSubtype(
"calendar" ) ) {
This file is part of the API for handling MIME data and defines the Codec class.
List contents() const
For multipart contents, this will return a list of all multipart child contents.
Content * mainBodyPart(const QByteArray &type=QByteArray())
Returns the first main body part of a given type, taking multipart/mixed and multipart/alternative nodes into consideration.
Headers::ContentDisposition * contentDisposition(bool create=true)
Returns the Content-Disposition header.
This file is part of the API for handling MIME data and defines the CharFreq class.
Headers::ContentType * contentType(bool create=true)
Returns the Content-Type header.
Represents a (email) message.
A class that encapsulates MIME encoded Content.
A class for performing basic data typing using frequency count heuristics.