24 #include "kmime_util.h"
25 #include "kmime_util_p.h"
29 #include "kmime_header_parsing.h"
30 #include "kmime_message.h"
31 #include "kmime_warning.h"
33 #include <config-kmime.h>
37 #include <klocalizedstring.h>
38 #include <kcharsets.h>
42 #include <QtCore/QList>
43 #include <QtCore/QString>
44 #include <QtCore/QTextCodec>
51 using namespace KMime;
// Boolean flag, initially false. In the visible code it is assigned from the
// argument of setUseOutlookAttachmentEncoding() and returned unchanged by
// useOutlookAttachmentEncoding().
58 bool u_seOutlookEncoding =
false;
62 foreach (
const QByteArray& charset, c_harsetCache ) {
63 if ( qstricmp( name.
data(), charset.
data() ) == 0 ) {
70 return c_harsetCache.
last();
75 foreach (
const QByteArray& language, l_anguageCache ) {
76 if ( qstricmp( name.
data(), language.
data() ) == 0 ) {
83 return l_anguageCache.
last();
86 bool isUsAscii(
const QString &s )
89 for ( uint i=0; i<sLength; i++ ) {
97 QString nameForEncoding( Headers::contentEncoding enc )
115 switch ( cf.type() ) {
117 allowed << Headers::CE7Bit;
119 allowed << Headers::CE8Bit;
121 if ( cf.printableRatio() > 5.0/6.0 ) {
125 allowed << Headers::CEquPr;
126 allowed << Headers::CEbase64;
128 allowed << Headers::CEbase64;
129 allowed << Headers::CEquPr;
133 allowed << Headers::CEbase64;
// 16 bytes = 128 bits, i.e. one bit per 7-bit ASCII code.
// Read most-significant-bit first (byte = c / 8, mask = 0x80 >> (c % 8)),
// the set bits select exactly these 13 characters:
//     " ( ) , . : ; < > @ [ \ ]
// which matches the "specials" set of RFC 2822.
// NOTE(review): the code that indexes this table is not part of this span;
// confirm the bit order against it.
144 const uchar specialsMap[16] = {
145 0x00, 0x00, 0x00, 0x00,
146 0x20, 0xCA, 0x00, 0x3A,
147 0x80, 0x00, 0x00, 0x1C,
148 0x00, 0x00, 0x00, 0x00
// 16 bytes = 128 bits, i.e. one bit per 7-bit ASCII code.
// Read most-significant-bit first (byte = c / 8, mask = 0x80 >> (c % 8)),
// the set bits select exactly these 15 characters:
//     " ( ) , / : ; < = > ? @ [ \ ]
// which matches the "tspecials" set of RFC 2045.
// NOTE(review): the code that indexes this table is not part of this span;
// confirm the bit order against it.
152 const uchar tSpecialsMap[16] = {
153 0x00, 0x00, 0x00, 0x00,
154 0x20, 0xC9, 0x00, 0x3F,
155 0x80, 0x00, 0x00, 0x1C,
156 0x00, 0x00, 0x00, 0x00
// 16 bytes = 128 bits, i.e. one bit per 7-bit ASCII code.
// Read most-significant-bit first (byte = c / 8, mask = 0x80 >> (c % 8)),
// the set bits select the ASCII letters and digits plus
//     ! # $ % & ' * + - / = ? ^ _ ` { | } ~
// which matches the "atext" set of RFC 2822. No control character, space or
// DEL (0x7F) bit is set.
// NOTE(review): the code that indexes this table is not part of this span;
// confirm the bit order against it.
160 const uchar aTextMap[16] = {
161 0x00, 0x00, 0x00, 0x00,
162 0x5F, 0x35, 0xFF, 0xC5,
163 0x7F, 0xFF, 0xFF, 0xE3,
164 0xFF, 0xFF, 0xFF, 0xFE
// 16 bytes = 128 bits, i.e. one bit per 7-bit ASCII code.
// Read most-significant-bit first (byte = c / 8, mask = 0x80 >> (c % 8)),
// the set bits select the ASCII letters and digits plus
//     ! # $ % & ' * + - . ^ _ ` { | } ~
// i.e. printable ASCII minus space and minus the characters flagged in
// tSpecialsMap; this matches the characters allowed in an RFC 2045 "token".
// NOTE(review): the code that indexes this table is not part of this span;
// confirm the bit order against it.
168 const uchar tTextMap[16] = {
169 0x00, 0x00, 0x00, 0x00,
170 0x5F, 0x36, 0xFF, 0xC0,
171 0x7F, 0xFF, 0xFF, 0xE3,
172 0xFF, 0xFF, 0xFF, 0xFE
// 16 bytes = 128 bits, i.e. one bit per 7-bit ASCII code.
// Read most-significant-bit first (byte = c / 8, mask = 0x80 >> (c % 8)),
// the set bits select the ASCII letters and digits plus only
//     ! * + - /
// This appears to match the characters RFC 2047 (section 5, rule 3) allows
// unencoded inside a "Q"-encoded word used in a phrase.
// NOTE(review): the code that indexes this table is not part of this span;
// confirm both the bit order and the intended use against it.
176 const uchar eTextMap[16] = {
177 0x00, 0x00, 0x00, 0x00,
178 0x40, 0x35, 0xFF, 0xC0,
179 0x7F, 0xFF, 0xFF, 0xE0,
180 0x7F, 0xFF, 0xFF, 0xE0
183 void setFallbackCharEncoding(
const QString& fallbackCharEnc)
185 f_allbackCharEnc = fallbackCharEnc;
190 return f_allbackCharEnc;
193 void setUseOutlookAttachmentEncoding(
bool violateStandard )
195 u_seOutlookEncoding = violateStandard;
198 bool useOutlookAttachmentEncoding()
200 return u_seOutlookEncoding;
212 const char *send = scursor + src.
length();
213 bool onlySpacesSinceLastWord =
false;
215 while ( scursor != send ) {
217 if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
218 spaceBuffer += *scursor++;
223 if ( *scursor ==
'=' ) {
227 const char *start = scursor;
228 if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
229 result += decoded.
toUtf8();
230 onlySpacesSinceLastWord =
true;
233 if ( onlySpacesSinceLastWord ) {
234 result += spaceBuffer;
235 onlySpacesSinceLastWord =
false;
243 if ( onlySpacesSinceLastWord ) {
244 result += spaceBuffer;
245 onlySpacesSinceLastWord =
false;
265 return decodeRFC2047String( src, usedCS,
"utf-8",
false );
// NUL-terminated list of characters that the visible code probes with strchr():
//  - in the 8-bit header scan, where a hit (only when addressHeader is set)
//    is treated like a non-ASCII or ESC byte;
//  - in the per-character loop that ends the current word at an ASCII
//    character found in this list and encodes the word collected so far.
// The first scan loop spells out the same character list as an inline string
// literal instead of using this constant.
268 static const char *reservedCharacters =
"\"()<>@,.;:\\[]=";
271 bool addressHeader,
bool allow8BitHeaders )
275 bool nonAscii=
false, ok=
true, useQEncoding=
false;
283 usedCS = KGlobal::locale()->encoding();
288 usedCS = codec->
name();
296 if ( converterState.invalidChars > 0 ) {
306 if ( allow8BitHeaders ) {
310 uint encoded8BitLength = encoded8Bit.
length();
311 for (
unsigned int i=0; i<encoded8BitLength; i++ ) {
312 if ( encoded8Bit[i] ==
' ' ) {
317 if ( ( (
signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] ==
'\033' ) ||
318 ( addressHeader && ( strchr(
"\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
326 while ( ( end < encoded8Bit.
length() ) && ( encoded8Bit[end] !=
' ' ) ) {
331 for (
int x=end; x<encoded8Bit.
length(); x++ ) {
332 if ( ( (
signed char)encoded8Bit[x] < 0 ) || ( encoded8Bit[x] ==
'\033' ) ||
333 ( addressHeader && ( strchr( reservedCharacters, encoded8Bit[x] ) != 0 ) ) ) {
336 while ( ( end < encoded8Bit.
length() ) && ( encoded8Bit[end] !=
' ' ) ) {
343 result = encoded8Bit.
left( start ) +
"=?" + usedCS;
345 if ( useQEncoding ) {
349 for (
int i=start; i<end; i++ ) {
354 if ( ( ( c >=
'a' ) && ( c <=
'z' ) ) ||
355 ( ( c >=
'A' ) && ( c <=
'Z' ) ) ||
356 ( ( c >=
'0' ) && ( c <=
'9' ) ) ) {
360 hexcode = ( ( c & 0xF0 ) >> 4 ) + 48;
361 if ( hexcode >= 58 ) {
365 hexcode = ( c & 0x0F ) + 48;
366 if ( hexcode >= 58 ) {
374 result +=
"?B?" + encoded8Bit.
mid( start, end - start ).
toBase64();
378 result += encoded8Bit.
right( encoded8Bit.
length() - end );
380 result = encoded8Bit;
392 const int length = src.
length();
399 while ( pos < length ) {
401 const bool isAscii = ch->
unicode() < 127;
402 const bool isReserved = ( strchr( reservedCharacters, ch->
toLatin1() ) != 0 );
403 if ( isAscii && isReserved ) {
404 const int wordSize = pos - wordStart;
405 if ( wordSize > 0 ) {
406 const QString word = src.
mid( wordStart, wordSize );
407 result += encodeRFC2047String( word, charset );
418 const int wordSize = pos - wordStart;
419 if ( wordSize > 0 ) {
420 const QString word = src.
mid( wordStart, pos - wordStart );
421 result += encodeRFC2047String( word, charset );
438 if ( charset ==
"us-ascii" ) {
440 }
else if ( codec ) {
447 for ( l = latin.
data(); *l; ++l ) {
448 if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) ) {
458 for ( l = latin.
data(); *l; ++l ) {
459 bool needsQuoting = ( *l & 0x80 ) || ( *l ==
'%' );
460 if ( !needsQuoting ) {
461 const QByteArray especials =
"()<>@,;:\"/[]?.= \033";
462 int len = especials.
length();
463 for (
int i = 0; i < len; i++ ) {
464 if ( *l == especials[i] ) {
470 if ( needsQuoting ) {
472 unsigned char hexcode;
473 hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
474 if ( hexcode >= 58 ) {
478 hexcode = ( *l & 0x0F ) + 48;
479 if ( hexcode >= 58 ) {
497 return KGlobal::charsets()->codecForName(
QString::fromLatin1( defaultCS ) )->toUnicode( str );
507 while ( p < (
int)st.
length() ) {
508 if ( st.
at( p ) == 37 ) {
511 if ( p + 2 < st.
length() ) {
512 ch = st.
at( p + 1 ) - 48;
516 ch2 = st.
at( p + 2 ) - 48;
520 st[p] = ch * 16 + ch2;
526 kDebug() <<
"Got pre-decoded:" << st;
529 if ( !charsetcodec || forceCS ) {
533 usedCS = charsetcodec->
name();
540 return decodeRFC2231String( src, usedCS,
"utf-8",
false );
// Alphabet used for random character selection: 10 digits, 25 lowercase
// letters (there is no 'w' in the literal) and 26 uppercase letters, 61
// characters in total. That count agrees with the 61.0 factor the visible
// index computation applies to rand(), so every index it yields is in range.
// NOTE(review): if 'w' is ever added, the 61.0 factor must change with it.
545 static char chars[] =
"0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
549 unsigned int timeval;
553 ran = 1 + (int)( 1000.0 * rand() / ( RAND_MAX + 1.0 ) );
554 timeval = ( now / ran ) + getpid();
556 for (
int i = 0; i < 10; i++ ) {
557 pos = (int) ( 61.0 * rand() / ( RAND_MAX + 1.0 ) );
572 return "nextPart" + uniqueString();
582 int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
583 while ( ( foldMid = header.
indexOf(
'\n', pos ) ) >= 0 ) {
584 foldBegin = foldEnd = foldMid;
586 while ( foldBegin > 0 ) {
593 while ( foldEnd <= header.
length() - 1 ) {
596 }
else if ( foldEnd > 0 && header[foldEnd - 1] ==
'\n' &&
597 header[foldEnd] ==
'=' && foldEnd + 2 < header.
length() &&
598 ( ( header[foldEnd + 1] ==
'0' &&
599 header[foldEnd + 2] ==
'9' ) ||
600 ( header[foldEnd + 1] ==
'2' &&
601 header[foldEnd + 2] ==
'0' ) ) ) {
610 result += header.
mid( pos, foldBegin - pos );
611 if ( foldEnd < header.
length() - 1 ) {
616 const int len = header.
length();
618 result += header.
mid( pos, len - pos );
623 int findHeaderLineEnd(
const QByteArray &src,
int &dataBegin,
bool *folded )
626 int len = src.
length() - 1;
632 if ( dataBegin < 0 ) {
637 if ( dataBegin > len ) {
645 if ( src.
at( end ) ==
'\n' && end + 1 < len &&
646 ( src[end + 1] ==
' ' || src[end + 1] ==
'\t' ) ) {
653 if ( src.at( end ) !=
'\n' ) {
655 end = src.indexOf(
'\n', end + 1 );
656 if ( end == -1 || end == len ) {
659 }
else if ( src[end + 1] ==
' ' || src[end + 1] ==
'\t' ||
660 ( src[end + 1] ==
'=' && end + 3 <= len &&
661 ( ( src[end + 2] ==
'0' && src[end + 3] ==
'9' ) ||
662 ( src[end + 2] ==
'2' && src[end + 3] ==
'0' ) ) ) ) {
680 int indexOfHeader(
const QByteArray &src,
const QByteArray &name,
int &end,
int &dataBegin,
bool *folded )
700 dataBegin = begin + name.
length() + 1;
702 if ( src.
at( dataBegin ) ==
' ' ) {
705 end = findHeaderLineEnd( src, dataBegin, folded );
721 if ( src.
isEmpty() || indexOfHeader( src, name, end, begin, &folded ) < 0 ) {
727 result = src.
mid( begin, end - begin );
731 result = unfoldHeader( hdrValue );
745 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
749 while ( begin >= 0 ) {
751 result.
append( copySrc.mid( begin, end - begin ) );
754 result.
append( unfoldHeader( hdrValue ) );
758 copySrc = copySrc.mid( end );
759 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
768 int begin, end, dummy;
769 begin = indexOfHeader( header, name, end, dummy );
771 header.
remove( begin, end - begin + 1 );
785 return CRLFtoLF( ret );
798 return LFtoCRLF( ret );
802 template <
typename StringType,
typename CharType >
void removeQuotesGeneric( StringType & str )
804 bool inQuote =
false;
805 for (
int i = 0; i < str.length(); ++i ) {
806 if ( str[i] == CharType(
'"' ) ) {
811 if ( inQuote && ( str[i] == CharType(
'\\' ) ) ) {
821 removeQuotesGeneric<QByteArray, char>( str );
824 void removeQuots(
QString &str )
826 removeQuotesGeneric<QString, QLatin1Char>( str );
829 template<
class StringType,
class CharType,
class CharConverterType,
class StringConverterType,
class ToString>
830 void addQuotes_impl( StringType &str,
bool forceQuotes )
832 bool needsQuotes=
false;
833 for (
int i=0; i < str.length(); i++ ) {
834 const CharType cur = str.at( i );
838 if ( cur == CharConverterType(
'\\' ) || cur == CharConverterType(
'\"' ) ) {
839 str.insert( i, CharConverterType(
'\\' ) );
844 if ( needsQuotes || forceQuotes ) {
845 str.insert( 0, CharConverterType(
'\"' ) );
846 str.append( StringConverterType(
"\"" ) );
850 void addQuotes(
QByteArray &str,
bool forceQuotes )
852 addQuotes_impl<QByteArray, char, char, char*, QLatin1String>( str, forceQuotes );
855 void addQuotes(
QString &str,
bool forceQuotes )
857 addQuotes_impl<QString, QChar, QLatin1Char, QLatin1String, QString>( str, forceQuotes );
// Unicode bidirectional formatting code points. The visible scan counts
// LRO/RLO/LRE/RLE together as "direction changers" and handles PDF separately.
// U+202D LEFT-TO-RIGHT OVERRIDE
862 const int LRO = 0x202D;
// U+202E RIGHT-TO-LEFT OVERRIDE
863 const int RLO = 0x202E;
// U+202A LEFT-TO-RIGHT EMBEDDING
864 const int LRE = 0x202A;
// U+202B RIGHT-TO-LEFT EMBEDDING
865 const int RLE = 0x202B;
// U+202C POP DIRECTIONAL FORMATTING
866 const int PDF = 0x202C;
870 int openDirChangers = 0;
871 int numPDFsRemoved = 0;
872 for (
int i = 0; i < input.
length(); i++ ) {
873 const ushort &code = input.
at( i ).
unicode();
874 if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
876 }
else if ( code == PDF ) {
877 if ( openDirChangers > 0 ) {
881 kWarning() <<
"Possible Unicode spoofing (unexpected PDF) detected in" << input;
882 result.
remove( i - numPDFsRemoved, 1 );
888 if ( openDirChangers > 0 ) {
889 kWarning() <<
"Possible Unicode spoofing detected in" << input;
894 for (
int i = openDirChangers; i > 0; i-- ) {
898 result +=
QChar( PDF );
// Unicode bidirectional formatting code points (override and embedding
// controls). NOTE(review): the statements that use them are not visible in
// this span.
// U+202D LEFT-TO-RIGHT OVERRIDE
908 const int LRO = 0x202D;
// U+202E RIGHT-TO-LEFT OVERRIDE
909 const int RLO = 0x202E;
// U+202A LEFT-TO-RIGHT EMBEDDING
910 const int LRE = 0x202A;
// U+202B RIGHT-TO-LEFT EMBEDDING
911 const int RLE = 0x202B;
920 static bool isCryptoPart(
Content* content )
934 ( lowerSubType ==
"pgp-encrypted" ||
935 lowerSubType ==
"pgp-signature" ||
936 lowerSubType ==
"pkcs7-mime" ||
937 lowerSubType ==
"x-pkcs7-mime" ||
938 lowerSubType ==
"pkcs7-signature" ||
939 lowerSubType ==
"x-pkcs7-signature" ||
940 ( lowerSubType ==
"octet-stream" &&
944 bool hasAttachment(
Content* content )
950 bool emptyFilename =
true;
953 emptyFilename =
false;
956 if ( emptyFilename &&
959 emptyFilename =
false;
963 if ( !emptyFilename && !isCryptoPart( content ) ) {
970 if ( hasAttachment( child ) ) {
978 bool hasInvitation(
Content *content )
984 if ( isInvitation(content) ) {
991 if ( hasInvitation( child ) ) {
999 bool isSigned(
Message *message )
1006 if ( contentType->
isSubtype(
"signed" ) ||
1007 contentType->
isSubtype(
"pgp-signature" ) ||
1008 contentType->
isSubtype(
"pkcs7-signature" ) ||
1009 contentType->
isSubtype(
"x-pkcs7-signature" ) ||
1011 message->
mainBodyPart(
"application/pgp-signature" ) ||
1012 message->
mainBodyPart(
"application/pkcs7-signature" ) ||
1013 message->
mainBodyPart(
"application/x-pkcs7-signature" ) ) {
1019 bool isEncrypted(
Message *message )
1026 if ( contentType->
isSubtype(
"encrypted" ) ||
1027 contentType->
isSubtype(
"pgp-encrypted" ) ||
1028 contentType->
isSubtype(
"pkcs7-mime" ) ||
1029 contentType->
isSubtype(
"x-pkcs7-mime" ) ||
1031 message->
mainBodyPart(
"application/pgp-encrypted" ) ||
1033 message->
mainBodyPart(
"application/x-pkcs7-mime" ) ) {
1040 bool isInvitation(
Content *content )
1048 if ( contentType && contentType->
isMediatype(
"text" ) && contentType->
isSubtype(
"calendar" ) ) {
This file is part of the API for handling MIME data and defines the Codec class.
QByteArray fromUnicode(const QString &str) const
const QChar * constData() const
virtual QByteArray name() const =0
List contents() const
For multipart contents, this will return a list of all multipart child contents.
QByteArray toLower() const
QByteArray & setNum(short n, int base)
int lastIndexOf(char ch, int from) const
QByteArray toUpper() const
Content * mainBodyPart(const QByteArray &type=QByteArray())
Returns the first main body part of a given type, taking multipart/mixed and multipart/alternative nodes into consideration.
Headers::ContentDisposition * contentDisposition(bool create=true)
Returns the Content-Disposition header.
QString & remove(int position, int n)
int indexOf(char ch, int from) const
This file is part of the API for handling MIME data and defines the CharFreq class.
void append(const T &value)
QString fromUtf8(const char *str, int size)
QString & insert(int position, QChar ch)
QByteArray & prepend(char ch)
const char * constData() const
QByteArray right(int len) const
QByteArray & replace(int pos, int len, const char *after)
bool endsWith(const QString &s, Qt::CaseSensitivity cs) const
QByteArray mid(int pos, int len) const
Headers::ContentType * contentType(bool create=true)
Returns the Content-Type header.
QByteArray & append(char ch)
QByteArray toLocal8Bit() const
bool contains(QChar ch, Qt::CaseSensitivity cs) const
QByteArray left(int len) const
Represents a (email) message.
QByteArray toLatin1() const
QString mid(int position, int n) const
const QChar at(int position) const
QTextCodec * codecForName(const QByteArray &name)
bool contains(char ch) const
A class that encapsulates MIME encoded Content.
QString fromLatin1(const char *str, int size)
QByteArray toBase64() const
A class for performing basic data typing using frequency count heuristics.
QByteArray & remove(int pos, int len)
QString toUnicode(const QByteArray &a) const
QByteArray toUtf8() const