• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdepimlibs API Reference
  • KDE Home
  • Contact Us
 

KMIME Library

  • sources
  • kde-4.12
  • kdepimlibs
  • kmime
kmime_util.cpp
1 /*
2  kmime_util.cpp
3 
4  KMime, the KDE Internet mail/usenet news message library.
5  Copyright (c) 2001 the KMime authors.
6  See file AUTHORS for details
7 
8  This library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU Library General Public
10  License as published by the Free Software Foundation; either
11  version 2 of the License, or (at your option) any later version.
12 
13  This library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  Library General Public License for more details.
17 
18  You should have received a copy of the GNU Library General Public License
19  along with this library; see the file COPYING.LIB. If not, write to
20  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21  Boston, MA 02110-1301, USA.
22 */
23 
24 #include "kmime_util.h"
25 #include "kmime_util_p.h"
26 
27 #include "kmime_charfreq.h"
28 #include "kmime_codecs.h"
29 #include "kmime_header_parsing.h"
30 #include "kmime_message.h"
31 #include "kmime_warning.h"
32 
33 #include <config-kmime.h>
34 #include <kdefakes.h> // for strcasestr
35 #include <kglobal.h>
36 #include <klocale.h>
37 #include <klocalizedstring.h>
38 #include <kcharsets.h>
39 #include <kcodecs.h>
40 #include <kdebug.h>
41 
42 #include <QtCore/QList>
43 #include <QtCore/QString>
44 #include <QtCore/QTextCodec>
45 
46 #include <ctype.h>
47 #include <time.h>
48 #include <stdlib.h>
49 #include <unistd.h>
50 #include <boost/concept_check.hpp>
51 
52 using namespace KMime;
53 
54 namespace KMime {
55 
56 QList<QByteArray> c_harsetCache;
57 QList<QByteArray> l_anguageCache;
58 QString f_allbackCharEnc;
59 bool u_seOutlookEncoding = false;
60 
61 QByteArray cachedCharset( const QByteArray &name )
62 {
63  foreach ( const QByteArray& charset, c_harsetCache ) {
64  if ( qstricmp( name.data(), charset.data() ) == 0 ) {
65  return charset;
66  }
67  }
68 
69  c_harsetCache.append( name.toUpper() );
70  //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
71  return c_harsetCache.last();
72 }
73 
74 QByteArray cachedLanguage( const QByteArray &name )
75 {
76  foreach ( const QByteArray& language, l_anguageCache ) {
77  if ( qstricmp( name.data(), language.data() ) == 0 ) {
78  return language;
79  }
80  }
81 
82  l_anguageCache.append( name.toUpper() );
83  //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
84  return l_anguageCache.last();
85 }
86 
87 bool isUsAscii( const QString &s )
88 {
89  uint sLength = s.length();
90  for ( uint i=0; i<sLength; i++ ) {
91  if ( s.at( i ).toLatin1() <= 0 ) { // c==0: non-latin1, c<0: non-us-ascii
92  return false;
93  }
94  }
95  return true;
96 }
97 
98 QString nameForEncoding( Headers::contentEncoding enc )
99 {
100  switch ( enc ) {
101  case Headers::CE7Bit: return QString::fromLatin1( "7bit" );
102  case Headers::CE8Bit: return QString::fromLatin1( "8bit" );
103  case Headers::CEquPr: return QString::fromLatin1( "quoted-printable" );
104  case Headers::CEbase64: return QString::fromLatin1( "base64" );
105  case Headers::CEuuenc: return QString::fromLatin1( "uuencode" );
106  case Headers::CEbinary: return QString::fromLatin1( "binary" );
107  default: return QString::fromLatin1( "unknown" );
108  }
109 }
110 
111 QList<Headers::contentEncoding> encodingsForData( const QByteArray &data )
112 {
113  QList<Headers::contentEncoding> allowed;
114  CharFreq cf( data );
115 
116  switch ( cf.type() ) {
117  case CharFreq::SevenBitText:
118  allowed << Headers::CE7Bit;
119  case CharFreq::EightBitText:
120  allowed << Headers::CE8Bit;
121  case CharFreq::SevenBitData:
122  if ( cf.printableRatio() > 5.0/6.0 ) {
123  // let n the length of data and p the number of printable chars.
124  // Then base64 \approx 4n/3; qp \approx p + 3(n-p)
125  // => qp < base64 iff p > 5n/6.
126  allowed << Headers::CEquPr;
127  allowed << Headers::CEbase64;
128  } else {
129  allowed << Headers::CEbase64;
130  allowed << Headers::CEquPr;
131  }
132  break;
133  case CharFreq::EightBitData:
134  allowed << Headers::CEbase64;
135  break;
136  case CharFreq::None:
137  default:
138  Q_ASSERT( false );
139  }
140 
141  return allowed;
142 }
143 
144 // "(),.:;<>@[\]
145 const uchar specialsMap[16] = {
146  0x00, 0x00, 0x00, 0x00, // CTLs
147  0x20, 0xCA, 0x00, 0x3A, // SPACE ... '?'
148  0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
149  0x00, 0x00, 0x00, 0x00 // '`' ... DEL
150 };
151 
152 // "(),:;<>@[\]/=?
153 const uchar tSpecialsMap[16] = {
154  0x00, 0x00, 0x00, 0x00, // CTLs
155  0x20, 0xC9, 0x00, 0x3F, // SPACE ... '?'
156  0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
157  0x00, 0x00, 0x00, 0x00 // '`' ... DEL
158 };
159 
160 // all except specials, CTLs, SPACE.
161 const uchar aTextMap[16] = {
162  0x00, 0x00, 0x00, 0x00,
163  0x5F, 0x35, 0xFF, 0xC5,
164  0x7F, 0xFF, 0xFF, 0xE3,
165  0xFF, 0xFF, 0xFF, 0xFE
166 };
167 
168 // all except tspecials, CTLs, SPACE.
169 const uchar tTextMap[16] = {
170  0x00, 0x00, 0x00, 0x00,
171  0x5F, 0x36, 0xFF, 0xC0,
172  0x7F, 0xFF, 0xFF, 0xE3,
173  0xFF, 0xFF, 0xFF, 0xFE
174 };
175 
176 // none except a-zA-Z0-9!*+-/
177 const uchar eTextMap[16] = {
178  0x00, 0x00, 0x00, 0x00,
179  0x40, 0x35, 0xFF, 0xC0,
180  0x7F, 0xFF, 0xFF, 0xE0,
181  0x7F, 0xFF, 0xFF, 0xE0
182 };
183 
184 void setFallbackCharEncoding(const QString& fallbackCharEnc)
185 {
186  f_allbackCharEnc = fallbackCharEnc;
187 }
188 
189 QString fallbackCharEncoding()
190 {
191  return f_allbackCharEnc;
192 }
193 
194 void setUseOutlookAttachmentEncoding( bool violateStandard )
195 {
196  u_seOutlookEncoding = violateStandard;
197 }
198 
199 bool useOutlookAttachmentEncoding()
200 {
201  return u_seOutlookEncoding;
202 }
203 
204 
205 QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS,
206  const QByteArray &defaultCS, bool forceCS )
207 {
208  QByteArray result;
209  QByteArray spaceBuffer;
210  const char *scursor = src.constData();
211  const char *send = scursor + src.length();
212  bool onlySpacesSinceLastWord = false;
213 
214  while ( scursor != send ) {
215  // space
216  if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
217  spaceBuffer += *scursor++;
218  continue;
219  }
220 
221  // possible start of an encoded word
222  if ( *scursor == '=' ) {
223  QByteArray language;
224  QString decoded;
225  ++scursor;
226  const char *start = scursor;
227  if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
228  result += decoded.toUtf8();
229  onlySpacesSinceLastWord = true;
230  spaceBuffer.clear();
231  } else {
232  if ( onlySpacesSinceLastWord ) {
233  result += spaceBuffer;
234  onlySpacesSinceLastWord = false;
235  }
236  result += '=';
237  scursor = start; // reset cursor after parsing failure
238  }
239  continue;
240  } else {
241  // unencoded data
242  if ( onlySpacesSinceLastWord ) {
243  result += spaceBuffer;
244  onlySpacesSinceLastWord = false;
245  }
246  result += *scursor;
247  ++scursor;
248  }
249  }
250  // If there are any chars that couldn't be decoded in UTF-8,
251  // use the fallback charset if it exists
252  const QString tryUtf8 = QString::fromUtf8( result );
253  if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) {
254  QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc );
255  return codec->toUnicode( result );
256  } else {
257  return tryUtf8;
258  }
259 }
260 
261 QString decodeRFC2047String( const QByteArray &src )
262 {
263  QByteArray usedCS;
264  return decodeRFC2047String( src, usedCS, "utf-8", false );
265 }
266 
267 static const char *reservedCharacters = "\"()<>@,.;:\\[]=";
268 
269 QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset,
270  bool addressHeader, bool allow8BitHeaders )
271 {
272  QByteArray result;
273  int start=0, end=0;
274  bool nonAscii=false, ok=true, useQEncoding=false;
275 
276  // fromLatin1() is safe here, codecForName() uses toLatin1() internally
277  const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok );
278 
279  QByteArray usedCS;
280  if ( !ok ) {
281  //no codec available => try local8Bit and hope the best ;-)
282  usedCS = KGlobal::locale()->encoding();
283  codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok );
284  } else {
285  Q_ASSERT( codec );
286  if ( charset.isEmpty() ) {
287  usedCS = codec->name();
288  } else {
289  usedCS = charset;
290  }
291  }
292 
293  QTextCodec::ConverterState converterState( QTextCodec::IgnoreHeader );
294  QByteArray encoded8Bit = codec->fromUnicode( src.constData(), src.length(), &converterState );
295  if ( converterState.invalidChars > 0 ) {
296  usedCS = "utf-8";
297  codec = QTextCodec::codecForName( usedCS );
298  encoded8Bit = codec->fromUnicode( src );
299  }
300 
301  if ( usedCS.contains( "8859-" ) ) { // use "B"-Encoding for non iso-8859-x charsets
302  useQEncoding = true;
303  }
304 
305  if ( allow8BitHeaders ) {
306  return encoded8Bit;
307  }
308 
309  uint encoded8BitLength = encoded8Bit.length();
310  for ( unsigned int i=0; i<encoded8BitLength; i++ ) {
311  if ( encoded8Bit[i] == ' ' ) { // encoding starts at word boundaries
312  start = i + 1;
313  }
314 
315  // encode escape character, for japanese encodings...
316  if ( ( (signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] == '\033' ) ||
317  ( addressHeader && ( strchr( "\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
318  end = start; // non us-ascii char found, now we determine where to stop encoding
319  nonAscii = true;
320  break;
321  }
322  }
323 
324  if ( nonAscii ) {
325  while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
326  // we encode complete words
327  end++;
328  }
329 
330  for ( int x=end; x<encoded8Bit.length(); x++ ) {
331  if ( ( (signed char)encoded8Bit[x] < 0 ) || ( encoded8Bit[x] == '\033' ) ||
332  ( addressHeader && ( strchr( reservedCharacters, encoded8Bit[x] ) != 0 ) ) ) {
333  end = x; // we found another non-ascii word
334 
335  while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
336  // we encode complete words
337  end++;
338  }
339  }
340  }
341 
342  result = encoded8Bit.left( start ) + "=?" + usedCS;
343 
344  if ( useQEncoding ) {
345  result += "?Q?";
346 
347  char c, hexcode;// "Q"-encoding implementation described in RFC 2047
348  for ( int i=start; i<end; i++ ) {
349  c = encoded8Bit[i];
350  if ( c == ' ' ) { // make the result readable with not MIME-capable readers
351  result += '_';
352  } else {
353  if ( ( ( c >= 'a' ) && ( c <= 'z' ) ) || // paranoid mode, encode *all* special chars to avoid problems
354  ( ( c >= 'A' ) && ( c <= 'Z' ) ) || // with "From" & "To" headers
355  ( ( c >= '0' ) && ( c <= '9' ) ) ) {
356  result += c;
357  } else {
358  result += '='; // "stolen" from KMail ;-)
359  hexcode = ( ( c & 0xF0 ) >> 4 ) + 48;
360  if ( hexcode >= 58 ) {
361  hexcode += 7;
362  }
363  result += hexcode;
364  hexcode = ( c & 0x0F ) + 48;
365  if ( hexcode >= 58 ) {
366  hexcode += 7;
367  }
368  result += hexcode;
369  }
370  }
371  }
372  } else {
373  result += "?B?" + encoded8Bit.mid( start, end - start ).toBase64();
374  }
375 
376  result +="?=";
377  result += encoded8Bit.right( encoded8Bit.length() - end );
378  } else {
379  result = encoded8Bit;
380  }
381 
382  return result;
383 }
384 
385 QByteArray encodeRFC2047Sentence(const QString& src, const QByteArray& charset )
386 {
387  QByteArray result;
388  QList<QChar> splitChars;
389  splitChars << QLatin1Char( ',' ) << QLatin1Char( '\"' ) << QLatin1Char( ';' ) << QLatin1Char( '\\' );
390  const QChar *ch = src.constData();
391  const int length = src.length();
392  int pos = 0;
393  int wordStart = 0;
394 
395  //qDebug() << "Input:" << src;
396  // Loop over all characters of the string.
397  // When encountering a split character, RFC-2047-encode the word before it, and add it to the result.
398  while ( pos < length ) {
399  //qDebug() << "Pos:" << pos << "Result:" << result << "Char:" << ch->toLatin1();
400  const bool isAscii = ch->unicode() < 127;
401  const bool isReserved = ( strchr( reservedCharacters, ch->toLatin1() ) != 0 );
402  if ( isAscii && isReserved ) {
403  const int wordSize = pos - wordStart;
404  if ( wordSize > 0 ) {
405  const QString word = src.mid( wordStart, wordSize );
406  result += encodeRFC2047String( word, charset );
407  }
408 
409  result += ch->toLatin1();
410  wordStart = pos + 1;
411  }
412  ch++;
413  pos++;
414  }
415 
416  // Encode the last word
417  const int wordSize = pos - wordStart;
418  if ( wordSize > 0 ) {
419  const QString word = src.mid( wordStart, pos - wordStart );
420  result += encodeRFC2047String( word, charset );
421  }
422 
423  return result;
424 }
425 
426 
427 
428 //-----------------------------------------------------------------------------
429 QByteArray encodeRFC2231String( const QString& str, const QByteArray& charset )
430 {
431  if ( str.isEmpty() ) {
432  return QByteArray();
433  }
434 
435  const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
436  QByteArray latin;
437  if ( charset == "us-ascii" ) {
438  latin = str.toLatin1();
439  } else if ( codec ) {
440  latin = codec->fromUnicode( str );
441  } else {
442  latin = str.toLocal8Bit();
443  }
444 
445  char *l;
446  for ( l = latin.data(); *l; ++l ) {
447  if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) ) {
448  // *l is control character or 8-bit char
449  break;
450  }
451  }
452  if ( !*l ) {
453  return latin;
454  }
455 
456  QByteArray result = charset + "''";
457  for ( l = latin.data(); *l; ++l ) {
458  bool needsQuoting = ( *l & 0x80 ) || ( *l == '%' );
459  if ( !needsQuoting ) {
460  const QByteArray especials = "()<>@,;:\"/[]?.= \033";
461  int len = especials.length();
462  for ( int i = 0; i < len; i++ ) {
463  if ( *l == especials[i] ) {
464  needsQuoting = true;
465  break;
466  }
467  }
468  }
469  if ( needsQuoting ) {
470  result += '%';
471  unsigned char hexcode;
472  hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
473  if ( hexcode >= 58 ) {
474  hexcode += 7;
475  }
476  result += hexcode;
477  hexcode = ( *l & 0x0F ) + 48;
478  if ( hexcode >= 58 ) {
479  hexcode += 7;
480  }
481  result += hexcode;
482  } else {
483  result += *l;
484  }
485  }
486  return result;
487 }
488 
489 
490 //-----------------------------------------------------------------------------
491 QString decodeRFC2231String( const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS,
492  bool forceCS )
493 {
494  int p = str.indexOf( '\'' );
495  if ( p < 0 ) {
496  return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) )->toUnicode( str );
497  }
498 
499 
500  QByteArray charset = str.left( p );
501 
502  QByteArray st = str.mid( str.lastIndexOf( '\'' ) + 1 );
503 
504  char ch, ch2;
505  p = 0;
506  while ( p < (int)st.length() ) {
507  if ( st.at( p ) == 37 ) {
508  // Only try to decode the percent-encoded character if the percent sign
509  // is really followed by two other characters, see testcase at bug 163024
510  if ( p + 2 < st.length() ) {
511  ch = st.at( p + 1 ) - 48;
512  if ( ch > 16 ) {
513  ch -= 7;
514  }
515  ch2 = st.at( p + 2 ) - 48;
516  if ( ch2 > 16 ) {
517  ch2 -= 7;
518  }
519  st[p] = ch * 16 + ch2;
520  st.remove( p + 1, 2 );
521  }
522  }
523  p++;
524  }
525  kDebug() << "Got pre-decoded:" << st;
526  QString result;
527  const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
528  if ( !charsetcodec || forceCS ) {
529  charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) );
530  }
531 
532  usedCS = charsetcodec->name();
533  return charsetcodec->toUnicode( st );
534 }
535 
536 QString decodeRFC2231String( const QByteArray &src )
537 {
538  QByteArray usedCS;
539  return decodeRFC2231String( src, usedCS, "utf-8", false );
540 }
541 
542 QByteArray uniqueString()
543 {
544  static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
545  time_t now;
546  char p[11];
547  int pos, ran;
548  unsigned int timeval;
549 
550  p[10] = '\0';
551  now = time( 0 );
552  ran = 1 + (int)( 1000.0 * rand() / ( RAND_MAX + 1.0 ) );
553  timeval = ( now / ran ) + getpid();
554 
555  for ( int i = 0; i < 10; i++ ) {
556  pos = (int) ( 61.0 * rand() / ( RAND_MAX + 1.0 ) );
557  //kDebug() << pos;
558  p[i] = chars[pos];
559  }
560 
561  QByteArray ret;
562  ret.setNum( timeval );
563  ret += '.';
564  ret += p;
565 
566  return ret;
567 }
568 
569 QByteArray multiPartBoundary()
570 {
571  return "nextPart" + uniqueString();
572 }
573 
574 QByteArray unfoldHeader( const QByteArray &header )
575 {
576  QByteArray result;
577  if ( header.isEmpty() ) {
578  return result;
579  }
580 
581  int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
582  while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) {
583  foldBegin = foldEnd = foldMid;
584  // find the first space before the line-break
585  while ( foldBegin > 0 ) {
586  if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) {
587  break;
588  }
589  --foldBegin;
590  }
591  // find the first non-space after the line-break
592  while ( foldEnd <= header.length() - 1 ) {
593  if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) {
594  ++foldEnd;
595  } else if ( foldEnd > 0 && header[foldEnd - 1] == '\n' &&
596  header[foldEnd] == '=' && foldEnd + 2 < header.length() &&
597  ( ( header[foldEnd + 1] == '0' &&
598  header[foldEnd + 2] == '9' ) ||
599  ( header[foldEnd + 1] == '2' &&
600  header[foldEnd + 2] == '0' ) ) ) {
601  // bug #86302: malformed header continuation starting with =09/=20
602  foldEnd += 3;
603  }
604  else {
605  break;
606  }
607  }
608 
609  result += header.mid( pos, foldBegin - pos );
610  if ( foldEnd < header.length() - 1 ) {
611  result += ' ';
612  }
613  pos = foldEnd;
614  }
615  const int len = header.length();
616  if ( len > pos ) {
617  result += header.mid( pos, len - pos );
618  }
619  return result;
620 }
621 
622 int findHeaderLineEnd( const QByteArray &src, int &dataBegin, bool *folded )
623 {
624  int end = dataBegin;
625  int len = src.length() - 1;
626 
627  if ( folded ) {
628  *folded = false;
629  }
630 
631  if ( dataBegin < 0 ) {
632  // Not found
633  return -1;
634  }
635 
636  if ( dataBegin > len ) {
637  // No data available
638  return len + 1;
639  }
640 
641  // If the first line contains nothing, but the next line starts with a space
642  // or a tab, that means a stupid mail client has made the first header field line
643  // entirely empty, and has folded the rest to the next line(s).
644  if ( src.at( end ) == '\n' && end + 1 < len &&
645  ( src[end + 1] == ' ' || src[end + 1] == '\t' ) ) {
646 
647  // Skip \n and first whitespace
648  dataBegin += 2;
649  end += 2;
650  }
651 
652  if ( src.at( end ) != '\n' ) { // check if the header is not empty
653  while ( true ) {
654  end = src.indexOf( '\n', end + 1 );
655  if ( end == -1 || end == len ) {
656  // end of string
657  break;
658  } else if ( src[end + 1] == ' ' || src[end + 1] == '\t' ||
659  ( src[end + 1] == '=' && end + 3 <= len &&
660  ( ( src[end + 2] == '0' && src[end + 3] == '9' ) ||
661  ( src[end + 2] == '2' && src[end + 3] == '0' ) ) ) ) {
662  // next line is header continuation or starts with =09/=20 (bug #86302)
663  if ( folded ) {
664  *folded = true;
665  }
666  } else {
667  // end of header (no header continuation)
668  break;
669  }
670  }
671  }
672 
673  if ( end < 0 ) {
674  end = len + 1; //take the rest of the string
675  }
676  return end;
677 }
678 
679 int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded )
680 {
681  QByteArray n = name;
682  n.append( ':' );
683  int begin = -1;
684 
685  if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
686  begin = 0;
687  } else {
688  n.prepend( '\n' );
689  const char *p = strcasestr( src.constData(), n.constData() );
690  if ( !p ) {
691  begin = -1;
692  } else {
693  begin = p - src.constData();
694  ++begin;
695  }
696  }
697 
698  if ( begin > -1 ) { //there is a header with the given name
699  dataBegin = begin + name.length() + 1; //skip the name
700  // skip the usual space after the colon
701  if ( src.at( dataBegin ) == ' ' ) {
702  ++dataBegin;
703  }
704  end = findHeaderLineEnd( src, dataBegin, folded );
705  return begin;
706 
707  } else {
708  end = -1;
709  dataBegin = -1;
710  return -1; //header not found
711  }
712 }
713 
714 QByteArray extractHeader( const QByteArray &src, const QByteArray &name )
715 {
716  int begin, end;
717  bool folded;
718  QByteArray result;
719 
720  if ( src.isEmpty() || indexOfHeader( src, name, end, begin, &folded ) < 0 ) {
721  return result;
722  }
723 
724  if ( begin >= 0 ) {
725  if ( !folded ) {
726  result = src.mid( begin, end - begin );
727  } else {
728  if ( end > begin ) {
729  QByteArray hdrValue = src.mid( begin, end - begin );
730  result = unfoldHeader( hdrValue );
731  }
732  }
733  }
734  return result;
735 }
736 
737 QList<QByteArray> extractHeaders( const QByteArray &src, const QByteArray &name )
738 {
739  int begin, end;
740  bool folded;
741  QList<QByteArray> result;
742  QByteArray copySrc( src );
743 
744  if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
745  return result;
746  }
747 
748  while ( begin >= 0 ) {
749  if ( !folded ) {
750  result.append( copySrc.mid( begin, end - begin ) );
751  } else {
752  QByteArray hdrValue = copySrc.mid( begin, end - begin );
753  result.append( unfoldHeader( hdrValue ) );
754  }
755 
756  // get the next one, a tiny bit ugly, but we don't want the previous to be found again...
757  copySrc = copySrc.mid( end );
758  if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
759  break;
760  }
761  }
762  return result;
763 }
764 
765 void removeHeader( QByteArray &header, const QByteArray &name )
766 {
767  int begin, end, dummy;
768  begin = indexOfHeader( header, name, end, dummy );
769  if ( begin >= 0 ) {
770  header.remove( begin, end - begin + 1 );
771  }
772 }
773 
774 QByteArray CRLFtoLF( const QByteArray &s )
775 {
776  QByteArray ret = s;
777  ret.replace( "\r\n", "\n" );
778  return ret;
779 }
780 
781 QByteArray CRLFtoLF( const char *s )
782 {
783  QByteArray ret = s;
784  return CRLFtoLF( ret );
785 }
786 
787 QByteArray LFtoCRLF( const QByteArray &s )
788 {
789  QByteArray ret = s;
790  ret.replace( '\n', "\r\n" );
791  return ret;
792 }
793 
794 QByteArray LFtoCRLF( const char *s )
795 {
796  QByteArray ret = s;
797  return LFtoCRLF( ret );
798 }
799 
namespace {
// Strips quoting from @p str in place.
// Every double quote is removed. Inside a quoted section each backslash is
// removed too, and the character it escaped is kept as it is.
template < typename StringType, typename CharType > void removeQuotesGeneric( StringType & str )
{
  bool inQuote = false;
  int i = 0;
  while ( i < str.length() ) {
    if ( str[i] == CharType( '"' ) ) {
      // Drop the quote and look at the same index again.
      str.remove( i, 1 );
      inQuote = !inQuote;
      continue;
    }
    if ( inQuote && ( str[i] == CharType( '\\' ) ) ) {
      // Drop the backslash; the increment below then steps over the
      // character it escaped.
      str.remove( i, 1 );
    }
    ++i;
  }
}
}
817 
818 void removeQuots( QByteArray &str )
819 {
820  removeQuotesGeneric<QByteArray, char>( str );
821 }
822 
823 void removeQuots( QString &str )
824 {
825  removeQuotesGeneric<QString, QLatin1Char>( str );
826 }
827 
828 template<class StringType,class CharType,class CharConverterType,class StringConverterType,class ToString>
829 void addQuotes_impl( StringType &str, bool forceQuotes )
830 {
831  bool needsQuotes=false;
832  for ( int i=0; i < str.length(); i++ ) {
833  const CharType cur = str.at( i );
834  if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String( "\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(" ) ) ) ) {
835  needsQuotes = true;
836  }
837  if ( cur == CharConverterType( '\\' ) || cur == CharConverterType( '\"' ) ) {
838  str.insert( i, CharConverterType( '\\' ) );
839  i++;
840  }
841  }
842 
843  if ( needsQuotes || forceQuotes ) {
844  str.insert( 0, CharConverterType( '\"' ) );
845  str.append( StringConverterType( "\"" ) );
846  }
847 }
848 
849 void addQuotes( QByteArray &str, bool forceQuotes )
850 {
851  addQuotes_impl<QByteArray, char, char, char*, QLatin1String>( str, forceQuotes );
852 }
853 
854 void addQuotes( QString &str, bool forceQuotes )
855 {
856  addQuotes_impl<QString, QChar, QLatin1Char, QLatin1String, QString>( str, forceQuotes );
857 }
858 
859 KMIME_EXPORT QString balanceBidiState( const QString &input )
860 {
861  const int LRO = 0x202D;
862  const int RLO = 0x202E;
863  const int LRE = 0x202A;
864  const int RLE = 0x202B;
865  const int PDF = 0x202C;
866 
867  QString result = input;
868 
869  int openDirChangers = 0;
870  int numPDFsRemoved = 0;
871  for ( int i = 0; i < input.length(); i++ ) {
872  const ushort &code = input.at( i ).unicode();
873  if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
874  openDirChangers++;
875  } else if ( code == PDF ) {
876  if ( openDirChangers > 0 ) {
877  openDirChangers--;
878  } else {
879  // One PDF too much, remove it
880  kWarning() << "Possible Unicode spoofing (unexpected PDF) detected in" << input;
881  result.remove( i - numPDFsRemoved, 1 );
882  numPDFsRemoved++;
883  }
884  }
885  }
886 
887  if ( openDirChangers > 0 ) {
888  kWarning() << "Possible Unicode spoofing detected in" << input;
889 
890  // At PDF chars to the end until the correct state is restored.
891  // As a special exception, when encountering quoted strings, place the PDF before
892  // the last quote.
893  for ( int i = openDirChangers; i > 0; i-- ) {
894  if ( result.endsWith( QLatin1Char( '"' ) ) ) {
895  result.insert( result.length() - 1, QChar( PDF ) );
896  } else {
897  result += QChar( PDF );
898  }
899  }
900  }
901 
902  return result;
903 }
904 
905 QString removeBidiControlChars( const QString &input )
906 {
907  const int LRO = 0x202D;
908  const int RLO = 0x202E;
909  const int LRE = 0x202A;
910  const int RLE = 0x202B;
911  QString result = input;
912  result.remove( LRO );
913  result.remove( RLO );
914  result.remove( LRE );
915  result.remove( RLE );
916  return result;
917 }
918 
919 static bool isCryptoPart( Content* content )
920 {
921  if ( !content->contentType( false ) ) {
922  return false;
923  }
924 
925  if ( content->contentType()->subType().toLower() == "octet-stream" &&
926  !content->contentDisposition( false ) ) {
927  return false;
928  }
929 
930  const Headers::ContentType *contentType = content->contentType();
931  const QByteArray lowerSubType = contentType->subType().toLower();
932  return ( contentType->mediaType().toLower() == "application" &&
933  ( lowerSubType == "pgp-encrypted" ||
934  lowerSubType == "pgp-signature" ||
935  lowerSubType == "pkcs7-mime" ||
936  lowerSubType == "pkcs7-signature" ||
937  lowerSubType == "x-pkcs7-signature" ||
938  ( lowerSubType == "octet-stream" &&
939  content->contentDisposition()->filename().toLower() == QLatin1String( "msg.asc" ) ) ) );
940 }
941 
942 bool hasAttachment( Content* content )
943 {
944  if ( !content ) {
945  return false;
946  }
947 
948  bool emptyFilename = true;
949  if ( content->contentDisposition( false ) &&
950  !content->contentDisposition()->filename().isEmpty() ) {
951  emptyFilename = false;
952  }
953 
954  if ( emptyFilename &&
955  content->contentType( false ) &&
956  !content->contentType()->name().isEmpty() ) {
957  emptyFilename = false;
958  }
959 
960  // ignore crypto parts
961  if ( !emptyFilename && !isCryptoPart( content ) ) {
962  return true;
963  }
964 
965  // Ok, content itself is not an attachment. now we deal with multiparts
966  if ( content->contentType()->isMultipart() ) {
967  Q_FOREACH ( Content *child, content->contents() ) {
968  if ( hasAttachment( child ) ) {
969  return true;
970  }
971  }
972  }
973  return false;
974 }
975 
976 bool isSigned( Message *message )
977 {
978  if ( !message ) {
979  return false;
980  }
981 
982  const KMime::Headers::ContentType* const contentType = message->contentType();
983  if ( contentType->isSubtype( "signed" ) ||
984  contentType->isSubtype( "pgp-signature" ) ||
985  contentType->isSubtype( "pkcs7-signature" ) ||
986  contentType->isSubtype( "x-pkcs7-signature" ) ||
987  message->mainBodyPart( "multipart/signed" ) ||
988  message->mainBodyPart( "application/pgp-signature" ) ||
989  message->mainBodyPart( "application/pkcs7-signature" ) ||
990  message->mainBodyPart( "application/x-pkcs7-signature" ) ) {
991  return true;
992  }
993  return false;
994 }
995 
996 bool isEncrypted( Message *message )
997 {
998  if ( !message ) {
999  return false;
1000  }
1001 
1002  const KMime::Headers::ContentType* const contentType = message->contentType();
1003  if ( contentType->isSubtype( "encrypted" ) ||
1004  contentType->isSubtype( "pgp-encrypted" ) ||
1005  contentType->isSubtype( "pkcs7-mime" ) ||
1006  message->mainBodyPart( "multipart/encrypted" ) ||
1007  message->mainBodyPart( "application/pgp-encrypted" ) ||
1008  message->mainBodyPart( "application/pkcs7-mime" ) ) {
1009  return true;
1010  }
1011 
1012  return false;
1013 }
1014 
1015 bool isInvitation( Content *content )
1016 {
1017  if ( !content ) {
1018  return false;
1019  }
1020 
1021  const KMime::Headers::ContentType* const contentType = content->contentType( false );
1022 
1023  if ( contentType && contentType->isMediatype( "text" ) && contentType->isSubtype( "calendar" ) ) {
1024  return true;
1025  }
1026 
1027  return false;
1028 }
1029 
1030 } // namespace KMime
kmime_codecs.h
This file is part of the API for handling MIME data and defines the Codec class.
KMime::CharFreq::EightBitData
8bit binary
Definition: kmime_charfreq.h:103
KMime::CharFreq::SevenBitData
7bit binary
Definition: kmime_charfreq.h:105
KMime::CharFreq::EightBitText
8bit text
Definition: kmime_charfreq.h:106
KMime::Headers::ContentType::subType
QByteArray subType() const
Returns the mime sub-type (second part of the mimetype).
Definition: kmime_headers.cpp:1760
KMime::Headers::ContentType::isMultipart
bool isMultipart() const
Returns true if the associated MIME entity is a multipart container.
Definition: kmime_headers.cpp:1824
KMime::Content::contents
List contents() const
For multipart contents, this will return a list of all multipart child contents.
Definition: kmime_content.cpp:526
KMime::Message::mainBodyPart
Content * mainBodyPart(const QByteArray &type=QByteArray())
Returns the first main body part of a given type, taking multipart/mixed and multipart/alternative no...
Definition: kmime_message.cpp:103
KMime::Content::contentDisposition
Headers::ContentDisposition * contentDisposition(bool create=true)
Returns the Content-Disposition header.
KMime::Headers::ContentType::isSubtype
bool isSubtype(const char *subtype) const
Tests if the mime sub-type equals subtype.
Definition: kmime_headers.cpp:1792
KMime::CharFreq::None
Unknown.
Definition: kmime_charfreq.h:102
kmime_charfreq.h
This file is part of the API for handling MIME data and defines the CharFreq class.
KMime::Content::contentType
Headers::ContentType * contentType(bool create=true)
Returns the Content-Type header.
KMime::Headers::ContentType::name
QString name() const
Returns the name of the associated MIME entity.
Definition: kmime_headers.cpp:1859
KMime::Message
Represents a (email) message.
Definition: kmime_message.h:81
KMime::Headers::ContentType::isMediatype
bool isMediatype(const char *mediatype) const
Tests if the media type equals mediatype.
Definition: kmime_headers.cpp:1784
KMime::Content
A class that encapsulates MIME encoded Content.
Definition: kmime_content.h:112
KMime::Headers::ContentDisposition::filename
QString filename() const
Returns the suggested filename for the associated MIME part.
Definition: kmime_headers.cpp:2201
KMime::Headers::ContentType::mediaType
QByteArray mediaType() const
Returns the media type (first part of the mimetype).
Definition: kmime_headers.cpp:1749
KMime::CharFreq
A class for performing basic data typing using frequency count heuristics.
Definition: kmime_charfreq.h:78
KMime::Headers::ContentType
Represents a "Content-Type" header.
Definition: kmime_headers.h:1031
KMime::CharFreq::SevenBitText
7bit text
Definition: kmime_charfreq.h:107
This file is part of the KDE documentation.
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 23:00:11 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KMIME Library

Skip menu "KMIME Library"
  • Main Page
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • Related Pages

kdepimlibs API Reference

Skip menu "kdepimlibs API Reference"
  • akonadi
  •   contact
  •   kmime
  •   socialutils
  • kabc
  • kalarmcal
  • kblog
  • kcal
  • kcalcore
  • kcalutils
  • kholidays
  • kimap
  • kldap
  • kmbox
  • kmime
  • kpimidentities
  • kpimtextedit
  • kresources
  • ktnef
  • kxmlrpcclient
  • microblog

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal