KMime

kmime_util.cpp
1 /*
2  kmime_util.cpp
3 
4  KMime, the KDE Internet mail/usenet news message library.
5  SPDX-FileCopyrightText: 2001 the KMime authors.
6  See file AUTHORS for details
7 
8  SPDX-License-Identifier: LGPL-2.0-or-later
9 */
10 
11 #include "kmime_util.h"
12 #include "kmime_util_p.h"
13 
14 #include "kmime_charfreq.h"
15 #include "kmime_debug.h"
16 #include "kmime_header_parsing.h"
17 #include "kmime_message.h"
18 #include "kmime_warning.h"
19 
20 #include <config-kmime.h>
21 // #include <kdefakes.h> // for strcasestr
22 
23 #include <KCharsets>
24 #include <QCoreApplication>
25 
26 
27 #include <ctype.h>
28 #include <time.h>
29 #include <stdlib.h>
30 
31 using namespace KMime;
32 
33 namespace KMime
34 {
35 
36 QVector<QByteArray> c_harsetCache;
37 bool u_seOutlookEncoding = false;
38 
39 QByteArray cachedCharset(const QByteArray &name)
40 {
41  for (const QByteArray &charset : std::as_const(c_harsetCache)) {
42  if (qstricmp(name.data(), charset.data()) == 0) {
43  return charset;
44  }
45  }
46 
47  c_harsetCache.append(name.toUpper());
48  //qCDebug(KMIME_LOG) << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
49  return c_harsetCache.last();
50 }
51 
52 bool isUsAscii(const QString &s)
53 {
54  const uint sLength = s.length();
55  for (uint i = 0; i < sLength; i++) {
56  if (s.at(i).toLatin1() <= 0) { // c==0: non-latin1, c<0: non-us-ascii
57  return false;
58  }
59  }
60  return true;
61 }
62 
63 QString nameForEncoding(Headers::contentEncoding enc)
64 {
65  switch (enc) {
66  case Headers::CE7Bit: return QStringLiteral("7bit");
67  case Headers::CE8Bit: return QStringLiteral("8bit");
68  case Headers::CEquPr: return QStringLiteral("quoted-printable");
69  case Headers::CEbase64: return QStringLiteral("base64");
70  case Headers::CEuuenc: return QStringLiteral("uuencode");
71  case Headers::CEbinary: return QStringLiteral("binary");
72  default: return QStringLiteral("unknown");
73  }
74 }
75 
76 QVector<Headers::contentEncoding> encodingsForData(const QByteArray &data)
77 {
79  CharFreq cf(data);
80 
81  switch (cf.type()) {
83  allowed << Headers::CE7Bit;
84  Q_FALLTHROUGH();
86  allowed << Headers::CE8Bit;
87  Q_FALLTHROUGH();
89  if (cf.printableRatio() > 5.0 / 6.0) {
90  // let n the length of data and p the number of printable chars.
91  // Then base64 \approx 4n/3; qp \approx p + 3(n-p)
92  // => qp < base64 iff p > 5n/6.
93  allowed << Headers::CEquPr;
94  allowed << Headers::CEbase64;
95  } else {
96  allowed << Headers::CEbase64;
97  allowed << Headers::CEquPr;
98  }
99  break;
101  allowed << Headers::CEbase64;
102  break;
103  case CharFreq::None:
104  default:
105  Q_ASSERT(false);
106  }
107 
108  return allowed;
109 }
110 
111 // all except specials, CTLs, SPACE.
112 const uchar aTextMap[16] = {
113  0x00, 0x00, 0x00, 0x00,
114  0x5F, 0x35, 0xFF, 0xC5,
115  0x7F, 0xFF, 0xFF, 0xE3,
116  0xFF, 0xFF, 0xFF, 0xFE
117 };
118 
119 // all except tspecials, CTLs, SPACE.
120 const uchar tTextMap[16] = {
121  0x00, 0x00, 0x00, 0x00,
122  0x5F, 0x36, 0xFF, 0xC0,
123  0x7F, 0xFF, 0xFF, 0xE3,
124  0xFF, 0xFF, 0xFF, 0xFE
125 };
126 
127 void setUseOutlookAttachmentEncoding(bool violateStandard)
128 {
129  u_seOutlookEncoding = violateStandard;
130 }
131 
132 bool useOutlookAttachmentEncoding()
133 {
134  return u_seOutlookEncoding;
135 }
136 
137 QByteArray uniqueString()
138 {
139  static const char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
140  time_t now;
141  char p[11];
142  int ran;
143  unsigned int timeval;
144 
145  p[10] = '\0';
146  now = time(nullptr);
147  ran = 1 + (int)(1000.0 * rand() / (RAND_MAX + 1.0));
148  timeval = (now / ran) + QCoreApplication::applicationPid();
149 
150  for (int i = 0; i < 10; i++) {
151  int pos = (int)(61.0 * rand() / (RAND_MAX + 1.0));
152  //qCDebug(KMIME_LOG) << pos;
153  p[i] = chars[pos];
154  }
155 
156  QByteArray ret;
157  ret.setNum(timeval);
158  ret += '.';
159  ret += p;
160 
161  return ret;
162 }
163 
164 QByteArray multiPartBoundary()
165 {
166  return "nextPart" + uniqueString();
167 }
168 
169 QByteArray unfoldHeader(const char *header, size_t headerSize)
170 {
171  QByteArray result;
172  if (headerSize == 0) {
173  return result;
174  }
175 
176  // unfolding skips characters so result will be at worst headerSize long
177  result.reserve(headerSize);
178 
179  const char *end = header + headerSize;
180  const char *pos = header;
181  const char *foldBegin = nullptr;
182  const char *foldMid = nullptr;
183  const char *foldEnd = nullptr;
184  while ((foldMid = strchr(pos, '\n')) && foldMid < end) {
185  foldBegin = foldEnd = foldMid;
186  // find the first space before the line-break
187  while (foldBegin) {
188  if (!QChar::isSpace(*(foldBegin - 1))) {
189  break;
190  }
191  --foldBegin;
192  }
193  // find the first non-space after the line-break
194  while (foldEnd <= end - 1) {
195  if (QChar::isSpace(*foldEnd)) {
196  ++foldEnd;
197  } else if (foldEnd && *(foldEnd - 1) == '\n' &&
198  *foldEnd == '=' && foldEnd + 2 < (header + headerSize - 1) &&
199  ((*(foldEnd + 1) == '0' &&
200  *(foldEnd + 2) == '9') ||
201  (*(foldEnd + 1) == '2' &&
202  *(foldEnd + 2) == '0'))) {
203  // bug #86302: malformed header continuation starting with =09/=20
204  foldEnd += 3;
205  } else {
206  break;
207  }
208  }
209 
210  result.append(pos, foldBegin - pos);
211  if (foldEnd < end - 1) {
212  result += ' ';
213  }
214  pos = foldEnd;
215  }
216  if (end > pos) {
217  result.append(pos, end - pos);
218  }
219  return result;
220 }
221 
222 QByteArray unfoldHeader(const QByteArray &header)
223 {
224  return unfoldHeader(header.constData(), header.size());
225 }
226 
227 int findHeaderLineEnd(const QByteArray &src, int &dataBegin, bool *folded)
228 {
229  int end = dataBegin;
230  int len = src.length() - 1;
231 
232  if (folded) {
233  *folded = false;
234  }
235 
236  if (dataBegin < 0) {
237  // Not found
238  return -1;
239  }
240 
241  if (dataBegin > len) {
242  // No data available
243  return len + 1;
244  }
245 
246  // If the first line contains nothing, but the next line starts with a space
247  // or a tab, that means a stupid mail client has made the first header field line
248  // entirely empty, and has folded the rest to the next line(s).
249  if (src.at(end) == '\n' && end + 1 < len &&
250  (src[end + 1] == ' ' || src[end + 1] == '\t')) {
251 
252  // Skip \n and first whitespace
253  dataBegin += 2;
254  end += 2;
255  }
256 
257  if (src.at(end) != '\n') { // check if the header is not empty
258  while (true) {
259  end = src.indexOf('\n', end + 1);
260  if (end == -1 || end == len) {
261  // end of string
262  break;
263  } else if (src[end + 1] == ' ' || src[end + 1] == '\t' ||
264  (src[end + 1] == '=' && end + 3 <= len &&
265  ((src[end + 2] == '0' && src[end + 3] == '9') ||
266  (src[end + 2] == '2' && src[end + 3] == '0')))) {
267  // next line is header continuation or starts with =09/=20 (bug #86302)
268  if (folded) {
269  *folded = true;
270  }
271  } else {
272  // end of header (no header continuation)
273  break;
274  }
275  }
276  }
277 
278  if (end < 0) {
279  end = len + 1; //take the rest of the string
280  }
281  return end;
282 }
283 
#ifndef HAVE_STRCASESTR
#ifdef WIN32
#define strncasecmp _strnicmp
#endif
// Fallback for platforms without strcasestr(): returns a pointer to the first
// case-insensitive occurrence of needle in haystack, or nullptr if there is
// none. An empty needle matches at the start of haystack.
static const char *strcasestr(const char *haystack, const char *needle)
{
    /* Originally copied from libreplace as part of qtwebengine 5.5.1 */
    const size_t nlen = strlen(needle);
    if (nlen == 0) {
        return haystack;
    }

    // toupper() is only defined for EOF and values representable as
    // unsigned char, so plain (possibly negative) chars must be converted first.
    const int first = toupper(static_cast<unsigned char>(*needle));
    for (const char *s = haystack; *s; ++s) {
        // Cheap first-character test before the full comparison.
        if (toupper(static_cast<unsigned char>(*s)) == first && strncasecmp(s, needle, nlen) == 0) {
            return s;
        }
    }
    return nullptr;
}
#endif
301 
302 int indexOfHeader(const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded)
303 {
304  QByteArray n = name;
305  n.append(':');
306  int begin = -1;
307 
308  if (qstrnicmp(n.constData(), src.constData(), n.length()) == 0) {
309  begin = 0;
310  } else {
311  n.prepend('\n');
312  const char *p = strcasestr(src.constData(), n.constData());
313  if (!p) {
314  begin = -1;
315  } else {
316  begin = p - src.constData();
317  ++begin;
318  }
319  }
320 
321  if (begin > -1) { //there is a header with the given name
322  dataBegin = begin + name.length() + 1; //skip the name
323  // skip the usual space after the colon
324  if (dataBegin < src.length() && src.at(dataBegin) == ' ') {
325  ++dataBegin;
326  }
327  end = findHeaderLineEnd(src, dataBegin, folded);
328  return begin;
329 
330  } else {
331  end = -1;
332  dataBegin = -1;
333  return -1; //header not found
334  }
335 }
336 
337 QByteArray extractHeader(const QByteArray &src, const QByteArray &name)
338 {
339  int begin;
340  int end;
341  bool folded;
342  QByteArray result;
343 
344  if (src.isEmpty() || indexOfHeader(src, name, end, begin, &folded) < 0) {
345  return result;
346  }
347 
348  if (begin >= 0) {
349  if (!folded) {
350  result = src.mid(begin, end - begin);
351  } else {
352  if (end > begin) {
353  result = unfoldHeader(src.constData() + begin, end - begin);
354  }
355  }
356  }
357  return result;
358 }
359 
360 QByteArray CRLFtoLF(const QByteArray &s)
361 {
362  if (!s.contains("\r\n")) {
363  return s;
364  }
365 
366  QByteArray ret = s;
367  ret.replace("\r\n", "\n");
368  return ret;
369 }
370 
371 QByteArray CRLFtoLF(const char *s)
372 {
373  QByteArray ret = s;
374  return CRLFtoLF(ret);
375 }
376 
377 QByteArray LFtoCRLF(const QByteArray &s)
378 {
379  const int firstNewline = s.indexOf('\n');
380  if (firstNewline == -1) {
381  return s;
382  }
383  if (firstNewline > 0 && s.at(firstNewline - 1) == '\r') {
384  // We found \r\n already, don't change anything
385  // This check assumes that input is consistent in terms of newlines,
386  // but so did if (s.contains("\r\n")), too.
387  return s;
388  }
389 
390  QByteArray ret = s;
391  ret.replace('\n', "\r\n");
392  return ret;
393 }
394 
395 QByteArray LFtoCRLF(const char *s)
396 {
397  QByteArray ret = s;
398  return LFtoCRLF(ret);
399 }
400 
401 QByteArray CRtoLF(const QByteArray &s)
402 {
403  const int firstNewline = s.indexOf('\r');
404  if (firstNewline == -1) {
405  return s;
406  }
407  if (firstNewline > 0 && (s.length() > firstNewline + 1) && s.at(firstNewline + 1) == '\n') {
408  // We found \r\n already, don't change anything
409  // This check assumes that input is consistent in terms of newlines,
410  // but so did if (s.contains("\r\n")), too.
411  return s;
412  }
413 
414  QByteArray ret = s;
415  ret.replace('\r', '\n');
416  return ret;
417 }
418 
419 QByteArray CRtoLF(const char *s)
420 {
421  const QByteArray ret = s;
422  return CRtoLF(ret);
423 }
424 
namespace
{
// Strips quoting from str in place: every '"' is removed, and inside a quoted
// section each backslash is removed while the character after it is kept
// verbatim. Backslashes outside of quotes are left untouched.
template < typename StringType, typename CharType > void removeQuotesGeneric(StringType &str)
{
    bool quoted = false;
    int idx = 0;
    while (idx < str.length()) {
        if (str[idx] == CharType('"')) {
            // Drop the quote; the next character moves into this position.
            str.remove(idx, 1);
            quoted = !quoted;
            continue;
        }
        if (quoted && (str[idx] == CharType('\\'))) {
            // Drop the escape; the escaped character is skipped by the step below.
            str.remove(idx, 1);
        }
        ++idx;
    }
}
}
443 
444 void removeQuotes(QByteArray &str)
445 {
446  removeQuotesGeneric<QByteArray, char>(str);
447 }
448 
449 void removeQuotes(QString &str)
450 {
451  removeQuotesGeneric<QString, QLatin1Char>(str);
452 }
453 
454 template<class StringType, class CharType, class CharConverterType, class StringConverterType, class ToString>
455 void addQuotes_impl(StringType &str, bool forceQuotes)
456 {
457  bool needsQuotes = false;
458  for (int i = 0; i < str.length(); i++) {
459  const CharType cur = str.at(i);
460  if (QString(ToString(str)).contains(QRegExp(QStringLiteral("\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(")))) {
461  needsQuotes = true;
462  }
463  if (cur == CharConverterType('\\') || cur == CharConverterType('\"')) {
464  str.insert(i, CharConverterType('\\'));
465  i++;
466  }
467  }
468 
469  if (needsQuotes || forceQuotes) {
470  str.insert(0, CharConverterType('\"'));
471  str.append(StringConverterType("\""));
472  }
473 }
474 
475 void addQuotes(QByteArray &str, bool forceQuotes)
476 {
477  addQuotes_impl<QByteArray, char, char, char *, QLatin1String>(str, forceQuotes);
478 }
479 
480 void addQuotes(QString &str, bool forceQuotes)
481 {
482  addQuotes_impl<QString, QChar, QLatin1Char, QLatin1String, QString>(str, forceQuotes);
483 }
484 
485 KMIME_EXPORT QString balanceBidiState(const QString &input)
486 {
487  const int LRO = 0x202D;
488  const int RLO = 0x202E;
489  const int LRE = 0x202A;
490  const int RLE = 0x202B;
491  const int PDF = 0x202C;
492 
493  QString result = input;
494 
495  int openDirChangers = 0;
496  int numPDFsRemoved = 0;
497  for (int i = 0; i < input.length(); i++) {
498  const ushort &code = input.at(i).unicode();
499  if (code == LRO || code == RLO || code == LRE || code == RLE) {
500  openDirChangers++;
501  } else if (code == PDF) {
502  if (openDirChangers > 0) {
503  openDirChangers--;
504  } else {
505  // One PDF too much, remove it
506  qCWarning(KMIME_LOG) << "Possible Unicode spoofing (unexpected PDF) detected in" << input;
507  result.remove(i - numPDFsRemoved, 1);
508  numPDFsRemoved++;
509  }
510  }
511  }
512 
513  if (openDirChangers > 0) {
514  qCWarning(KMIME_LOG) << "Possible Unicode spoofing detected in" << input;
515 
516  // At PDF chars to the end until the correct state is restored.
517  // As a special exception, when encountering quoted strings, place the PDF before
518  // the last quote.
519  for (int i = openDirChangers; i > 0; i--) {
520  if (result.endsWith(QLatin1Char('"'))) {
521  result.insert(result.length() - 1, QChar(PDF));
522  } else {
523  result += QChar(PDF);
524  }
525  }
526  }
527 
528  return result;
529 }
530 
531 QString removeBidiControlChars(const QString &input)
532 {
533  const int LRO = 0x202D;
534  const int RLO = 0x202E;
535  const int LRE = 0x202A;
536  const int RLE = 0x202B;
537  QString result = input;
538  result.remove(LRO);
539  result.remove(RLO);
540  result.remove(LRE);
541  result.remove(RLE);
542  return result;
543 }
544 
545 bool isCryptoPart(Content *content)
546 {
547  auto ct = content->contentType(false);
548  if (!ct || !ct->isMediatype("application")) {
549  return false;
550  }
551 
552  const QByteArray lowerSubType = ct->subType().toLower();
553  if (lowerSubType == "pgp-encrypted" ||
554  lowerSubType == "pgp-signature" ||
555  lowerSubType == "pkcs7-mime" ||
556  lowerSubType == "x-pkcs7-mime" ||
557  lowerSubType == "pkcs7-signature" ||
558  lowerSubType == "x-pkcs7-signature") {
559  return true;
560  }
561 
562  if (lowerSubType == "octet-stream") {
563  auto cd = content->contentDisposition(false);
564  if (!cd) {
565  return false;
566  }
567  const auto fileName = cd->filename().toLower();
568  return fileName == QLatin1String("msg.asc") || fileName == QLatin1String("encrypted.asc");
569  }
570 
571  return false;
572 }
573 
574 bool isAttachment(Content* content)
575 {
576  if (!content) {
577  return false;
578  }
579 
580  const auto contentType = content->contentType(false);
581  // multipart/* is never an attachment itself, message/rfc822 always is
582  if (contentType) {
583  if (contentType->isMultipart()) {
584  return false;
585  }
586  if (contentType->isMimeType("message/rfc822")) {
587  return true;
588  }
589  }
590 
591  // the main body part is not an attachment
592  if (content->parent()) {
593  const auto top = content->topLevel();
594  if (content == top->textContent()) {
595  return false;
596  }
597  }
598 
599  // ignore crypto parts
600  if (isCryptoPart(content)) {
601  return false;
602  }
603 
604  // content type or content disposition having a file name set looks like an attachment
605  const auto contentDisposition = content->contentDisposition(false);
606  if (contentDisposition && !contentDisposition->filename().isEmpty()) {
607  return true;
608  }
609 
610  if (contentType && !contentType->name().isEmpty()) {
611  return true;
612  }
613 
614  // "attachment" content disposition is otherwise a good indicator though
615  if (contentDisposition && contentDisposition->disposition() == Headers::CDattachment) {
616  return true;
617  }
618 
619  return false;
620 }
621 
622 bool hasAttachment(Content *content)
623 {
624  if (!content) {
625  return false;
626  }
627 
628  if (isAttachment(content)) {
629  return true;
630  }
631 
632  // Ok, content itself is not an attachment. now we deal with multiparts
633  auto ct = content->contentType(false);
634  if (ct && ct->isMultipart() && !ct->isSubtype("related")) {// && !ct->isSubtype("alternative")) {
635  const auto contents = content->contents();
636  for (Content *child : contents) {
637  if (hasAttachment(child)) {
638  return true;
639  }
640  }
641  }
642  return false;
643 }
644 
645 bool hasInvitation(Content *content)
646 {
647  if (!content) {
648  return false;
649  }
650 
651  if (isInvitation(content)) {
652  return true;
653  }
654 
655  // Ok, content itself is not an invitation. now we deal with multiparts
656  if (content->contentType()->isMultipart()) {
657  const auto contents = content->contents();
658  for (Content *child : contents) {
659  if (hasInvitation(child)) {
660  return true;
661  }
662  }
663  }
664  return false;
665 }
666 
667 bool isSigned(Message *message)
668 {
669  if (!message) {
670  return false;
671  }
672 
673  const KMime::Headers::ContentType *const contentType = message->contentType();
674  if (contentType->isSubtype("signed") ||
675  contentType->isSubtype("pgp-signature") ||
676  contentType->isSubtype("pkcs7-signature") ||
677  contentType->isSubtype("x-pkcs7-signature") ||
678  message->mainBodyPart("multipart/signed") ||
679  message->mainBodyPart("application/pgp-signature") ||
680  message->mainBodyPart("application/pkcs7-signature") ||
681  message->mainBodyPart("application/x-pkcs7-signature")) {
682  return true;
683  }
684  return false;
685 }
686 
687 bool isEncrypted(Message *message)
688 {
689  if (!message) {
690  return false;
691  }
692 
693  const KMime::Headers::ContentType *const contentType = message->contentType();
694  if (contentType->isSubtype("encrypted") ||
695  contentType->isSubtype("pgp-encrypted") ||
696  contentType->isSubtype("pkcs7-mime") ||
697  contentType->isSubtype("x-pkcs7-mime") ||
698  message->mainBodyPart("multipart/encrypted") ||
699  message->mainBodyPart("application/pgp-encrypted") ||
700  message->mainBodyPart("application/pkcs7-mime") ||
701  message->mainBodyPart("application/x-pkcs7-mime")) {
702  return true;
703  }
704 
705  return false;
706 }
707 
708 bool isInvitation(Content *content)
709 {
710  if (!content) {
711  return false;
712  }
713 
714  const KMime::Headers::ContentType *const contentType = content->contentType(false);
715 
716  if (contentType && contentType->isMediatype("text") && contentType->isSubtype("calendar")) {
717  return true;
718  }
719 
720  return false;
721 }
722 
723 } // namespace KMime
bool isMultipart() const
Returns true if the associated MIME entity is a multipart container.
QByteArray toLower() const const
void append(const T &value)
void reserve(int size)
char at(int i) const const
QByteArray & setNum(short n, int base)
T & last()
QByteArray toUpper() const const
bool isEmpty() const const
Content * mainBodyPart(const QByteArray &type=QByteArray())
Returns the first main body part of a given type, taking multipart/mixed and multipart/alternative no...
Headers::ContentDisposition * contentDisposition(bool create=true)
Returns the Content-Disposition header.
int length() const const
QString & remove(int position, int n)
bool isSubtype(const char *subtype) const
Tests if the mime sub-type equals subtype.
const QList< QKeySequence > & begin()
int indexOf(char ch, int from) const const
contentDisposition
Various possible values for the "Content-Disposition" header.
Definition: kmime_headers.h:71
This file is part of the API for handling MIME data and defines the CharFreq class.
QVector< Content * > contents() const
For multipart contents, this will return a list of all multipart child contents.
bool isSpace() const const
QString & insert(int position, QChar ch)
QByteArray & prepend(char ch)
const char * constData() const const
QByteArray & replace(int pos, int len, const char *after)
bool endsWith(const QString &s, Qt::CaseSensitivity cs) const const
qint64 applicationPid()
QByteArray mid(int pos, int len) const const
ushort unicode() const const
Headers::ContentType * contentType(bool create=true)
Returns the Content-Type header.
QByteArray & append(char ch)
QString toLower() const const
char toLatin1() const const
Content * topLevel() const
Returns the toplevel content object, 0 if there is no such object.
const QList< QKeySequence > & end()
Represents a (email) message.
Definition: kmime_message.h:66
Content * textContent()
Returns the first Content with mimetype text/.
const QChar at(int position) const const
bool isMediatype(const char *mediatype) const
Tests if the media type equals mediatype.
bool contains(char ch) const const
int length() const const
A class that encapsulates MIME encoded Content.
Definition: kmime_content.h:98
char * data()
Content * parent() const
Returns the parent content object, or 0 if the content doesn't have a parent.
QString filename() const
Returns the suggested filename for the associated MIME part.
A class for performing basic data typing using frequency count heuristics.
int size() const const
Represents a "Content-Type" header.
contentEncoding
Various possible values for the "Content-Transfer-Encoding" header.
Definition: kmime_headers.h:59
This file is part of the KDE documentation.
Documentation copyright © 1996-2021 The KDE developers.
Generated on Mon Sep 27 2021 23:15:57 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.