KMime

kmime_parsers.cpp
1 /*
2  kmime_parsers.cpp
3 
4  KMime, the KDE Internet mail/usenet news message library.
5  SPDX-FileCopyrightText: 2001 the KMime authors.
6  See file AUTHORS for details
7 
8  SPDX-License-Identifier: LGPL-2.0-or-later
9 */
10 #include "kmime_parsers.h"
11 
12 #include <QRegularExpression>
13 
14 using namespace KMime::Parser;
15 
16 namespace KMime
17 {
18 namespace Parser
19 {
20 
21 MultiPart::MultiPart(const QByteArray &src, const QByteArray &boundary)
22  : m_src(src)
23  , m_boundary(boundary)
24 {
25 }
26 
27 bool MultiPart::parse()
28 {
29  QByteArray b = "--" + m_boundary;
30  QByteArray part;
31  int pos1 = 0;
32  int pos2 = 0;
33  int blen = b.length();
34 
35  m_parts.clear();
36 
37  //find the first valid boundary
38  while (true) {
39  if ((pos1 = m_src.indexOf(b, pos1)) == -1 || pos1 == 0 ||
40  m_src[pos1 - 1] == '\n') { //valid boundary found or no boundary at all
41  break;
42  }
43  pos1 += blen; //boundary found but not valid => skip it;
44  }
45 
46  if (pos1 > -1) {
47  pos1 += blen;
48  if (m_src[pos1] == '-' && m_src[pos1 + 1] == '-') {
49  // the only valid boundary is the end-boundary
50  // this message is *really* broken
51  pos1 = -1; //we give up
52  } else if ((pos1 - blen) > 1) { //preamble present
53  m_preamble = m_src.left(pos1 - blen - 1);
54  }
55  }
56 
57  while (pos1 > -1 && pos2 > -1) {
58 
59  //skip the rest of the line for the first boundary - the message-part starts here
60  if ((pos1 = m_src.indexOf('\n', pos1)) > -1) {
61  //now search the next linebreak
62  //now find the next valid boundary
63  pos2 = ++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary
64  while (true) {
65  if ((pos2 = m_src.indexOf(b, pos2)) == -1 ||
66  m_src[pos2 - 1] == '\n') { //valid boundary or no more boundaries found
67  break;
68  }
69  pos2 += blen; //boundary is invalid => skip it;
70  }
71 
72  if (pos2 == -1) { // no more boundaries found
73  part = m_src.mid(pos1, m_src.length() - pos1); //take the rest of the string
74  m_parts.append(part);
75  pos1 = -1;
76  pos2 = -1; //break;
77  } else {
78  part = m_src.mid(pos1, pos2 - pos1 - 1); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1)
79  m_parts.append(part);
80  pos2 += blen; //pos2 points now to the first character after the boundary
81  if (m_src[pos2] == '-' && m_src[pos2 + 1] == '-') { //end-boundary
82  pos1 = pos2 + 2; //pos1 points now to the character directly after the end-boundary
83 
84  if ((pos1 = m_src.indexOf('\n', pos1)) > -1) { //skip the rest of this line
85  //everything after the end-boundary is considered as the epilouge
86  m_epilouge = m_src.mid(pos1 + 1, m_src.length() - pos1 - 1);
87  }
88  pos1 = -1;
89  pos2 = -1; //break
90  } else {
91  pos1 = pos2; //the search continues ...
92  }
93  }
94  }
95  }
96 
97  return !m_parts.isEmpty();
98 }
99 
100 //=============================================================================
101 
102 NonMimeParser::NonMimeParser(const QByteArray &src) :
103  m_src(src), m_partNr(-1), m_totalNr(-1)
104 {
105 }
106 
107 NonMimeParser::~NonMimeParser() {}
108 
109 /**
110  * try to guess the mimetype from the file-extension
111  */
112 
113 QByteArray NonMimeParser::guessMimeType(const QByteArray &fileName)
114 {
115  QByteArray tmp;
117 
118  if (!fileName.isEmpty()) {
119  int pos = fileName.lastIndexOf('.');
120  if (pos++ != -1) {
121  tmp = fileName.mid(pos, fileName.length() - pos).toUpper();
122  if (tmp == "JPG" || tmp == "JPEG") {
123  mimeType = QByteArrayLiteral("image/jpeg");
124  } else if (tmp == "GIF") {
125  mimeType = QByteArrayLiteral("image/gif");
126  } else if (tmp == "PNG") {
127  mimeType = QByteArrayLiteral("image/png");
128  } else if (tmp == "TIFF" || tmp == "TIF") {
129  mimeType = QByteArrayLiteral("image/tiff");
130  } else if (tmp == "XPM") {
131  mimeType = QByteArrayLiteral("image/x-xpixmap");
132  } else if (tmp == "XBM") {
133  mimeType = QByteArrayLiteral("image/x-xbitmap");
134  } else if (tmp == "BMP") {
135  mimeType = QByteArrayLiteral("image/bmp");
136  } else if (tmp == "TXT" ||
137  tmp == "ASC" ||
138  tmp == "H" ||
139  tmp == "C" ||
140  tmp == "CC" ||
141  tmp == "CPP") {
142  mimeType = QByteArrayLiteral("text/plain");
143  } else if (tmp == "HTML" || tmp == "HTM") {
144  mimeType = QByteArrayLiteral("text/html");
145  } else {
146  mimeType = QByteArrayLiteral("application/octet-stream");
147  }
148  } else {
149  mimeType = QByteArrayLiteral("application/octet-stream");
150  }
151  } else {
152  mimeType = QByteArrayLiteral("application/octet-stream");
153  }
154 
155  return mimeType;
156 }
157 
158 //==============================================================================
159 
160 UUEncoded::UUEncoded(const QByteArray &src, const QByteArray &subject) :
161  NonMimeParser(src), m_subject(subject)
162 {}
163 
164 bool UUEncoded::parse()
165 {
166  int currentPos = 0;
167  bool success = true;
168  bool firstIteration = true;
169 
170  const auto srcStr = QString::fromLatin1(m_src);
171  const QRegularExpression beginRegex(QStringLiteral("begin [0-9][0-9][0-9]"));
172  const QRegularExpression subjectRegex(QStringLiteral("[0-9]+/[0-9]+"));
173 
174  while (success) {
175  int beginPos = currentPos;
176  int uuStart = currentPos;
177  int endPos = 0;
178  int lineCount = 0;
179  int MCount = 0;
180  int pos = 0;
181  int len = 0;
182  bool containsBegin = false;
183  bool containsEnd = false;
184  QByteArray tmp;
185  QByteArray fileName;
186 
187  if ((beginPos = srcStr.indexOf(beginRegex, currentPos)) > -1 &&
188  (beginPos == 0 || m_src.at(beginPos - 1) == '\n')) {
189  containsBegin = true;
190  uuStart = m_src.indexOf('\n', beginPos);
191  if (uuStart == -1) { //no more line breaks found, we give up
192  success = false;
193  break;
194  } else {
195  uuStart++; //points now at the beginning of the next line
196  }
197  } else {
198  beginPos = currentPos;
199  }
200 
201  if ((endPos = m_src.indexOf("\nend", (uuStart > 0) ? uuStart - 1 : 0)) == -1) {
202  endPos = m_src.length(); //no end found
203  } else {
204  containsEnd = true;
205  }
206 
207  if ((containsBegin && containsEnd) || firstIteration) {
208 
209  //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos);
210  //all lines in a uuencoded text start with 'M'
211  for (int idx = uuStart; idx < endPos; idx++) {
212  if (m_src[idx] == '\n') {
213  lineCount++;
214  if (idx + 1 < endPos && m_src[idx + 1] == 'M') {
215  idx++;
216  MCount++;
217  }
218  }
219  }
220 
221  //printf("lineCount=%d , MCount=%d\n", lineCount, MCount);
222  if (MCount == 0 || (lineCount - MCount) > 10 ||
223  ((!containsBegin || !containsEnd) && (MCount < 15))) {
224  // harder check for split-articles
225  success = false;
226  break; //too many "non-M-Lines" found, we give up
227  }
228 
229  if ((!containsBegin || !containsEnd) && !m_subject.isNull()) {
230  // message may be split up => parse subject
231  const auto match = subjectRegex.match(QLatin1String(m_subject));
232  pos = match.capturedStart(0);
233  len = match.capturedLength(0);
234  if (pos != -1) {
235  tmp = m_subject.mid(pos, len);
236  pos = tmp.indexOf('/');
237  m_partNr = tmp.left(pos).toInt();
238  m_totalNr = tmp.right(tmp.length() - pos - 1).toInt();
239  } else {
240  success = false;
241  break; //no "part-numbers" found in the subject, we give up
242  }
243  }
244 
245  //everything before "begin" is text
246  if (beginPos > 0) {
247  m_text.append(m_src.mid(currentPos, beginPos - currentPos));
248  }
249 
250  if (containsBegin) {
251  //everything between "begin ### " and the next LF is considered as the filename
252  fileName = m_src.mid(beginPos + 10, uuStart - beginPos - 11);
253  } else {
254  fileName = "";
255  }
256  m_filenames.append(fileName);
257  //everything between "begin" and "end" is uuencoded
258  m_bins.append(m_src.mid(uuStart, endPos - uuStart + 1));
259  m_mimeTypes.append(guessMimeType(fileName));
260  firstIteration = false;
261 
262  int next = m_src.indexOf('\n', endPos + 1);
263  if (next == -1) { //no more line breaks found, we give up
264  success = false;
265  break;
266  } else {
267  next++; //points now at the beginning of the next line
268  }
269  currentPos = next;
270 
271  } else {
272  success = false;
273  }
274  }
275 
276  // append trailing text part of the article
277  m_text.append(m_src.right(m_src.length() - currentPos));
278 
279  return ((!m_bins.isEmpty()) || isPartial());
280 }
281 
282 //==============================================================================
283 
284 YENCEncoded::YENCEncoded(const QByteArray &src) :
285  NonMimeParser(src)
286 {
287 }
288 
289 bool YENCEncoded::yencMeta(QByteArray &src, const QByteArray &name, int *value)
290 {
291  bool found = false;
292  QByteArray sought = name + '=';
293 
294  int iPos = src.indexOf(sought);
295  if (iPos > -1) {
296  int pos1 = src.indexOf(' ', iPos);
297  int pos2 = src.indexOf('\r', iPos);
298  int pos3 = src.indexOf('\t', iPos);
299  int pos4 = src.indexOf('\n', iPos);
300  if (pos2 >= 0 && (pos1 < 0 || pos1 > pos2)) {
301  pos1 = pos2;
302  }
303  if (pos3 >= 0 && (pos1 < 0 || pos1 > pos3)) {
304  pos1 = pos3;
305  }
306  if (pos4 >= 0 && (pos1 < 0 || pos1 > pos4)) {
307  pos1 = pos4;
308  }
309  iPos = src.lastIndexOf('=', pos1) + 1;
310  if (iPos < pos1) {
311  char c = src.at(iPos);
312  if (c >= '0' && c <= '9') {
313  found = true;
314  *value = src.mid(iPos, pos1 - iPos).toInt();
315  }
316  }
317  }
318  return found;
319 }
320 
321 bool YENCEncoded::parse()
322 {
323  int currentPos = 0;
324  bool success = true;
325  while (success) {
326  int beginPos = currentPos;
327  int yencStart = currentPos;
328  bool containsPart = false;
329  QByteArray fileName;
330 
331  if ((beginPos = m_src.indexOf("=ybegin ", currentPos)) > -1 &&
332  (beginPos == 0 || m_src.at(beginPos - 1) == '\n')) {
333  yencStart = m_src.indexOf('\n', beginPos);
334  if (yencStart == -1) { // no more line breaks found, give up
335  success = false;
336  break;
337  } else {
338  yencStart++;
339  if (m_src.indexOf("=ypart", yencStart) == yencStart) {
340  containsPart = true;
341  yencStart = m_src.indexOf('\n', yencStart);
342  if (yencStart == -1) {
343  success = false;
344  break;
345  }
346  yencStart++;
347  }
348  }
349  // Try to identify yenc meta data
350 
351  // Filenames can contain any embedded chars until end of line
352  QByteArray meta = m_src.mid(beginPos, yencStart - beginPos);
353  int namePos = meta.indexOf("name=");
354  if (namePos == -1) {
355  success = false;
356  break;
357  }
358  int eolPos = meta.indexOf('\r', namePos);
359  if (eolPos == -1) {
360  eolPos = meta.indexOf('\n', namePos);
361  }
362  if (eolPos == -1) {
363  success = false;
364  break;
365  }
366  fileName = meta.mid(namePos + 5, eolPos - (namePos + 5));
367 
368  // Other metadata is integer
369  int yencLine;
370  if (!yencMeta(meta, "line", &yencLine)) {
371  success = false;
372  break;
373  }
374  int yencSize;
375  if (!yencMeta(meta, "size", &yencSize)) {
376  success = false;
377  break;
378  }
379 
380  int partBegin;
381  int partEnd;
382  if (containsPart) {
383  if (!yencMeta(meta, "part", &m_partNr)) {
384  success = false;
385  break;
386  }
387  if (!yencMeta(meta, "begin", &partBegin) ||
388  !yencMeta(meta, "end", &partEnd)) {
389  success = false;
390  break;
391  }
392  if (!yencMeta(meta, "total", &m_totalNr)) {
393  m_totalNr = m_partNr + 1;
394  }
395  if (yencSize == partEnd - partBegin + 1) {
396  m_totalNr = 1;
397  } else {
398  yencSize = partEnd - partBegin + 1;
399  }
400  }
401 
402  // We have a valid yenc header; now we extract the binary data
403  int totalSize = 0;
404  int pos = yencStart;
405  int len = m_src.length();
406  bool lineStart = true;
407  int lineLength = 0;
408  bool containsEnd = false;
409  QByteArray binary;
410  binary.resize(yencSize);
411  while (pos < len) {
412  int ch = m_src.at(pos);
413  if (ch < 0) {
414  ch += 256;
415  }
416  if (ch == '\r') {
417  if (lineLength != yencLine && totalSize != yencSize) {
418  break;
419  }
420  pos++;
421  } else if (ch == '\n') {
422  lineStart = true;
423  lineLength = 0;
424  pos++;
425  } else {
426  if (ch == '=') {
427  if (pos + 1 < len) {
428  ch = m_src.at(pos + 1);
429  if (lineStart && ch == 'y') {
430  containsEnd = true;
431  break;
432  }
433  pos += 2;
434  ch -= 64 + 42;
435  if (ch < 0) {
436  ch += 256;
437  }
438  if (totalSize >= yencSize) {
439  break;
440  }
441  binary[totalSize++] = ch;
442  lineLength++;
443  } else {
444  break;
445  }
446  } else {
447  ch -= 42;
448  if (ch < 0) {
449  ch += 256;
450  }
451  if (totalSize >= yencSize) {
452  break;
453  }
454  binary[totalSize++] = ch;
455  lineLength++;
456  pos++;
457  }
458  lineStart = false;
459  }
460  }
461 
462  if (!containsEnd) {
463  success = false;
464  break;
465  }
466  if (totalSize != yencSize) {
467  success = false;
468  break;
469  }
470 
471  // pos now points to =yend; get end data
472  eolPos = m_src.indexOf('\n', pos);
473  if (eolPos == -1) {
474  success = false;
475  break;
476  }
477  meta = m_src.mid(pos, eolPos - pos);
478  if (!yencMeta(meta, "size", &totalSize)) {
479  success = false;
480  break;
481  }
482  if (totalSize != yencSize) {
483  success = false;
484  break;
485  }
486 
487  m_filenames.append(fileName);
488  m_mimeTypes.append(guessMimeType(fileName));
489  m_bins.append(binary);
490 
491  //everything before "begin" is text
492  if (beginPos > 0) {
493  m_text.append(m_src.mid(currentPos, beginPos - currentPos));
494  }
495  currentPos = eolPos + 1;
496 
497  } else {
498  success = false;
499  }
500  }
501 
502  // append trailing text part of the article
503  m_text.append(m_src.right(m_src.length() - currentPos));
504 
505  return !m_bins.isEmpty();
506 }
507 
508 } // namespace Parser
509 
510 } // namespace KMime
QRegularExpressionMatch match(const QString &subject, int offset, QRegularExpression::MatchType matchType, QRegularExpression::MatchOptions matchOptions) const
int toInt(bool *ok, int base) const
char at(int i) const
int lastIndexOf(char ch, int from) const
bool isEmpty() const
int length() const
void resize(int size)
int indexOf(char ch, int from) const
QByteArray right(int len) const
KMime::Headers::Subject * subject(bool create=true)
Returns the Subject header.
QByteArray mid(int pos, int len) const
QByteArray & append(char ch)
QByteArray left(int len) const
static QString mimeType()
Returns the MIME type used for Messages.
QString fromLatin1(const char *str, int size)
Helper-class: abstract base class of all parsers for non-mime binary data (uuencoded, yenc)
Definition: kmime_parsers.h:56
int lineCount() const
Line count of this Content and all sub-Contents.
This file is part of the KDE documentation.
Documentation copyright © 1996-2021 The KDE developers.
Generated on Sat Sep 25 2021 23:14:46 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.