KMime

kmime_parsers.cpp
1 /*
2  kmime_parsers.cpp
3 
4  KMime, the KDE Internet mail/usenet news message library.
5  SPDX-FileCopyrightText: 2001 the KMime authors.
6  See file AUTHORS for details
7 
8  SPDX-License-Identifier: LGPL-2.0-or-later
9 */
10 #include "kmime_parsers.h"
11 
12 #include <QRegularExpression>
13 
14 using namespace KMime::Parser;
15 
16 namespace KMime
17 {
18 namespace Parser
19 {
20 
21 MultiPart::MultiPart(const QByteArray &src, const QByteArray &boundary)
22  : m_src(src)
23  , m_boundary(boundary)
24 {
25 }
26 
27 bool MultiPart::parse()
28 {
29  QByteArray b = "--" + m_boundary;
30  QByteArray part;
31  int pos1 = 0;
32  int pos2 = 0;
33  int blen = b.length();
34 
35  m_parts.clear();
36 
37  //find the first valid boundary
38  while (true) {
39  if ((pos1 = m_src.indexOf(b, pos1)) == -1 || pos1 == 0 ||
40  m_src[pos1 - 1] == '\n') { //valid boundary found or no boundary at all
41  break;
42  }
43  pos1 += blen; //boundary found but not valid => skip it;
44  }
45 
46  if (pos1 > -1) {
47  pos1 += blen;
48  if (m_src[pos1] == '-' && m_src[pos1 + 1] == '-') {
49  // the only valid boundary is the end-boundary
50  // this message is *really* broken
51  pos1 = -1; //we give up
52  } else if ((pos1 - blen) > 1) { //preamble present
53  m_preamble = m_src.left(pos1 - blen - 1);
54  }
55  }
56 
57  while (pos1 > -1 && pos2 > -1) {
58 
59  //skip the rest of the line for the first boundary - the message-part starts here
60  if ((pos1 = m_src.indexOf('\n', pos1)) > -1) {
61  //now search the next linebreak
62  //now find the next valid boundary
63  pos2 = ++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary
64  while (true) {
65  if ((pos2 = m_src.indexOf(b, pos2)) == -1 ||
66  m_src[pos2 - 1] == '\n') { //valid boundary or no more boundaries found
67  break;
68  }
69  pos2 += blen; //boundary is invalid => skip it;
70  }
71 
72  if (pos2 == -1) { // no more boundaries found
73  part = m_src.mid(pos1, m_src.length() - pos1); //take the rest of the string
74  m_parts.append(part);
75  pos1 = -1;
76  pos2 = -1; //break;
77  } else {
78  part = m_src.mid(pos1, pos2 - pos1 - 1); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1)
79  m_parts.append(part);
80  pos2 += blen; //pos2 points now to the first character after the boundary
81  if (m_src[pos2] == '-' && m_src[pos2 + 1] == '-') { //end-boundary
82  pos1 = pos2 + 2; //pos1 points now to the character directly after the end-boundary
83 
84  if ((pos1 = m_src.indexOf('\n', pos1)) > -1) { //skip the rest of this line
85  //everything after the end-boundary is considered as the epilouge
86  m_epilouge = m_src.mid(pos1 + 1, m_src.length() - pos1 - 1);
87  }
88  pos1 = -1;
89  pos2 = -1; //break
90  } else {
91  pos1 = pos2; //the search continues ...
92  }
93  }
94  }
95  }
96 
97  return !m_parts.isEmpty();
98 }
99 
100 //=============================================================================
101 
102 NonMimeParser::NonMimeParser(const QByteArray &src) :
103  m_src(src), m_partNr(-1), m_totalNr(-1)
104 {
105 }
106 
107 NonMimeParser::~NonMimeParser() = default;
108 
109 /**
110  * try to guess the mimetype from the file-extension
111  */
112 
113 QByteArray NonMimeParser::guessMimeType(const QByteArray &fileName)
114 {
115  QByteArray tmp;
116  QByteArray mimeType;
117 
118  if (!fileName.isEmpty()) {
119  int pos = fileName.lastIndexOf('.');
120  if (pos++ != -1) {
121  tmp = fileName.mid(pos, fileName.length() - pos).toUpper();
122  if (tmp == "JPG" || tmp == "JPEG") {
123  mimeType = QByteArrayLiteral("image/jpeg");
124  } else if (tmp == "GIF") {
125  mimeType = QByteArrayLiteral("image/gif");
126  } else if (tmp == "PNG") {
127  mimeType = QByteArrayLiteral("image/png");
128  } else if (tmp == "TIFF" || tmp == "TIF") {
129  mimeType = QByteArrayLiteral("image/tiff");
130  } else if (tmp == "XPM") {
131  mimeType = QByteArrayLiteral("image/x-xpixmap");
132  } else if (tmp == "XBM") {
133  mimeType = QByteArrayLiteral("image/x-xbitmap");
134  } else if (tmp == "BMP") {
135  mimeType = QByteArrayLiteral("image/bmp");
136  } else if (tmp == "TXT" ||
137  tmp == "ASC" ||
138  tmp == "H" ||
139  tmp == "C" ||
140  tmp == "CC" ||
141  tmp == "CPP") {
142  mimeType = QByteArrayLiteral("text/plain");
143  } else if (tmp == "HTML" || tmp == "HTM") {
144  mimeType = QByteArrayLiteral("text/html");
145  } else {
146  mimeType = QByteArrayLiteral("application/octet-stream");
147  }
148  } else {
149  mimeType = QByteArrayLiteral("application/octet-stream");
150  }
151  } else {
152  mimeType = QByteArrayLiteral("application/octet-stream");
153  }
154 
155  return mimeType;
156 }
157 
158 //==============================================================================
159 
160 UUEncoded::UUEncoded(const QByteArray &src, const QByteArray &subject) :
161  NonMimeParser(src), m_subject(subject)
162 {}
163 
164 bool UUEncoded::parse()
165 {
166  int currentPos = 0;
167  bool success = true;
168  bool firstIteration = true;
169 
170  const auto srcStr = QString::fromLatin1(m_src);
171  const QRegularExpression beginRegex(QStringLiteral("begin [0-9][0-9][0-9]"));
172  const QRegularExpression subjectRegex(QStringLiteral("[0-9]+/[0-9]+"));
173 
174  while (success) {
175  int beginPos = currentPos;
176  int uuStart = currentPos;
177  int endPos = 0;
178  int lineCount = 0;
179  int MCount = 0;
180  int pos = 0;
181  int len = 0;
182  bool containsBegin = false;
183  bool containsEnd = false;
184  QByteArray tmp;
185  QByteArray fileName;
186 
187  if ((beginPos = srcStr.indexOf(beginRegex, currentPos)) > -1 &&
188  (beginPos == 0 || m_src.at(beginPos - 1) == '\n')) {
189  containsBegin = true;
190  uuStart = m_src.indexOf('\n', beginPos);
191  if (uuStart == -1) { //no more line breaks found, we give up
192  success = false;
193  break;
194  } else {
195  uuStart++; //points now at the beginning of the next line
196  }
197  } else {
198  beginPos = currentPos;
199  }
200 
201  if ((endPos = m_src.indexOf("\nend", (uuStart > 0) ? uuStart - 1 : 0)) == -1) {
202  endPos = m_src.length(); //no end found
203  } else {
204  containsEnd = true;
205  }
206 
207  if ((containsBegin && containsEnd) || firstIteration) {
208 
209  //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos);
210  //all lines in a uuencoded text start with 'M'
211  for (int idx = uuStart; idx < endPos; idx++) {
212  if (m_src[idx] == '\n') {
213  lineCount++;
214  if (idx + 1 < endPos && m_src[idx + 1] == 'M') {
215  idx++;
216  MCount++;
217  }
218  }
219  }
220 
221  //printf("lineCount=%d , MCount=%d\n", lineCount, MCount);
222  if (MCount == 0 || (lineCount - MCount) > 10 ||
223  ((!containsBegin || !containsEnd) && (MCount < 15))) {
224  // harder check for split-articles
225  success = false;
226  break; //too many "non-M-Lines" found, we give up
227  }
228 
229  if ((!containsBegin || !containsEnd) && !m_subject.isNull()) {
230  // message may be split up => parse subject
231  const auto match = subjectRegex.match(QLatin1String(m_subject));
232  pos = match.capturedStart(0);
233  len = match.capturedLength(0);
234  if (pos != -1) {
235  tmp = m_subject.mid(pos, len);
236  pos = tmp.indexOf('/');
237  m_partNr = tmp.left(pos).toInt();
238  m_totalNr = tmp.right(tmp.length() - pos - 1).toInt();
239  } else {
240  success = false;
241  break; //no "part-numbers" found in the subject, we give up
242  }
243  }
244 
245  //everything before "begin" is text
246  if (beginPos > 0) {
247  m_text.append(m_src.mid(currentPos, beginPos - currentPos));
248  }
249 
250  if (containsBegin) {
251  //everything between "begin ### " and the next LF is considered as the filename
252  fileName = m_src.mid(beginPos + 10, uuStart - beginPos - 11);
253  } else {
254  fileName = "";
255  }
256  m_filenames.append(fileName);
257  //everything between "begin" and "end" is uuencoded
258  m_bins.append(m_src.mid(uuStart, endPos - uuStart + 1));
259  m_mimeTypes.append(guessMimeType(fileName));
260  firstIteration = false;
261 
262  int next = m_src.indexOf('\n', endPos + 1);
263  if (next == -1) { //no more line breaks found, we give up
264  success = false;
265  break;
266  } else {
267  next++; //points now at the beginning of the next line
268  }
269  currentPos = next;
270 
271  } else {
272  success = false;
273  }
274  }
275 
276  // append trailing text part of the article
277  m_text.append(m_src.right(m_src.length() - currentPos));
278 
279  return ((!m_bins.isEmpty()) || isPartial());
280 }
281 
282 //==============================================================================
283 
284 YENCEncoded::YENCEncoded(const QByteArray &src) :
285  NonMimeParser(src)
286 {
287 }
288 
289 bool YENCEncoded::yencMeta(QByteArray &src, const QByteArray &name, int *value)
290 {
291  bool found = false;
292  QByteArray sought = name + '=';
293 
294  int iPos = src.indexOf(sought);
295  if (iPos > -1) {
296  int pos1 = src.indexOf(' ', iPos);
297  int pos2 = src.indexOf('\r', iPos);
298  int pos3 = src.indexOf('\t', iPos);
299  int pos4 = src.indexOf('\n', iPos);
300  if (pos2 >= 0 && (pos1 < 0 || pos1 > pos2)) {
301  pos1 = pos2;
302  }
303  if (pos3 >= 0 && (pos1 < 0 || pos1 > pos3)) {
304  pos1 = pos3;
305  }
306  if (pos4 >= 0 && (pos1 < 0 || pos1 > pos4)) {
307  pos1 = pos4;
308  }
309  iPos = src.lastIndexOf('=', pos1) + 1;
310  if (iPos < pos1) {
311  char c = src.at(iPos);
312  if (c >= '0' && c <= '9') {
313  found = true;
314  *value = src.mid(iPos, pos1 - iPos).toInt();
315  }
316  }
317  }
318  return found;
319 }
320 
321 bool YENCEncoded::parse()
322 {
323  int currentPos = 0;
324  bool success = true;
325  while (success) {
326  int beginPos = currentPos;
327  int yencStart = currentPos;
328  bool containsPart = false;
329  QByteArray fileName;
330 
331  if ((beginPos = m_src.indexOf("=ybegin ", currentPos)) > -1 &&
332  (beginPos == 0 || m_src.at(beginPos - 1) == '\n')) {
333  yencStart = m_src.indexOf('\n', beginPos);
334  if (yencStart == -1) { // no more line breaks found, give up
335  success = false;
336  break;
337  } else {
338  yencStart++;
339  if (m_src.indexOf("=ypart", yencStart) == yencStart) {
340  containsPart = true;
341  yencStart = m_src.indexOf('\n', yencStart);
342  if (yencStart == -1) {
343  success = false;
344  break;
345  }
346  yencStart++;
347  }
348  }
349  // Try to identify yenc meta data
350 
351  // Filenames can contain any embedded chars until end of line
352  QByteArray meta = m_src.mid(beginPos, yencStart - beginPos);
353  int namePos = meta.indexOf("name=");
354  if (namePos == -1) {
355  success = false;
356  break;
357  }
358  int eolPos = meta.indexOf('\r', namePos);
359  if (eolPos == -1) {
360  eolPos = meta.indexOf('\n', namePos);
361  }
362  if (eolPos == -1) {
363  success = false;
364  break;
365  }
366  fileName = meta.mid(namePos + 5, eolPos - (namePos + 5));
367 
368  // Other metadata is integer
369  int yencLine;
370  if (!yencMeta(meta, "line", &yencLine)) {
371  success = false;
372  break;
373  }
374  int yencSize;
375  if (!yencMeta(meta, "size", &yencSize)) {
376  success = false;
377  break;
378  }
379 
380  int partBegin;
381  int partEnd;
382  if (containsPart) {
383  if (!yencMeta(meta, "part", &m_partNr)) {
384  success = false;
385  break;
386  }
387  if (!yencMeta(meta, "begin", &partBegin) ||
388  !yencMeta(meta, "end", &partEnd)) {
389  success = false;
390  break;
391  }
392  if (!yencMeta(meta, "total", &m_totalNr)) {
393  m_totalNr = m_partNr + 1;
394  }
395  if (yencSize == partEnd - partBegin + 1) {
396  m_totalNr = 1;
397  } else {
398  yencSize = partEnd - partBegin + 1;
399  }
400  }
401 
402  // We have a valid yenc header; now we extract the binary data
403  int totalSize = 0;
404  int pos = yencStart;
405  int len = m_src.length();
406  bool lineStart = true;
407  int lineLength = 0;
408  bool containsEnd = false;
409  QByteArray binary;
410  binary.resize(yencSize);
411  while (pos < len) {
412  int ch = m_src.at(pos);
413  if (ch < 0) {
414  ch += 256;
415  }
416  if (ch == '\r') {
417  if (lineLength != yencLine && totalSize != yencSize) {
418  break;
419  }
420  pos++;
421  } else if (ch == '\n') {
422  lineStart = true;
423  lineLength = 0;
424  pos++;
425  } else {
426  if (ch == '=') {
427  if (pos + 1 < len) {
428  ch = m_src.at(pos + 1);
429  if (lineStart && ch == 'y') {
430  containsEnd = true;
431  break;
432  }
433  pos += 2;
434  ch -= 64 + 42;
435  if (ch < 0) {
436  ch += 256;
437  }
438  if (totalSize >= yencSize) {
439  break;
440  }
441  binary[totalSize++] = ch;
442  lineLength++;
443  } else {
444  break;
445  }
446  } else {
447  ch -= 42;
448  if (ch < 0) {
449  ch += 256;
450  }
451  if (totalSize >= yencSize) {
452  break;
453  }
454  binary[totalSize++] = ch;
455  lineLength++;
456  pos++;
457  }
458  lineStart = false;
459  }
460  }
461 
462  if (!containsEnd) {
463  success = false;
464  break;
465  }
466  if (totalSize != yencSize) {
467  success = false;
468  break;
469  }
470 
471  // pos now points to =yend; get end data
472  eolPos = m_src.indexOf('\n', pos);
473  if (eolPos == -1) {
474  success = false;
475  break;
476  }
477  meta = m_src.mid(pos, eolPos - pos);
478  if (!yencMeta(meta, "size", &totalSize)) {
479  success = false;
480  break;
481  }
482  if (totalSize != yencSize) {
483  success = false;
484  break;
485  }
486 
487  m_filenames.append(fileName);
488  m_mimeTypes.append(guessMimeType(fileName));
489  m_bins.append(binary);
490 
491  //everything before "begin" is text
492  if (beginPos > 0) {
493  m_text.append(m_src.mid(currentPos, beginPos - currentPos));
494  }
495  currentPos = eolPos + 1;
496 
497  } else {
498  success = false;
499  }
500  }
501 
502  // append trailing text part of the article
503  m_text.append(m_src.right(m_src.length() - currentPos));
504 
505  return !m_bins.isEmpty();
506 }
507 
508 } // namespace Parser
509 
510 } // namespace KMime
bool isNull() const
bool isEmpty() const
QByteArray right(int len) const
int indexOf(char ch, int from) const
QByteArray & append(char ch)
void append(const T &value)
static QByteArray guessMimeType(const QByteArray &fileName)
try to guess the mimetype from the file-extension
char at(int i) const
int lastIndexOf(char ch, int from) const
QByteArray mid(int pos, int len) const
QByteArray toUpper() const
int toInt(bool *ok, int base) const
QByteArray left(int len) const
bool isEmpty() const
void resize(int size)
QString fromLatin1(const char *str, int size)
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
QString name(StandardShortcut id)
const QList< QKeySequence > & next()
int length() const
Helper-class: abstract base class of all parsers for non-mime binary data (uuencoded,...
Definition: kmime_parsers.h:56
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Mon May 8 2023 03:53:03 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.