KMime

kmime_parsers.cpp
1/*
2 kmime_parsers.cpp
3
4 KMime, the KDE Internet mail/usenet news message library.
5 SPDX-FileCopyrightText: 2001 the KMime authors.
6 See file AUTHORS for details
7
8 SPDX-License-Identifier: LGPL-2.0-or-later
9*/
10#include "kmime_parsers_p.h"
11
12#include <QRegularExpression>
13
14using namespace KMime::Parser;
15
16namespace KMime
17{
18namespace Parser
19{
20
21MultiPart::MultiPart(const QByteArray &src, const QByteArray &boundary)
22 : m_src(src)
23 , m_boundary(boundary)
24{
25}
26
27bool MultiPart::parse()
28{
29 QByteArray b = "--" + m_boundary;
30 QByteArray part;
31 int pos1 = 0;
32 int pos2 = 0;
33 int blen = b.length();
34
35 m_parts.clear();
36
37 //find the first valid boundary
38 while (true) {
39 if ((pos1 = m_src.indexOf(b, pos1)) == -1 || pos1 == 0 ||
40 m_src[pos1 - 1] == '\n') { //valid boundary found or no boundary at all
41 break;
42 }
43 pos1 += blen; //boundary found but not valid => skip it;
44 }
45
46 if (pos1 > -1) {
47 pos1 += blen;
48 if (m_src[pos1] == '-' && m_src[pos1 + 1] == '-') {
49 // the only valid boundary is the end-boundary
50 // this message is *really* broken
51 pos1 = -1; //we give up
52 } else if ((pos1 - blen) > 1) { //preamble present
53 m_preamble = m_src.left(pos1 - blen - 1);
54 }
55 }
56
57 while (pos1 > -1 && pos2 > -1) {
58
59 //skip the rest of the line for the first boundary - the message-part starts here
60 if ((pos1 = m_src.indexOf('\n', pos1)) > -1) {
61 //now search the next linebreak
62 //now find the next valid boundary
63 pos2 = ++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary
64 while (true) {
65 if ((pos2 = m_src.indexOf(b, pos2)) == -1 ||
66 m_src[pos2 - 1] == '\n') { //valid boundary or no more boundaries found
67 break;
68 }
69 pos2 += blen; //boundary is invalid => skip it;
70 }
71
72 if (pos2 == -1) { // no more boundaries found
73 part = m_src.mid(pos1, m_src.length() - pos1); //take the rest of the string
74 m_parts.append(part);
75 pos1 = -1;
76 pos2 = -1; //break;
77 } else {
78 part = m_src.mid(pos1, pos2 - pos1 - 1); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1)
79 m_parts.append(part);
80 pos2 += blen; //pos2 points now to the first character after the boundary
81 if (m_src[pos2] == '-' && m_src[pos2 + 1] == '-') { //end-boundary
82 pos1 = pos2 + 2; //pos1 points now to the character directly after the end-boundary
83
84 if ((pos1 = m_src.indexOf('\n', pos1)) > -1) { //skip the rest of this line
85 //everything after the end-boundary is considered as the epilouge
86 m_epilouge = m_src.mid(pos1 + 1, m_src.length() - pos1 - 1);
87 }
88 pos1 = -1;
89 pos2 = -1; //break
90 } else {
91 pos1 = pos2; //the search continues ...
92 }
93 }
94 }
95 }
96
97 return !m_parts.isEmpty();
98}
99
100//=============================================================================
101
102NonMimeParser::NonMimeParser(const QByteArray &src) :
103 m_src(src), m_partNr(-1), m_totalNr(-1)
104{
105}
106
107NonMimeParser::~NonMimeParser() = default;
108
109/**
110 * try to guess the mimetype from the file-extension
111 */
112
113QByteArray NonMimeParser::guessMimeType(const QByteArray &fileName)
114{
115 QByteArray tmp;
117
118 if (!fileName.isEmpty()) {
119 int pos = fileName.lastIndexOf('.');
120 if (pos++ != -1) {
121 tmp = fileName.mid(pos, fileName.length() - pos).toUpper();
122 if (tmp == "JPG" || tmp == "JPEG") {
123 mimeType = QByteArrayLiteral("image/jpeg");
124 } else if (tmp == "GIF") {
125 mimeType = QByteArrayLiteral("image/gif");
126 } else if (tmp == "PNG") {
127 mimeType = QByteArrayLiteral("image/png");
128 } else if (tmp == "TIFF" || tmp == "TIF") {
129 mimeType = QByteArrayLiteral("image/tiff");
130 } else if (tmp == "XPM") {
131 mimeType = QByteArrayLiteral("image/x-xpixmap");
132 } else if (tmp == "XBM") {
133 mimeType = QByteArrayLiteral("image/x-xbitmap");
134 } else if (tmp == "BMP") {
135 mimeType = QByteArrayLiteral("image/bmp");
136 } else if (tmp == "TXT" ||
137 tmp == "ASC" ||
138 tmp == "H" ||
139 tmp == "C" ||
140 tmp == "CC" ||
141 tmp == "CPP") {
142 mimeType = QByteArrayLiteral("text/plain");
143 } else if (tmp == "HTML" || tmp == "HTM") {
144 mimeType = QByteArrayLiteral("text/html");
145 } else {
146 mimeType = QByteArrayLiteral("application/octet-stream");
147 }
148 } else {
149 mimeType = QByteArrayLiteral("application/octet-stream");
150 }
151 } else {
152 mimeType = QByteArrayLiteral("application/octet-stream");
153 }
154
155 return mimeType;
156}
157
158//==============================================================================
159
160UUEncoded::UUEncoded(const QByteArray &src, const QByteArray &subject) :
161 NonMimeParser(src), m_subject(subject)
162{}
163
164bool UUEncoded::parse()
165{
166 int currentPos = 0;
167 bool success = true;
168 bool firstIteration = true;
169
170 const auto srcStr = QString::fromLatin1(m_src);
171 const QRegularExpression beginRegex(QStringLiteral("begin [0-9][0-9][0-9]"));
172 const QRegularExpression subjectRegex(QStringLiteral("[0-9]+/[0-9]+"));
173
174 while (success) {
175 int beginPos = currentPos;
176 int uuStart = currentPos;
177 int endPos = 0;
178 int lineCount = 0;
179 int MCount = 0;
180 int pos = 0;
181 int len = 0;
182 bool containsBegin = false;
183 bool containsEnd = false;
184 QByteArray tmp;
185 QByteArray fileName;
186
187 if ((beginPos = srcStr.indexOf(beginRegex, currentPos)) > -1 &&
188 (beginPos == 0 || m_src.at(beginPos - 1) == '\n')) {
189 containsBegin = true;
190 uuStart = m_src.indexOf('\n', beginPos);
191 if (uuStart == -1) { //no more line breaks found, we give up
192 success = false;
193 break;
194 } else {
195 uuStart++; //points now at the beginning of the next line
196 }
197 } else {
198 beginPos = currentPos;
199 }
200
201 if ((endPos = m_src.indexOf("\nend", (uuStart > 0) ? uuStart - 1 : 0)) == -1) {
202 endPos = m_src.length(); //no end found
203 } else {
204 containsEnd = true;
205 }
206
207 if ((containsBegin && containsEnd) || firstIteration) {
208
209 //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos);
210 //all lines in a uuencoded text start with 'M'
211 for (int idx = uuStart; idx < endPos; idx++) {
212 if (m_src[idx] == '\n') {
213 lineCount++;
214 if (idx + 1 < endPos && m_src[idx + 1] == 'M') {
215 idx++;
216 MCount++;
217 }
218 }
219 }
220
221 //printf("lineCount=%d , MCount=%d\n", lineCount, MCount);
222 if (MCount == 0 || (lineCount - MCount) > 10 ||
223 ((!containsBegin || !containsEnd) && (MCount < 15))) {
224 // harder check for split-articles
225 success = false;
226 break; //too many "non-M-Lines" found, we give up
227 }
228
229 if ((!containsBegin || !containsEnd) && !m_subject.isNull()) {
230 // message may be split up => parse subject
231 const auto match =
232 subjectRegex.match(QLatin1StringView(m_subject));
233 pos = match.capturedStart(0);
234 len = match.capturedLength(0);
235 if (pos != -1) {
236 tmp = m_subject.mid(pos, len);
237 pos = tmp.indexOf('/');
238 m_partNr = tmp.left(pos).toInt();
239 m_totalNr = tmp.right(tmp.length() - pos - 1).toInt();
240 } else {
241 success = false;
242 break; //no "part-numbers" found in the subject, we give up
243 }
244 }
245
246 //everything before "begin" is text
247 if (beginPos > 0) {
248 m_text.append(m_src.mid(currentPos, beginPos - currentPos));
249 }
250
251 if (containsBegin) {
252 //everything between "begin ### " and the next LF is considered as the filename
253 fileName = m_src.mid(beginPos + 10, uuStart - beginPos - 11);
254 } else {
255 fileName = "";
256 }
257 m_filenames.append(fileName);
258 //everything between "begin" and "end" is uuencoded
259 m_bins.append(m_src.mid(uuStart, endPos - uuStart + 1));
260 m_mimeTypes.append(guessMimeType(fileName));
261 firstIteration = false;
262
263 int next = m_src.indexOf('\n', endPos + 1);
264 if (next == -1) { //no more line breaks found, we give up
265 success = false;
266 break;
267 } else {
268 next++; //points now at the beginning of the next line
269 }
270 currentPos = next;
271
272 } else {
273 success = false;
274 }
275 }
276
277 // append trailing text part of the article
278 m_text.append(m_src.right(m_src.length() - currentPos));
279
280 return ((!m_bins.isEmpty()) || isPartial());
281}
282
283//==============================================================================
284
285YENCEncoded::YENCEncoded(const QByteArray &src) :
286 NonMimeParser(src)
287{
288}
289
290bool YENCEncoded::yencMeta(QByteArray &src, const QByteArray &name, int *value)
291{
292 bool found = false;
293 QByteArray sought = name + '=';
294
295 int iPos = src.indexOf(sought);
296 if (iPos > -1) {
297 int pos1 = src.indexOf(' ', iPos);
298 int pos2 = src.indexOf('\r', iPos);
299 int pos3 = src.indexOf('\t', iPos);
300 int pos4 = src.indexOf('\n', iPos);
301 if (pos2 >= 0 && (pos1 < 0 || pos1 > pos2)) {
302 pos1 = pos2;
303 }
304 if (pos3 >= 0 && (pos1 < 0 || pos1 > pos3)) {
305 pos1 = pos3;
306 }
307 if (pos4 >= 0 && (pos1 < 0 || pos1 > pos4)) {
308 pos1 = pos4;
309 }
310 iPos = src.lastIndexOf('=', pos1) + 1;
311 if (iPos < pos1) {
312 char c = src.at(iPos);
313 if (c >= '0' && c <= '9') {
314 found = true;
315 *value = src.mid(iPos, pos1 - iPos).toInt();
316 }
317 }
318 }
319 return found;
320}
321
322bool YENCEncoded::parse()
323{
324 int currentPos = 0;
325 bool success = true;
326 while (success) {
327 int beginPos = currentPos;
328 int yencStart = currentPos;
329 bool containsPart = false;
330 QByteArray fileName;
331
332 if ((beginPos = m_src.indexOf("=ybegin ", currentPos)) > -1 &&
333 (beginPos == 0 || m_src.at(beginPos - 1) == '\n')) {
334 yencStart = m_src.indexOf('\n', beginPos);
335 if (yencStart == -1) { // no more line breaks found, give up
336 success = false;
337 break;
338 } else {
339 yencStart++;
340 if (m_src.indexOf("=ypart", yencStart) == yencStart) {
341 containsPart = true;
342 yencStart = m_src.indexOf('\n', yencStart);
343 if (yencStart == -1) {
344 success = false;
345 break;
346 }
347 yencStart++;
348 }
349 }
350 // Try to identify yenc meta data
351
352 // Filenames can contain any embedded chars until end of line
353 QByteArray meta = m_src.mid(beginPos, yencStart - beginPos);
354 int namePos = meta.indexOf("name=");
355 if (namePos == -1) {
356 success = false;
357 break;
358 }
359 int eolPos = meta.indexOf('\r', namePos);
360 if (eolPos == -1) {
361 eolPos = meta.indexOf('\n', namePos);
362 }
363 if (eolPos == -1) {
364 success = false;
365 break;
366 }
367 fileName = meta.mid(namePos + 5, eolPos - (namePos + 5));
368
369 // Other metadata is integer
370 int yencLine;
371 if (!yencMeta(meta, "line", &yencLine)) {
372 success = false;
373 break;
374 }
375 int yencSize;
376 if (!yencMeta(meta, "size", &yencSize)) {
377 success = false;
378 break;
379 }
380
381 int partBegin;
382 int partEnd;
383 if (containsPart) {
384 if (!yencMeta(meta, "part", &m_partNr)) {
385 success = false;
386 break;
387 }
388 if (!yencMeta(meta, "begin", &partBegin) ||
389 !yencMeta(meta, "end", &partEnd)) {
390 success = false;
391 break;
392 }
393 if (!yencMeta(meta, "total", &m_totalNr)) {
394 m_totalNr = m_partNr + 1;
395 }
396 if (yencSize == partEnd - partBegin + 1) {
397 m_totalNr = 1;
398 } else {
399 yencSize = partEnd - partBegin + 1;
400 }
401 }
402
403 // We have a valid yenc header; now we extract the binary data
404 int totalSize = 0;
405 int pos = yencStart;
406 int len = m_src.length();
407 bool lineStart = true;
408 int lineLength = 0;
409 bool containsEnd = false;
410 QByteArray binary;
411 binary.resize(yencSize);
412 while (pos < len) {
413 int ch = m_src.at(pos);
414 if (ch < 0) {
415 ch += 256;
416 }
417 if (ch == '\r') {
418 if (lineLength != yencLine && totalSize != yencSize) {
419 break;
420 }
421 pos++;
422 } else if (ch == '\n') {
423 lineStart = true;
424 lineLength = 0;
425 pos++;
426 } else {
427 if (ch == '=') {
428 if (pos + 1 < len) {
429 ch = m_src.at(pos + 1);
430 if (lineStart && ch == 'y') {
431 containsEnd = true;
432 break;
433 }
434 pos += 2;
435 ch -= 64 + 42;
436 if (ch < 0) {
437 ch += 256;
438 }
439 if (totalSize >= yencSize) {
440 break;
441 }
442 binary[totalSize++] = ch;
443 lineLength++;
444 } else {
445 break;
446 }
447 } else {
448 ch -= 42;
449 if (ch < 0) {
450 ch += 256;
451 }
452 if (totalSize >= yencSize) {
453 break;
454 }
455 binary[totalSize++] = ch;
456 lineLength++;
457 pos++;
458 }
459 lineStart = false;
460 }
461 }
462
463 if (!containsEnd) {
464 success = false;
465 break;
466 }
467 if (totalSize != yencSize) {
468 success = false;
469 break;
470 }
471
472 // pos now points to =yend; get end data
473 eolPos = m_src.indexOf('\n', pos);
474 if (eolPos == -1) {
475 success = false;
476 break;
477 }
478 meta = m_src.mid(pos, eolPos - pos);
479 if (!yencMeta(meta, "size", &totalSize)) {
480 success = false;
481 break;
482 }
483 if (totalSize != yencSize) {
484 success = false;
485 break;
486 }
487
488 m_filenames.append(fileName);
489 m_mimeTypes.append(guessMimeType(fileName));
490 m_bins.append(binary);
491
492 //everything before "begin" is text
493 if (beginPos > 0) {
494 m_text.append(m_src.mid(currentPos, beginPos - currentPos));
495 }
496 currentPos = eolPos + 1;
497
498 } else {
499 success = false;
500 }
501 }
502
503 // append trailing text part of the article
504 m_text.append(m_src.right(m_src.length() - currentPos));
505
506 return !m_bins.isEmpty();
507}
508
509} // namespace Parser
510
511} // namespace KMime
KCALUTILS_EXPORT QString mimeType()
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
const QList< QKeySequence > & next()
QString name(StandardShortcut id)
QByteArray & append(QByteArrayView data)
char at(qsizetype i) const const
qsizetype indexOf(QByteArrayView bv, qsizetype from) const const
bool isEmpty() const const
qsizetype lastIndexOf(QByteArrayView bv) const const
QByteArray left(qsizetype len) const const
qsizetype length() const const
QByteArray mid(qsizetype pos, qsizetype len) const const
void resize(qsizetype newSize, char c)
QByteArray right(qsizetype len) const const
int toInt(bool *ok, int base) const const
QByteArray toUpper() const const
QString fromLatin1(QByteArrayView str)
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:20:12 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.