KMime

headerparsing.cpp
1/* -*- c++ -*-
2 kmime_header_parsing.cpp
3
4 KMime, the KDE Internet mail/usenet news message library.
5 SPDX-FileCopyrightText: 2001-2002 Marc Mutz <mutz@kde.org>
6
7 SPDX-License-Identifier: LGPL-2.0-or-later
8*/
9
10#include "headerparsing.h"
11#include "headerparsing_p.h"
12
13#include "headerfactory_p.h"
14#include "headers.h"
15#include "headers_p.h"
16#include "util.h"
17#include "util_p.h"
18#include "codecs_p.h"
19#include "kmime_debug.h"
20#include "warning_p.h"
21
22#include <KCodecs>
23
24#include <QStringDecoder>
25#include <QTimeZone>
26
27#include <cassert>
28#include <cctype> // for isdigit
29
30using namespace KMime;
31using namespace KMime::Types;
32
33namespace KMime
34{
35
namespace Types
{
// Optimization to avoid allocating QStrings when the value isn't encoded.
//
// Holds a raw parameter value as produced by parseParameter():
//  - qstring is filled when the value was a quoted-string (already unfolded
//    and unescaped by parseGenericQuotedString());
//  - view is filled when the value was a plain token; it is a non-owning
//    view into the buffer being parsed, so it is only usable while that
//    buffer is alive.
// A default-constructed instance (both members empty) is used for omitted
// or unparsable values.
struct KMIME_EXPORT QStringOrQPair {
    QString qstring;
    QByteArrayView view;
};
} // namespace Types
44
45namespace HeaderParsing
46{
47
48// parse the encoded-word (scursor points to after the initial '=')
49bool parseEncodedWord(const char *&scursor, const char *const send,
50 QString &result, QByteArray &language,
51 QByteArray &usedCS, const QByteArray &defaultCS)
52{
53 // make sure the caller already did a bit of the work.
54 assert(*(scursor - 1) == '=');
55
56 //
57 // STEP 1:
58 // scan for the charset/language portion of the encoded-word
59 //
60
61 char ch = *scursor++;
62
63 if (ch != '?') {
64 // qCDebug(KMIME_LOG) << "first";
65 //KMIME_WARN_PREMATURE_END_OF( EncodedWord );
66 return false;
67 }
68
69 // remember start of charset (i.e. just after the initial "=?") and
70 // language (just after the first '*') fields:
71 const char *charsetStart = scursor;
72 const char *languageStart = nullptr;
73
74 // find delimiting '?' (and the '*' separating charset and language
75 // tags, if any):
76 for (; scursor != send ; scursor++) {
77 if (*scursor == '?') {
78 break;
79 } else if (*scursor == '*' && languageStart == nullptr) {
80 languageStart = scursor + 1;
81 }
82 }
83
84 // not found? can't be an encoded-word!
85 if (scursor == send || *scursor != '?') {
86 // qCDebug(KMIME_LOG) << "second";
87 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
88 return false;
89 }
90
91 // extract the language information, if any (if languageStart is 0,
92 // language will be null, too):
93 QByteArray maybeLanguage(languageStart, scursor - languageStart);
94 // extract charset information (keep in mind: the size given to the
95 // ctor is one off due to the \0 terminator):
96 QByteArray maybeCharset(charsetStart,
97 (languageStart ? languageStart - 1 : scursor) - charsetStart);
98
99 //
100 // STEP 2:
101 // scan for the encoding portion of the encoded-word
102 //
103
104 // remember start of encoding (just _after_ the second '?'):
105 scursor++;
106 const char *encodingStart = scursor;
107
108 // find next '?' (ending the encoding tag):
109 for (; scursor != send ; scursor++) {
110 if (*scursor == '?') {
111 break;
112 }
113 }
114
115 // not found? Can't be an encoded-word!
116 if (scursor == send || *scursor != '?') {
117 // qCDebug(KMIME_LOG) << "third";
118 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
119 return false;
120 }
121
122 // extract the encoding information:
123 QByteArray maybeEncoding(encodingStart, scursor - encodingStart);
124
125 // qCDebug(KMIME_LOG) << "parseEncodedWord: found charset == \"" << maybeCharset
126 // << "\"; language == \"" << maybeLanguage
127 // << "\"; encoding == \"" << maybeEncoding << "\"";
128
129 //
130 // STEP 3:
131 // scan for encoded-text portion of encoded-word
132 //
133
134 // remember start of encoded-text (just after the third '?'):
135 scursor++;
136 const char *encodedTextStart = scursor;
137
138 // find the '?=' sequence (ending the encoded-text):
139 for (; scursor != send ; scursor++) {
140 if (*scursor == '?') {
141 if (scursor + 1 != send) {
142 if (*(scursor + 1) != '=') { // We expect a '=' after the '?', but we got something else; ignore
143 KMIME_WARN << "Stray '?' in q-encoded word, ignoring this.";
144 continue;
145 } else { // yep, found a '?=' sequence
146 scursor += 2;
147 break;
148 }
149 } else { // The '?' is the last char, but we need a '=' after it!
150 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
151 return false;
152 }
153 }
154 }
155
156 if (*(scursor - 2) != '?' || *(scursor - 1) != '=' ||
157 scursor < encodedTextStart + 2) {
158 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
159 return false;
160 }
161
162 // set end sentinel for encoded-text:
163 const char *const encodedTextEnd = scursor - 2;
164
165 //
166 // STEP 4:
167 // setup decoders for the transfer encoding and the charset
168 //
169
170 // try if there's a codec for the encoding found:
171 KCodecs::Codec *codec = KCodecs::Codec::codecForName(maybeEncoding);
172 if (!codec) {
173 KMIME_WARN_UNKNOWN(Encoding, maybeEncoding);
174 return false;
175 }
176
177 // get an instance of a corresponding decoder:
178 KCodecs::Decoder *dec = codec->makeDecoder();
179 assert(dec);
180
181 // try if there's a (text)codec for the charset found:
182 QStringDecoder textCodec;
183 if (maybeCharset.isEmpty()) {
184 textCodec = QStringDecoder(defaultCS.constData());
185 if (!textCodec.isValid()) {
187 }
188 usedCS = cachedCharset(defaultCS);
189 } else {
190 textCodec = QStringDecoder(maybeCharset.constData());
191 if (textCodec.isValid()) { //no suitable codec found => use default charset
192 usedCS = cachedCharset(defaultCS);
193 } else {
195 usedCS = cachedCharset(maybeCharset);
196 }
197 }
198
199 if (!textCodec.isValid()) {
200 KMIME_WARN_UNKNOWN(Charset, maybeCharset);
201 delete dec;
202 return false;
203 };
204
205 // qCDebug(KMIME_LOG) << "mimeName(): \"" << textCodec->name() << "\"";
206
207 // allocate a temporary buffer to store the 8bit text:
208 const auto encodedTextLength = encodedTextEnd - encodedTextStart;
209 QByteArray buffer;
210 buffer.resize(codec->maxDecodedSizeFor(encodedTextLength));
211 char *bbegin = buffer.data();
212 char *bend = bbegin + buffer.length();
213
214 //
215 // STEP 5:
216 // do the actual decoding
217 //
218
219 if (!dec->decode(encodedTextStart, encodedTextEnd, bbegin, bend)) {
220 KMIME_WARN << codec->name() << "codec lies about its maxDecodedSizeFor("
221 << encodedTextLength << ")\nresult may be truncated";
222 }
223
224 result = textCodec.decode(QByteArrayView(buffer.data(), bbegin - buffer.data()));
225
226 // qCDebug(KMIME_LOG) << "result now: \"" << result << "\"";
227 // cleanup:
228 delete dec;
229 language = maybeLanguage;
230
231 return true;
232}
233
// Advances scursor over any run of SP, HTAB, CR and LF characters.
// Stops at the first other character, or at send.
static inline void eatWhiteSpace(const char *&scursor, const char *const send)
{
    for (; scursor != send; ++scursor) {
        switch (*scursor) {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
            // still whitespace: keep going
            break;
        default:
            return;
        }
    }
}
242
243bool parseAtom(const char*&scursor, const char *const send,
244 QByteArrayView &result, bool allow8Bit)
245{
246 bool success = false;
247 const char *start = scursor;
248
249 while (scursor != send) {
250 signed char ch = *scursor++;
251 if (ch > 0 && isAText(ch)) {
252 // AText: OK
253 success = true;
254 } else if (allow8Bit && ch < 0) {
255 // 8bit char: not OK, but be tolerant.
256 KMIME_WARN_8BIT(ch);
257 success = true;
258 } else {
259 // CTL or special - marking the end of the atom:
260 // re-set sursor to point to the offending
261 // char and return:
262 scursor--;
263 break;
264 }
265 }
266 result = QByteArrayView(start, scursor - start);
267 return success;
268}
269
270bool parseToken(const char*&scursor, const char *const send,
271 QByteArrayView &result, ParseTokenFlags flags)
272{
273 bool success = false;
274 const char *start = scursor;
275
276 while (scursor != send) {
277 signed char ch = *scursor++;
278 if (ch > 0 && isTText(ch)) {
279 // TText: OK
280 success = true;
281 } else if ((flags & ParseTokenAllow8Bit) && ch < 0) {
282 // 8bit char: not OK, but be tolerant.
283 KMIME_WARN_8BIT(ch);
284 success = true;
285 } else if ((flags & ParseTokenRelaxedTText) && ch == '/') {
286 success = true;
287 } else {
288 // CTL or tspecial - marking the end of the atom:
289 // re-set sursor to point to the offending
290 // char and return:
291 scursor--;
292 break;
293 }
294 }
295 result = QByteArrayView(start, scursor - start);
296 return success;
297}
298
// Helper for parseGenericQuotedString(): reads the next character into the
// local variable "ch" and advances "scursor". If the input is exhausted
// (scursor == send) it emits a premature-end warning and makes the
// *enclosing function* return false. Relies on "scursor", "send" and "ch"
// being in scope at the point of use.
#define READ_ch_OR_FAIL if ( scursor == send ) {        \
        KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \
        return false;                                       \
    } else {                                                \
        ch = *scursor++;                                    \
    }
305
306// known issues:
307//
308// - doesn't handle quoted CRLF
309
// Consumes the inside of a quoted-string, domain-literal or comment and
// appends the unfolded text, with quoted-pairs resolved, to result.
//
// scursor   - must point just after an openChar or closeChar (asserted).
// send      - one past the last character that may be read.
// result    - text is appended; existing content is kept.
// isCRLF    - when true, a bare LF followed by SP/HTAB is NOT treated as
//             folding but taken verbatim.
// openChar / closeChar - the delimiters of the construct being parsed.
//
// Returns true as soon as an unescaped openChar or closeChar has been
// consumed; scursor then points just after it and the delimiter itself is
// not appended to result, so the caller can inspect *(scursor - 1).
// Returns false if send is reached first.
bool parseGenericQuotedString(const char *&scursor, const char *const send,
                              QString &result, bool isCRLF,
                              const char openChar, const char closeChar)
{
    // We are in a quoted-string or domain-literal or comment and the
    // cursor points to the first char after the openChar.
    // We will apply unfolding and quoted-pair removal.
    // We return when we either encounter the end or unescaped openChar
    // or closeChar.
    assert(*(scursor - 1) == openChar || *(scursor - 1) == closeChar);

    while (scursor != send) {
        char ch = *scursor++;

        if (ch == closeChar || ch == openChar) {
            // end of quoted-string or another opening char:
            // let caller decide what to do.
            return true;
        }

        switch (ch) {
        case '\\':      // quoted-pair
            // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5
            // The escaped character is appended verbatim, whatever it is.
            READ_ch_OR_FAIL;
            KMIME_WARN_IF_8BIT(ch);
            result += QLatin1Char(ch);
            break;
        case '\r':
            // ###
            // The case of lonely '\r' is easy to solve, as they're
            // not part of Unix Line-ending conventions.
            // But I see a problem if we are given Unix-native
            // line-ending-mails, where we cannot determine anymore
            // whether a given '\n' was part of a CRLF or was occurring
            // on its own.
            READ_ch_OR_FAIL;
            if (ch != '\n') {
                // CR on its own...
                KMIME_WARN_LONE(CR);
                result += QLatin1Char('\r');
                scursor--; // points to after the '\r' again
            } else {
                // CRLF encountered.
                // lookahead: check for folding
                READ_ch_OR_FAIL;
                if (ch == ' ' || ch == '\t') {
                    // correct folding;
                    // position cursor behind the CRLF WSP (unfolding)
                    // and add the WSP to the result
                    result += QLatin1Char(ch);
                } else {
                    // this is the "shouldn't happen"-case. There is a CRLF
                    // inside a quoted-string without it being part of FWS.
                    // We take it verbatim.
                    KMIME_WARN_NON_FOLDING(CRLF);
                    result += QLatin1StringView("\r\n");
                    // the cursor is decremented again, we need not
                    // duplicate the whole switch here. "ch" could've been
                    // everything (incl. openChar or closeChar).
                    scursor--;
                }
            }
            break;
        case '\n':
            // Note: CRLF has been handled above already!
            // ### LF needs special treatment, depending on whether isCRLF
            // is true (we can be sure a lonely '\n' was meant this way) or
            // false ('\n' alone could have meant LF or CRLF in the original
            // message. This parser assumes CRLF iff the LF is followed by
            // either WSP (folding) or NULL (premature end of quoted-string;
            // Should be fixed, since NULL is allowed as per rfc822).
            READ_ch_OR_FAIL;
            if (!isCRLF && (ch == ' ' || ch == '\t')) {
                // folding
                // correct folding
                result += QLatin1Char(ch);
            } else {
                // non-folding
                KMIME_WARN_LONE(LF);
                result += QLatin1Char('\n');
                // pos is decremented, so's we need not duplicate the whole
                // switch here. ch could've been everything (incl. <">, "\").
                scursor--;
            }
            break;
        case '=': {
            // ### Work around broken clients that send encoded words in quoted-strings
            //     For example, older KMail versions.
            // A '=' that is the very last character is dropped: the loop
            // ends and the function returns false without appending it.
            if (scursor == send) {
                break;
            }

            // Position just after the '=', restored if this turns out not
            // to be an encoded-word.
            const char *oldscursor = scursor;
            QString tmp;
            QByteArray lang;
            QByteArray charset;
            if (*scursor++ == '?') {
                // Step back onto the '?': parseEncodedWord() expects the
                // cursor just after the '='.
                --scursor;
                if (parseEncodedWord(scursor, send, tmp, lang, charset)) {
                    result += tmp;
                    // Peek at what follows the encoded-word. If it is
                    // " =?" (space, then another encoded-word), the cursor
                    // is left on that '=' so the separating space is
                    // skipped; otherwise the peeked characters are given
                    // back by decrementing the cursor again.
                    if (scursor == send) {
                        break;
                    } else if (*scursor++ == ' ') { //Workaround Bug 362650 thunderbird add space for each new line
                        if (scursor == send) {
                            --scursor;
                            break;
                        } else if (*scursor++ == '=') {
                            if (scursor == send) {
                                --scursor;
                                --scursor;
                                break;
                            } else if (*scursor++ == '?') {
                                --scursor;
                                --scursor;
                                break;
                            }
                            // NOTE(review): when " =" is followed by
                            // something other than '?', the cursor is not
                            // rewound here, so the three peeked characters
                            // are skipped - confirm this is intended.
                        } else {
                            --scursor;
                            --scursor;
                        }
                    } else {
                        --scursor;
                    }

                    break;
                } else {
                    scursor = oldscursor;
                }
            } else {
                scursor = oldscursor;
            }
            // Not an encoded-word: treat the '=' as an ordinary character.
            // fall through
            [[fallthrough]];
        }
        default:
            KMIME_WARN_IF_8BIT(ch);
            result += QLatin1Char(ch);
        }
    }

    // Ran out of input before seeing an unescaped delimiter.
    return false;
}
453
454// known issues:
455//
456// - doesn't handle encoded-word inside comments.
457
458bool parseComment(const char *&scursor, const char *const send,
459 QString &result, bool isCRLF, bool reallySave)
460{
461 int commentNestingDepth = 1;
462 const char *afterLastClosingParenPos = nullptr;
463 QString maybeCmnt;
464 const char *oldscursor = scursor;
465
466 assert(*(scursor - 1) == '(');
467
468 while (commentNestingDepth) {
469 QString cmntPart;
470 if (parseGenericQuotedString(scursor, send, cmntPart, isCRLF, '(', ')')) {
471 assert(*(scursor - 1) == ')' || *(scursor - 1) == '(');
472 // see the kdoc for the above function for the possible conditions
473 // we have to check:
474 switch (*(scursor - 1)) {
475 case ')':
476 if (reallySave) {
477 // add the chunk that's now surely inside the comment.
478 result += maybeCmnt;
479 result += cmntPart;
480 if (commentNestingDepth > 1) {
481 // don't add the outermost ')'...
482 result += QLatin1Char(')');
483 }
484 maybeCmnt.clear();
485 }
486 afterLastClosingParenPos = scursor;
487 --commentNestingDepth;
488 break;
489 case '(':
490 if (reallySave) {
491 // don't add to "result" yet, because we might find that we
492 // are already outside the (broken) comment...
493 maybeCmnt += cmntPart;
494 maybeCmnt += QLatin1Char('(');
495 }
496 ++commentNestingDepth;
497 break;
498 default: assert(0);
499 } // switch
500 } else {
501 // !parseGenericQuotedString, i.e. premature end
502 if (afterLastClosingParenPos) {
503 scursor = afterLastClosingParenPos;
504 } else {
505 scursor = oldscursor;
506 }
507 return false;
508 }
509 } // while
510
511 return true;
512}
513
514// known issues: none.
515
// Parses a phrase: a sequence of atoms, quoted-strings and encoded-words,
// optionally interleaved with comments (which are skipped) and, for
// interoperability, '.' characters.
//
// scursor - parse position; on success it points after the last word that
//           was parsed (and after any whitespace following it when the end
//           of input was reached through the main loop).
// send    - one past the last character that may be read.
// result  - the words are appended, separated by a single space; no space
//           is inserted between two directly adjacent encoded-words.
// isCRLF  - passed on to the quoted-string and comment parsers.
//
// Returns true if at least one word was found.
bool parsePhrase(const char *&scursor, const char *const send,
                 QString &result, bool isCRLF)
{
    // Kind of what has been parsed so far; becomes Phrase as soon as a
    // second word is seen.
    enum {
        None, Phrase, Atom, EncodedWord, QuotedString
    } found = None;

    QString tmp;
    QByteArray lang;
    QByteArray charset;
    QByteArrayView tmpAtom;
    // cursor position just after the last successfully parsed element;
    // used to rewind when a later element fails:
    const char *successfullyParsed = nullptr;
    // only used by the encoded-word branch
    const char *oldscursor;
    // used to suppress whitespace between adjacent encoded-words
    // (rfc2047, 6.2):
    bool lastWasEncodedWord = false;

    while (scursor != send) {
        char ch = *scursor++;
        switch (ch) {
        case '.': // broken, but allow for interop's sake
            if (found == None) {
                // a phrase must not start with '.': give the dot back
                --scursor;
                return false;
            } else {
                if (scursor != send && (*scursor == ' ' || *scursor == '\t')) {
                    result += QLatin1StringView(". ");
                } else {
                    result += QLatin1Char('.');
                }
                successfullyParsed = scursor;
            }
            break;
        case '"': // quoted-string
            tmp.clear();
            if (parseGenericQuotedString(scursor, send, tmp, isCRLF, '"', '"')) {
                successfullyParsed = scursor;
                assert(*(scursor - 1) == '"');
                switch (found) {
                case None:
                    found = QuotedString;
                    break;
                case Phrase:
                case Atom:
                case EncodedWord:
                case QuotedString:
                    found = Phrase;
                    result += QLatin1Char(' ');   // rfc822, 3.4.4
                    break;
                default:
                    assert(0);
                }
                lastWasEncodedWord = false;
                result += tmp;
            } else {
                // premature end of quoted string.
                // What to do? Return leading '"' as special? Return as quoted-string?
                // We do the latter if we already found something, else signal failure.
                if (found == None) {
                    return false;
                } else {
                    result += QLatin1Char(' ');   // rfc822, 3.4.4
                    result += tmp;
                    return true;
                }
            }
            break;
        case '(': // comment
            // parse it, but ignore content:
            tmp.clear();
            if (parseComment(scursor, send, tmp, isCRLF,
                             false /*don't bother with the content*/)) {
                successfullyParsed = scursor;
                lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2
            } else {
                if (found == None) {
                    return false;
                } else {
                    // keep what we have; rewind to the last good position
                    scursor = successfullyParsed;
                    return true;
                }
            }
            break;
        case '=': // encoded-word
            tmp.clear();
            oldscursor = scursor;
            lang.clear();
            charset.clear();
            if (parseEncodedWord(scursor, send, tmp, lang, charset)) {
                successfullyParsed = scursor;
                switch (found) {
                case None:
                    found = EncodedWord;
                    break;
                case Phrase:
                case EncodedWord:
                case Atom:
                case QuotedString:
                    if (!lastWasEncodedWord) {
                        result += QLatin1Char(' ');   // rfc822, 3.4.4
                    }
                    found = Phrase;
                    break;
                default: assert(0);
                }
                lastWasEncodedWord = true;
                result += tmp;
                break;
            } else {
                // parse as atom:
                scursor = oldscursor;
            }
            [[fallthrough]];
            // fall through...

        default: //atom
            // step back onto the character that was just read so that
            // parseAtom() sees it
            scursor--;
            if (parseAtom(scursor, send, tmpAtom, true /* allow 8bit */)) {
                successfullyParsed = scursor;
                switch (found) {
                case None:
                    found = Atom;
                    break;
                case Phrase:
                case Atom:
                case EncodedWord:
                case QuotedString:
                    found = Phrase;
                    result += QLatin1Char(' ');   // rfc822, 3.4.4
                    break;
                default:
                    assert(0);
                }
                lastWasEncodedWord = false;
                result += QLatin1StringView(tmpAtom);
            } else {
                // not an atom character: the phrase ends here
                if (found == None) {
                    return false;
                } else {
                    scursor = successfullyParsed;
                    return true;
                }
            }
        }
        eatWhiteSpace(scursor, send);
    }

    return found != None;
}
666
667bool parseDotAtom(const char *&scursor, const char *const send,
668 QByteArray &result, bool isCRLF)
669{
670 eatCFWS(scursor, send, isCRLF);
671
672 // always points to just after the last atom parsed:
673 const char *successfullyParsed;
674
675 QByteArrayView maybeAtom;
676 if (!parseAtom(scursor, send, maybeAtom, false /* no 8bit */)) {
677 return false;
678 }
679 result += maybeAtom;
680 successfullyParsed = scursor;
681
682 while (scursor != send) {
683
684 // end of header or no '.' -> return
685 if (scursor == send || *scursor != '.') {
686 return true;
687 }
688 scursor++; // eat '.'
689
690 if (scursor == send || !isAText(*scursor)) {
691 // end of header or no AText, but this time following a '.'!:
692 // reset cursor to just after last successfully parsed char and
693 // return:
694 scursor = successfullyParsed;
695 return true;
696 }
697
698 // try to parse the next atom:
699 maybeAtom = {};
700 if (!parseAtom(scursor, send, maybeAtom, false /*no 8bit*/)) {
701 scursor = successfullyParsed;
702 return true;
703 }
704
705 result += '.';
706 result += maybeAtom;
707 successfullyParsed = scursor;
708 }
709
710 scursor = successfullyParsed;
711 return true;
712}
713
714void eatCFWS(const char *&scursor, const char *const send, bool isCRLF)
715{
716 QString dummy;
717
718 while (scursor != send) {
719 const char *oldscursor = scursor;
720
721 char ch = *scursor++;
722
723 switch (ch) {
724 case ' ':
725 case '\t': // whitespace
726 case '\r':
727 case '\n': // folding
728 continue;
729
730 case '(': // comment
731 if (parseComment(scursor, send, dummy, isCRLF, false /*don't save*/)) {
732 continue;
733 }
734 scursor = oldscursor;
735 return;
736
737 default:
738 scursor = oldscursor;
739 return;
740 }
741 }
742}
743
744bool parseDomain(const char *&scursor, const char *const send,
745 QString &result, bool isCRLF)
746{
747 eatCFWS(scursor, send, isCRLF);
748 if (scursor == send) {
749 return false;
750 }
751
752 // domain := dot-atom / domain-literal / atom *("." atom)
753 //
754 // equivalent to:
755 // domain = dot-atom / domain-literal,
756 // since parseDotAtom does allow CFWS between atoms and dots
757
758 if (*scursor == '[') {
759 // domain-literal:
760 QString maybeDomainLiteral;
761 // eat '[':
762 scursor++;
763 while (parseGenericQuotedString(scursor, send, maybeDomainLiteral,
764 isCRLF, '[', ']')) {
765 if (scursor == send) {
766 // end of header: check for closing ']':
767 if (*(scursor - 1) == ']') {
768 // OK, last char was ']':
769 result = maybeDomainLiteral;
770 return true;
771 } else {
772 // not OK, domain-literal wasn't closed:
773 return false;
774 }
775 }
776 // we hit openChar in parseGenericQuotedString.
777 // include it in maybeDomainLiteral and keep on parsing:
778 if (*(scursor - 1) == '[') {
779 maybeDomainLiteral += QLatin1Char('[');
780 continue;
781 }
782 // OK, real end of domain-literal:
783 result = maybeDomainLiteral;
784 return true;
785 }
786 } else {
787 // dot-atom:
788 QByteArray maybeDotAtom;
789 if (parseDotAtom(scursor, send, maybeDotAtom, isCRLF)) {
790 // Domain may end with '.', if so preserve it'
791 if (scursor != send && *scursor == '.') {
792 maybeDotAtom += '.';
793 scursor++;
794 }
795 result = QString::fromLatin1(maybeDotAtom);
796 return true;
797 }
798 }
799 return false;
800}
801
802bool parseObsRoute(const char *&scursor, const char *const send,
803 QStringList &result, bool isCRLF, bool save)
804{
805 while (scursor != send) {
806 eatCFWS(scursor, send, isCRLF);
807 if (scursor == send) {
808 return false;
809 }
810
811 // empty entry:
812 if (*scursor == ',') {
813 scursor++;
814 if (save) {
815 result.append(QString());
816 }
817 continue;
818 }
819
820 // empty entry ending the list:
821 if (*scursor == ':') {
822 scursor++;
823 if (save) {
824 result.append(QString());
825 }
826 return true;
827 }
828
829 // each non-empty entry must begin with '@':
830 if (*scursor != '@') {
831 return false;
832 } else {
833 scursor++;
834 }
835
836 QString maybeDomain;
837 if (!parseDomain(scursor, send, maybeDomain, isCRLF)) {
838 return false;
839 }
840 if (save) {
841 result.append(maybeDomain);
842 }
843
844 // eat the following (optional) comma:
845 eatCFWS(scursor, send, isCRLF);
846 if (scursor == send) {
847 return false;
848 }
849 if (*scursor == ':') {
850 scursor++;
851 return true;
852 }
853 if (*scursor == ',') {
854 scursor++;
855 }
856 }
857
858 return false;
859}
860
861bool parseAddrSpec(const char *&scursor, const char *const send,
862 AddrSpec &result, bool isCRLF)
863{
864 //
865 // STEP 1:
866 // local-part := dot-atom / quoted-string / word *("." word)
867 //
868 // this is equivalent to:
869 // local-part := word *("." word)
870
871 QString maybeLocalPart;
872 QString tmp;
873 QByteArrayView tmpAtom;
874
875 while (scursor != send) {
876 // first, eat any whitespace
877 eatCFWS(scursor, send, isCRLF);
878
879 char ch = *scursor++;
880 switch (ch) {
881 case '.': // dot
882 maybeLocalPart += QLatin1Char('.');
883 break;
884
885 case '@':
886 goto SAW_AT_SIGN;
887 break;
888
889 case '"': // quoted-string
890 tmp.clear();
891 if (parseGenericQuotedString(scursor, send, tmp, isCRLF, '"', '"')) {
892 maybeLocalPart += tmp;
893 } else {
894 return false;
895 }
896 break;
897
898 default: // atom
899 scursor--; // re-set scursor to point to ch again
900 if (parseAtom(scursor, send, tmpAtom, false /* no 8bit */)) {
901 maybeLocalPart += QLatin1StringView(tmpAtom);
902 } else {
903 return false; // parseAtom can only fail if the first char is non-atext.
904 }
905 break;
906 }
907 }
908
909 return false;
910
911 //
912 // STEP 2:
913 // domain
914 //
915
916SAW_AT_SIGN:
917
918 assert(*(scursor - 1) == '@');
919
920 QString maybeDomain;
921 if (!parseDomain(scursor, send, maybeDomain, isCRLF)) {
922 return false;
923 }
924
925 result.localPart = maybeLocalPart;
926 result.domain = maybeDomain;
927
928 return true;
929}
930
931bool parseAngleAddr(const char *&scursor, const char *const send,
932 AddrSpec &result, bool isCRLF)
933{
934 // first, we need an opening angle bracket:
935 eatCFWS(scursor, send, isCRLF);
936 if (scursor == send || *scursor != '<') {
937 return false;
938 }
939 scursor++; // eat '<'
940
941 eatCFWS(scursor, send, isCRLF);
942 if (scursor == send) {
943 return false;
944 }
945
946 if (*scursor == '@' || *scursor == ',') {
947 // obs-route: parse, but ignore:
948 KMIME_WARN << "obsolete source route found! ignoring.";
949 QStringList dummy;
950 if (!parseObsRoute(scursor, send, dummy,
951 isCRLF, false /* don't save */)) {
952 return false;
953 }
954 // angle-addr isn't complete until after the '>':
955 if (scursor == send) {
956 return false;
957 }
958 }
959
960 // parse addr-spec:
961 AddrSpec maybeAddrSpec;
962 if (!parseAddrSpec(scursor, send, maybeAddrSpec, isCRLF)) {
963 return false;
964 }
965
966 eatCFWS(scursor, send, isCRLF);
967 if (scursor == send || *scursor != '>') {
968 return false;
969 }
970 scursor++;
971
972 result = maybeAddrSpec;
973 return true;
974
975}
976
977static QString stripQuotes(const QString &input)
978{
979 const QLatin1Char quotes('"');
980 if (input.startsWith(quotes) && input.endsWith(quotes)) {
981 QString stripped(input.mid(1, input.size() - 2));
982 return stripped;
983 } else {
984 return input;
985 }
986}
987
988bool parseMailbox(const char *&scursor, const char *const send,
989 Mailbox &result, bool isCRLF)
990{
991 eatCFWS(scursor, send, isCRLF);
992 if (scursor == send) {
993 return false;
994 }
995
996 AddrSpec maybeAddrSpec;
997 QString maybeDisplayName;
998
999 // first, try if it's a vanilla addr-spec:
1000 const char *oldscursor = scursor;
1001 if (parseAddrSpec(scursor, send, maybeAddrSpec, isCRLF)) {
1002 result.setAddress(maybeAddrSpec);
1003 // check for the obsolete form of display-name (as comment):
1004 eatWhiteSpace(scursor, send);
1005 if (scursor != send && *scursor == '(') {
1006 scursor++;
1007 if (!parseComment(scursor, send, maybeDisplayName, isCRLF, true /*keep*/)) {
1008 return false;
1009 }
1010 }
1011 result.setName(stripQuotes(maybeDisplayName));
1012 return true;
1013 }
1014 scursor = oldscursor;
1015
1016 // second, see if there's a display-name:
1017 if (!parsePhrase(scursor, send, maybeDisplayName, isCRLF)) {
1018 // failed: reset cursor, note absent display-name
1019 maybeDisplayName.clear();
1020 scursor = oldscursor;
1021 } else {
1022 // succeeded: eat CFWS
1023 eatCFWS(scursor, send, isCRLF);
1024 if (scursor == send) {
1025 return false;
1026 }
1027 }
1028
1029 // third, parse the angle-addr:
1030 if (!parseAngleAddr(scursor, send, maybeAddrSpec, isCRLF)) {
1031 return false;
1032 }
1033
1034 if (maybeDisplayName.isNull()) {
1035 // check for the obsolete form of display-name (as comment):
1036 eatWhiteSpace(scursor, send);
1037 if (scursor != send && *scursor == '(') {
1038 scursor++;
1039 if (!parseComment(scursor, send, maybeDisplayName, isCRLF, true /*keep*/)) {
1040 return false;
1041 }
1042 }
1043 }
1044
1045 result.setName(stripQuotes(maybeDisplayName));
1046 result.setAddress(maybeAddrSpec);
1047 return true;
1048}
1049
1050bool parseGroup(const char *&scursor, const char *const send,
1051 Address &result, bool isCRLF)
1052{
1053 // group := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS]
1054 //
1055 // equivalent to:
1056 // group := display-name ":" [ obs-mbox-list ] ";"
1057
1058 eatCFWS(scursor, send, isCRLF);
1059 if (scursor == send) {
1060 return false;
1061 }
1062
1063 // get display-name:
1064 QString maybeDisplayName;
1065 if (!parsePhrase(scursor, send, maybeDisplayName, isCRLF)) {
1066 return false;
1067 }
1068
1069 // get ":":
1070 eatCFWS(scursor, send, isCRLF);
1071 if (scursor == send || *scursor != ':') {
1072 return false;
1073 }
1074
1075 // KDE5 TODO: Don't expose displayName as public, but rather add setter for it that
1076 // automatically calls removeBidiControlChars
1077 result.displayName = removeBidiControlChars(maybeDisplayName);
1078
1079 // get obs-mbox-list (may contain empty entries):
1080 scursor++;
1081 while (scursor != send) {
1082 eatCFWS(scursor, send, isCRLF);
1083 if (scursor == send) {
1084 return false;
1085 }
1086
1087 // empty entry:
1088 if (*scursor == ',') {
1089 scursor++;
1090 continue;
1091 }
1092
1093 // empty entry ending the list:
1094 if (*scursor == ';') {
1095 scursor++;
1096 return true;
1097 }
1098
1099 Mailbox maybeMailbox;
1100 if (!parseMailbox(scursor, send, maybeMailbox, isCRLF)) {
1101 return false;
1102 }
1103 result.mailboxList.append(maybeMailbox);
1104
1105 eatCFWS(scursor, send, isCRLF);
1106 // premature end:
1107 if (scursor == send) {
1108 return false;
1109 }
1110 // regular end of the list:
1111 if (*scursor == ';') {
1112 scursor++;
1113 return true;
1114 }
1115 // eat regular list entry separator:
1116 if (*scursor == ',') {
1117 scursor++;
1118 }
1119 }
1120 return false;
1121}
1122
1123bool parseAddress(const char *&scursor, const char *const send,
1124 Address &result, bool isCRLF)
1125{
1126 // address := mailbox / group
1127
1128 eatCFWS(scursor, send, isCRLF);
1129 if (scursor == send) {
1130 return false;
1131 }
1132
1133 // first try if it's a single mailbox:
1134 Mailbox maybeMailbox;
1135 const char *oldscursor = scursor;
1136 if (parseMailbox(scursor, send, maybeMailbox, isCRLF)) {
1137 // yes, it is:
1138 result.displayName.clear();
1139 result.mailboxList.append(maybeMailbox);
1140 return true;
1141 }
1142 scursor = oldscursor;
1143
1144 Address maybeAddress;
1145
1146 // no, it's not a single mailbox. Try if it's a group:
1147 if (!parseGroup(scursor, send, maybeAddress, isCRLF)) {
1148 return false;
1149 }
1150
1151 result = maybeAddress;
1152 return true;
1153}
1154
1155bool parseAddressList(const char *&scursor, const char *const send,
1156 AddressList &result, bool isCRLF)
1157{
1158 while (scursor != send) {
1159 eatCFWS(scursor, send, isCRLF);
1160 // end of header: this is OK.
1161 if (scursor == send) {
1162 return true;
1163 }
1164 // empty entry: ignore:
1165 if (*scursor == ',') {
1166 scursor++;
1167 continue;
1168 }
1169 // broken clients might use ';' as list delimiter, accept that as well
1170 if (*scursor == ';') {
1171 scursor++;
1172 continue;
1173 }
1174
1175 // parse one entry
1176 Address maybeAddress;
1177 if (!parseAddress(scursor, send, maybeAddress, isCRLF)) {
1178 return false;
1179 }
1180 result.append(maybeAddress);
1181
1182 eatCFWS(scursor, send, isCRLF);
1183 // end of header: this is OK.
1184 if (scursor == send) {
1185 return true;
1186 }
1187 // comma separating entries: eat it.
1188 if (*scursor == ',') {
1189 scursor++;
1190 }
1191 }
1192 return true;
1193}
1194
1195static bool parseParameter(const char *&scursor, const char *const send,
1196 QPair<QByteArray, QStringOrQPair> &result, bool isCRLF)
1197{
1198 // parameter = regular-parameter / extended-parameter
1199 // regular-parameter = regular-parameter-name "=" value
1200 // extended-parameter =
1201 // value = token / quoted-string
1202 //
1203 // note that rfc2231 handling is out of the scope of this function.
1204 // Therefore we return the attribute as QByteArray and the value as
1205 // (start,length) tuple if we see that the value is encoded
1206 // (trailing asterisk), for parseParameterList to decode...
1207
1208 eatCFWS(scursor, send, isCRLF);
1209 if (scursor == send) {
1210 return false;
1211 }
1212
1213 //
1214 // parse the parameter name:
1215 //
1216 QByteArrayView maybeAttribute;
1217 if (!parseToken(scursor, send, maybeAttribute, ParseTokenNoFlag)) {
1218 return false;
1219 }
1220
1221 eatCFWS(scursor, send, isCRLF);
1222 // premature end: not OK (haven't seen '=' yet).
1223 if (scursor == send || *scursor != '=') {
1224 return false;
1225 }
1226 scursor++; // eat '='
1227
1228 eatCFWS(scursor, send, isCRLF);
1229 if (scursor == send) {
1230 // don't choke on attribute=, meaning the value was omitted:
1231 if (maybeAttribute.endsWith('*')) {
1232 KMIME_WARN << "attribute ends with \"*\", but value is empty!"
1233 "Chopping away \"*\".";
1234 maybeAttribute.chop(1);
1235 }
1236 result = qMakePair(maybeAttribute.toByteArray().toLower(), QStringOrQPair());
1237 return true;
1238 }
1239
1240 const char *oldscursor = scursor;
1241
1242 //
1243 // parse the parameter value:
1244 //
1245 QStringOrQPair maybeValue;
1246 if (*scursor == '"') {
1247 // value is a quoted-string:
1248 scursor++;
1249 if (maybeAttribute.endsWith('*')) {
1250 // attributes ending with "*" designate extended-parameters,
1251 // which cannot have quoted-strings as values. So we remove the
1252 // trailing "*" to not confuse upper layers.
1253 KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string!"
1254 "Chopping away \"*\".";
1255 maybeAttribute.chop(1);
1256 }
1257
1258 if (!parseGenericQuotedString(scursor, send, maybeValue.qstring, isCRLF)) {
1259 scursor = oldscursor;
1260 result = qMakePair(maybeAttribute.toByteArray().toLower(), QStringOrQPair());
1261 return false; // this case needs further processing by upper layers!!
1262 }
1263 } else {
1264 // value is a token:
1265 if (!parseToken(scursor, send, maybeValue.view, ParseTokenRelaxedTText)) {
1266 scursor = oldscursor;
1267 result = qMakePair(maybeAttribute.toByteArray().toLower(), QStringOrQPair());
1268 return false; // this case needs further processing by upper layers!!
1269 }
1270 }
1271
1272 result = qMakePair(maybeAttribute.toByteArray().toLower(), maybeValue);
1273 return true;
1274}
1275
1276static bool parseRawParameterList(const char *&scursor, const char *const send,
1277 std::map<QByteArray, QStringOrQPair> &result,
1278 bool isCRLF)
1279{
1280 // we use parseParameter() consecutively to obtain a map of raw
1281 // attributes to raw values. "Raw" here means that we don't do
1282 // rfc2231 decoding and concatenation. This is left to
1283 // parseParameterList(), which will call this function.
1284 //
1285 // The main reason for making this chunk of code a separate
1286 // (private) method is that we can deal with broken parameters
1287 // _here_ and leave the rfc2231 handling solely to
1288 // parseParameterList(), which will still be enough work.
1289 while (scursor != send) {
1290 eatCFWS(scursor, send, isCRLF);
1291 // empty entry ending the list: OK.
1292 if (scursor == send) {
1293 return true;
1294 }
1295 // empty list entry: ignore.
1296 if (*scursor == ';') {
1297 scursor++;
1298 continue;
1299 }
1300 QPair<QByteArray, QStringOrQPair> maybeParameter;
1301 if (!parseParameter(scursor, send, maybeParameter, isCRLF)) {
1302 // we need to do a bit of work if the attribute is not
1303 // NULL. These are the cases marked with "needs further
1304 // processing" in parseParameter(). Specifically, parsing of the
1305 // token or the quoted-string, which should represent the value,
1306 // failed. We take the easy way out and simply search for the
1307 // next ';' to start parsing again. (Another option would be to
1308 // take the text between '=' and ';' as value)
1309 if (maybeParameter.first.isNull()) {
1310 return false;
1311 }
1312 while (scursor != send) {
1313 if (*scursor++ == ';') {
1314 goto IS_SEMICOLON;
1315 }
1316 }
1317 // scursor == send case: end of list.
1318 return true;
1319 IS_SEMICOLON:
1320 // *scursor == ';' case: parse next entry.
1321 continue;
1322 }
1323 // successful parsing brings us here:
1324 result[maybeParameter.first] = maybeParameter.second;
1325
1326 eatCFWS(scursor, send, isCRLF);
1327 // end of header: ends list.
1328 if (scursor == send) {
1329 return true;
1330 }
1331 // regular separator: eat it.
1332 if (*scursor == ';') {
1333 scursor++;
1334 }
1335 }
1336 return true;
1337}
1338
1339static void decodeRFC2231Value(KCodecs::Codec *&rfc2231Codec,
1340 QStringDecoder &textcodec,
1341 bool isContinuation, QString &value,
1342 QByteArrayView &source, QByteArray &charset)
1343{
1344 //
1345 // parse the raw value into (charset,language,text):
1346 //
1347
1348 const char *decBegin = source.data();
1349 const char *decCursor = decBegin;
1350 const char *decEnd = decCursor + source.size();
1351
1352 if (!isContinuation) {
1353 // find the first single quote
1354 while (decCursor != decEnd) {
1355 if (*decCursor == '\'') {
1356 break;
1357 } else {
1358 decCursor++;
1359 }
1360 }
1361
1362 if (decCursor == decEnd) {
1363 // there wasn't a single single quote at all!
1364 // take the whole value to be in latin-1:
1365 KMIME_WARN << "No charset in extended-initial-value."
1366 "Assuming \"iso-8859-1\".";
1367 value += QLatin1StringView(decBegin, source.size());
1368 return;
1369 }
1370
1371 charset = QByteArray(decBegin, decCursor - decBegin);
1372
1373 const char *oldDecCursor = ++decCursor;
1374 // find the second single quote (we ignore the language tag):
1375 while (decCursor != decEnd) {
1376 if (*decCursor == '\'') {
1377 break;
1378 } else {
1379 decCursor++;
1380 }
1381 }
1382 if (decCursor == decEnd) {
1383 KMIME_WARN << "No language in extended-initial-value."
1384 "Trying to recover.";
1385 decCursor = oldDecCursor;
1386 } else {
1387 decCursor++;
1388 }
1389
1390 // decCursor now points to the start of the
1391 // "extended-other-values":
1392
1393 //
1394 // get the decoders:
1395 //
1396
1397 textcodec = QStringDecoder(charset.constData());
1398 if (!textcodec.isValid()) {
1399 KMIME_WARN_UNKNOWN(Charset, charset);
1400 }
1401 }
1402
1403 if (!rfc2231Codec) {
1404 rfc2231Codec = KCodecs::Codec::codecForName("x-kmime-rfc2231");
1405 assert(rfc2231Codec);
1406 }
1407
1408 if (!textcodec.isValid()) {
1409 value += QString::fromLatin1(decCursor, decEnd - decCursor);
1410 return;
1411 }
1412
1413 KCodecs::Decoder *dec = rfc2231Codec->makeDecoder();
1414 assert(dec);
1415
1416 //
1417 // do the decoding:
1418 //
1419
1420 QByteArray buffer;
1421 buffer.resize(rfc2231Codec->maxDecodedSizeFor(decEnd - decCursor));
1422 QByteArray::Iterator bit = buffer.begin();
1423 QByteArray::ConstIterator bend = buffer.end();
1424
1425 if (!dec->decode(decCursor, decEnd, bit, bend)) {
1426 KMIME_WARN << rfc2231Codec->name()
1427 << "codec lies about its maxDecodedSizeFor()"
1428 << Qt::endl
1429 << "result may be truncated";
1430 }
1431
1432 value += textcodec.decode(QByteArrayView(buffer.begin(), bit - buffer.begin()));
1433
1434 // qCDebug(KMIME_LOG) << "value now: \"" << value << "\"";
1435 // cleanup:
1436 delete dec;
1437}
1438
1439// known issues:
1440// - permutes rfc2231 continuations when the total number of parts
1441// exceeds 10 (other-sections then becomes *xy, i.e. two digits)
1442
1443bool parseParameterListWithCharset(const char *&scursor,
1444 const char *const send,
1445 Headers::ParameterMap &result,
1446 QByteArray &charset, bool isCRLF)
1447{
1448 // parse the list into raw attribute-value pairs:
1449 std::map<QByteArray, QStringOrQPair> rawParameterList;
1450 if (!parseRawParameterList(scursor, send, rawParameterList, isCRLF)) {
1451 return false;
1452 }
1453
1454 if (rawParameterList.empty()) {
1455 return true;
1456 }
1457
1458 // decode rfc 2231 continuations and alternate charset encoding:
1459
1460 // NOTE: this code assumes that what QMapIterator delivers is sorted
1461 // by the key!
1462
1463 KCodecs::Codec *rfc2231Codec = nullptr;
1464 QStringDecoder textcodec;
1465 QByteArray attribute;
1466 QString value;
1467 enum Mode {
1468 NoMode = 0x0, Continued = 0x1, Encoded = 0x2
1469 };
1470
1471 enum EncodingMode {
1472 NoEncoding,
1473 RFC2047,
1474 RFC2231
1475 };
1476
1477 for (auto &it : rawParameterList) {
1478 if (attribute.isNull() || !it.first.startsWith(attribute)) {
1479 //
1480 // new attribute:
1481 //
1482
1483 // store the last attribute/value pair in the result map now:
1484 if (!attribute.isNull()) {
1485 result[attribute] = value;
1486 }
1487 // and extract the information from the new raw attribute:
1488 value.clear();
1489 attribute = it.first;
1490 int mode = NoMode;
1491 EncodingMode encodingMode = NoEncoding;
1492
1493 // is the value rfc2331-encoded?
1494 if (attribute.endsWith('*')) {
1495 attribute.chop(1);
1496 mode |= Encoded;
1497 encodingMode = RFC2231;
1498 }
1499 // is the value rfc2047-encoded?
1500 if (!it.second.qstring.isNull() &&
1501 it.second.qstring.contains(QLatin1StringView("=?"))) {
1502 mode |= Encoded;
1503 encodingMode = RFC2047;
1504 }
1505 // is the value continued?
1506 if (attribute.endsWith(QLatin1StringView("*0"))) {
1507 attribute.chop(2);
1508 mode |= Continued;
1509 }
1510 //
1511 // decode if necessary:
1512 //
1513 if (mode & Encoded) {
1514 if (encodingMode == RFC2231) {
1515 decodeRFC2231Value(rfc2231Codec, textcodec,
1516 false, /* isn't continuation */
1517 value, it.second.view, charset);
1518 } else if (encodingMode == RFC2047) {
1519 value += KCodecs::decodeRFC2047String(it.second.qstring.toLatin1(), &charset);
1520 }
1521 } else {
1522 // not encoded.
1523 if (!it.second.view.isNull()) {
1524 value += QLatin1StringView(it.second.view);
1525 } else {
1526 value += it.second.qstring;
1527 }
1528 }
1529
1530 //
1531 // shortcut-processing when the value isn't encoded:
1532 //
1533
1534 if (!(mode & Continued)) {
1535 // save result already:
1536 result[attribute] = value;
1537 // force begin of a new attribute:
1538 attribute.clear();
1539 }
1540 } else { // it.key().startsWith( attribute )
1541 //
1542 // continuation
1543 //
1544
1545 // ignore the section and trust QMap to have sorted the keys:
1546 if (it.first.endsWith('*')) {
1547 // encoded
1548 decodeRFC2231Value(rfc2231Codec, textcodec,
1549 true, /* is continuation */
1550 value, it.second.view, charset);
1551 } else {
1552 // not encoded
1553 if (!it.second.view.isNull()) {
1554 value += QLatin1StringView(it.second.view);
1555 } else {
1556 value += it.second.qstring;
1557 }
1558 }
1559 }
1560 }
1561 // write last attr/value pair:
1562 if (!attribute.isNull()) {
1563 result[attribute] = value;
1564 }
1565
1566 return true;
1567}
1568
1569static const char stdDayNames[][4] = {
1570 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
1571};
1572static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames;
1573
1574static bool parseDayName(const char *&scursor, const char *const send)
1575{
1576 // check bounds:
1577 if (send - scursor < 3) {
1578 return false;
1579 }
1580
1581 for (int i = 0 ; i < stdDayNamesLen ; ++i) {
1582 if (qstrnicmp(scursor, stdDayNames[i], 3) == 0) {
1583 scursor += 3;
1584 // qCDebug(KMIME_LOG) << "found" << stdDayNames[i];
1585 return true;
1586 }
1587 }
1588
1589 return false;
1590}
1591
1592static const char stdMonthNames[][4] = {
1593 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
1594 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
1595};
1596static const int stdMonthNamesLen =
1597 sizeof stdMonthNames / sizeof *stdMonthNames;
1598
1599static bool parseMonthName(const char *&scursor, const char *const send,
1600 int &result)
1601{
1602 // check bounds:
1603 if (send - scursor < 3) {
1604 return false;
1605 }
1606
1607 for (result = 0 ; result < stdMonthNamesLen ; ++result) {
1608 if (qstrnicmp(scursor, stdMonthNames[result], 3) == 0) {
1609 scursor += 3;
1610 return true;
1611 }
1612 }
1613
1614 // not found:
1615 return false;
1616}
1617
// Table of the alphabetic time zone names we understand, with their offset
// from GMT in seconds (positive = east of GMT). Lookups scan the table from
// the top and stop at the first name that matches, so every name must appear
// only once.
static const struct {
    const char tzName[5];
    int secsEastOfGMT;
} timeZones[] = {
    // rfc 822 timezones:
    { "GMT", 0 },
    { "UT", 0 },
    { "EDT", -4 * 3600 },
    { "EST", -5 * 3600 },
    // This entry used to read "MST", which made "CDT" unknown and, being
    // the first match, gave "MST" an offset of -5h instead of -7h.
    { "CDT", -5 * 3600 },
    { "CST", -6 * 3600 },
    { "MDT", -6 * 3600 },
    { "MST", -7 * 3600 },
    { "PDT", -7 * 3600 },
    { "PST", -8 * 3600 },
    // common, non-rfc-822 zones:
    { "CET", 1 * 3600 },
    { "MET", 1 * 3600 },
    { "UTC", 0 },
    { "CEST", 2 * 3600 },
    { "BST", 1 * 3600 },
    // rfc 822 military timezones (signs exactly as rfc 822 defines them):
    { "Z", 0 },
    { "A", -1 * 3600 },
    { "B", -2 * 3600 },
    { "C", -3 * 3600 },
    { "D", -4 * 3600 },
    { "E", -5 * 3600 },
    { "F", -6 * 3600 },
    { "G", -7 * 3600 },
    { "H", -8 * 3600 },
    { "I", -9 * 3600 },
    // J is not used!
    { "K", -10 * 3600 },
    { "L", -11 * 3600 },
    { "M", -12 * 3600 },
    { "N", 1 * 3600 },
    { "O", 2 * 3600 },
    { "P", 3 * 3600 },
    { "Q", 4 * 3600 },
    { "R", 5 * 3600 },
    { "S", 6 * 3600 },
    { "T", 7 * 3600 },
    { "U", 8 * 3600 },
    { "V", 9 * 3600 },
    { "W", 10 * 3600 },
    { "X", 11 * 3600 },
    { "Y", 12 * 3600 },
};
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
1668
1669static bool parseAlphaNumericTimeZone(const char *&scursor,
1670 const char *const send,
1671 long int &secsEastOfGMT,
1672 bool &timeZoneKnown)
1673{
1674 // allow the timezone to be wrapped in quotes; bug 260761
1675 if (scursor < send && *scursor == '"') {
1676 scursor++;
1677
1678 if (scursor == send) {
1679 return false;
1680 }
1681 }
1682
1683 QByteArrayView maybeTimeZone;
1684 if (!parseToken(scursor, send, maybeTimeZone, ParseTokenNoFlag)) {
1685 return false;
1686 }
1687 for (int i = 0 ; i < timeZonesLen ; ++i) {
1688 if (maybeTimeZone.compare(timeZones[i].tzName, Qt::CaseInsensitive) == 0) {
1689 scursor += maybeTimeZone.size();
1690 secsEastOfGMT = timeZones[i].secsEastOfGMT;
1691 timeZoneKnown = true;
1692
1693 if (scursor < send && *scursor == '"') {
1694 scursor++;
1695 }
1696
1697 return true;
1698 }
1699 }
1700
1701 // don't choke just because we don't happen to know the time zone
1702 KMIME_WARN_UNKNOWN(time zone, maybeTimeZone);
1703 secsEastOfGMT = 0;
1704 timeZoneKnown = false;
1705 return true;
1706}
1707
// Parses a run of ASCII decimal digits starting at scursor.
//
//  scursor  is advanced behind the last digit consumed
//  send     end of the input; never read
//  result   receives the value of the digits (0 if there are none); values
//           too large for an int are clamped to the largest int instead of
//           overflowing
//
// Returns the number of digits consumed, which is 0 if the input does not
// start with a digit.
int parseDigits(const char *&scursor, const char *const send, int &result)
{
    // largest value an int can hold
    constexpr int maxValue = static_cast<int>(~0U >> 1);

    result = 0;
    int digits = 0;
    // The digit test is a plain range check: passing a (possibly negative)
    // char from an 8bit header to isdigit() is undefined behaviour.
    for (; scursor != send && *scursor >= '0' && *scursor <= '9' ; scursor++, digits++) {
        const int digit = *scursor - '0';
        if (result > (maxValue - digit) / 10) {
            // result * 10 + digit would not fit: saturate
            result = maxValue;
        } else {
            result = result * 10 + digit;
        }
    }
    return digits;
}
1719
1720static bool parseTimeOfDay(const char *&scursor, const char *const send,
1721 int &hour, int &min, int &sec, bool isCRLF = false)
1722{
1723 // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ]
1724
1725 //
1726 // 2DIGIT representing "hour":
1727 //
1728 if (!parseDigits(scursor, send, hour)) {
1729 return false;
1730 }
1731
1732 eatCFWS(scursor, send, isCRLF);
1733 if (scursor == send || *scursor != ':') {
1734 return false;
1735 }
1736 scursor++; // eat ':'
1737
1738 eatCFWS(scursor, send, isCRLF);
1739 if (scursor == send) {
1740 return false;
1741 }
1742
1743 //
1744 // 2DIGIT representing "minute":
1745 //
1746 if (!parseDigits(scursor, send, min)) {
1747 return false;
1748 }
1749
1750 eatCFWS(scursor, send, isCRLF);
1751 if (scursor == send) {
1752 return true; // seconds are optional
1753 }
1754
1755 //
1756 // let's see if we have a 2DIGIT representing "second":
1757 //
1758 if (*scursor == ':') {
1759 // yepp, there are seconds:
1760 scursor++; // eat ':'
1761 eatCFWS(scursor, send, isCRLF);
1762 if (scursor == send) {
1763 return false;
1764 }
1765
1766 if (!parseDigits(scursor, send, sec)) {
1767 return false;
1768 }
1769 } else {
1770 sec = 0;
1771 }
1772
1773 return true;
1774}
1775
1776bool parseTime(const char *&scursor, const char *send,
1777 int &hour, int &min, int &sec, long int &secsEastOfGMT,
1778 bool &timeZoneKnown, bool isCRLF)
1779{
1780 // time := time-of-day CFWS ( zone / obs-zone )
1781 //
1782 // obs-zone := "UT" / "GMT" /
1783 // "EST" / "EDT" / ; -0500 / -0400
1784 // "CST" / "CDT" / ; -0600 / -0500
1785 // "MST" / "MDT" / ; -0700 / -0600
1786 // "PST" / "PDT" / ; -0800 / -0700
1787 // "A"-"I" / "a"-"i" /
1788 // "K"-"Z" / "k"-"z"
1789
1790 eatCFWS(scursor, send, isCRLF);
1791 if (scursor == send) {
1792 return false;
1793 }
1794
1795 if (!parseTimeOfDay(scursor, send, hour, min, sec, isCRLF)) {
1796 return false;
1797 }
1798
1799 eatCFWS(scursor, send, isCRLF);
1800 // there might be no timezone but a year following
1801 if ((scursor == send) || isdigit(*scursor)) {
1802 timeZoneKnown = false;
1803 secsEastOfGMT = 0;
1804 return true; // allow missing timezone
1805 }
1806
1807 timeZoneKnown = true;
1808 if (*scursor == '+' || *scursor == '-') {
1809 // remember and eat '-'/'+':
1810 const char sign = *scursor++;
1811 // numerical timezone:
1812 int maybeTimeZone;
1813 const int tzDigits = parseDigits(scursor, send, maybeTimeZone);
1814 if (tzDigits != 4) {
1815 // Allow timezones in 02:00 format
1816 if (tzDigits == 2 && scursor != send && *scursor == ':') {
1817 scursor++;
1818 int maybeTimeZone2;
1819 if (parseDigits(scursor, send, maybeTimeZone2) != 2) {
1820 return false;
1821 }
1822 maybeTimeZone = maybeTimeZone * 100 + maybeTimeZone2;
1823 } else {
1824 return false;
1825 }
1826 }
1827 secsEastOfGMT = 60 * (maybeTimeZone / 100 * 60 + maybeTimeZone % 100);
1828 if (sign == '-') {
1829 secsEastOfGMT *= -1;
1830 if (secsEastOfGMT == 0) {
1831 timeZoneKnown = false; // -0000 means indetermined tz
1832 }
1833 }
1834 } else {
1835 // maybe alphanumeric timezone:
1836 if (!parseAlphaNumericTimeZone(scursor, send, secsEastOfGMT, timeZoneKnown)) {
1837 return false;
1838 }
1839 }
1840 return true;
1841}
1842
1843bool parseQDateTime(const char *&scursor, const char *const send,
1844 QDateTime &result, bool isCRLF)
1845{
1846 eatCFWS(scursor, send, isCRLF);
1847 if (scursor == send) {
1848 return false;
1849 }
1850 // In qt6 yy == 1900 ! => for sure we use 2000 here.
1851 result = QDateTime::fromString(QString::fromLatin1(scursor, 17), QStringLiteral("dd/MM/yy HH:mm:ss"));
1852 QDate resultDate = result.date();
1853 resultDate.setDate(resultDate.year() + 100, resultDate.month(), resultDate.day());
1854 result.setDate(resultDate);
1855 return result.isValid();
1856}
1857
1858bool parseDateTime(const char *&scursor, const char *const send,
1859 QDateTime &result, bool isCRLF)
1860{
1861 // Parsing date-time; strict mode:
1862 //
1863 // date-time := [ [CFWS] day-name [CFWS] "," ] ; wday
1864 // (expanded) [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date
1865 // time
1866 //
1867 // day-name := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
1868 // month-name := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
1869 // "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec"
1870
1871 result = QDateTime();
1872
1873 eatCFWS(scursor, send, isCRLF);
1874 if (scursor == send) {
1875 return false;
1876 }
1877
1878 //
1879 // let's see if there's a day-of-week:
1880 //
1881 if (parseDayName(scursor, send)) {
1882 eatCFWS(scursor, send, isCRLF);
1883 if (scursor == send) {
1884 return false;
1885 }
1886 // day-name should be followed by ',' but we treat it as optional:
1887 if (*scursor == ',') {
1888 scursor++; // eat ','
1889 eatCFWS(scursor, send, isCRLF);
1890 }
1891 }
1892
1893 int maybeMonth = -1;
1894 bool asctimeFormat = false;
1895
1896 // ANSI-C asctime() format is: Wed Jun 30 21:49:08 1993
1897 if (!isdigit(*scursor) && parseMonthName(scursor, send, maybeMonth)) {
1898 asctimeFormat = true;
1899 eatCFWS(scursor, send, isCRLF);
1900 }
1901
1902 //
1903 // 1*2DIGIT representing "day" (of month):
1904 //
1905 int maybeDay;
1906 if (!parseDigits(scursor, send, maybeDay)) {
1907 return false;
1908 }
1909
1910 eatCFWS(scursor, send, isCRLF);
1911 if (scursor == send) {
1912 return false;
1913 }
1914
1915 // ignore ","; bug 54098
1916 if (*scursor == ',') {
1917 scursor++;
1918 }
1919
1920 //
1921 // month-name:
1922 //
1923 if (!asctimeFormat && !parseMonthName(scursor, send, maybeMonth)) {
1924 return false;
1925 }
1926 if (scursor == send) {
1927 return false;
1928 }
1929 assert(maybeMonth >= 0); assert(maybeMonth <= 11);
1930 ++maybeMonth; // 0-11 -> 1-12
1931
1932 eatCFWS(scursor, send, isCRLF);
1933 if (scursor == send) {
1934 return false;
1935 }
1936
1937 // check for "year HH:MM:SS" or only "HH:MM:SS" (or "H:MM:SS")
1938 bool timeAfterYear = true;
1939 if ((send - scursor > 3) && ((scursor[1] == ':') || (scursor[2] == ':'))) {
1940 timeAfterYear = false; // first read time, then year
1941 }
1942
1943 //
1944 // 2*DIGIT representing "year":
1945 //
1946 int maybeYear = 0;
1947
1948 if (timeAfterYear && !parseDigits(scursor, send, maybeYear)) {
1949 return false;
1950 }
1951
1952 eatCFWS(scursor, send, isCRLF);
1953 int maybeHour;
1954 int maybeMinute;
1955 int maybeSecond;
1956 long int secsEastOfGMT = 0;
1957 QDate maybeDate;
1958 QTime maybeTime;
1959 if (scursor != send) {
1960 //
1961 // time
1962 //
1963 bool timeZoneKnown = true;
1964
1965 if (!parseTime(scursor, send,
1966 maybeHour, maybeMinute, maybeSecond,
1967 secsEastOfGMT, timeZoneKnown, isCRLF)) {
1968 return false;
1969 }
1970
1971 // in asctime() the year follows the time
1972 if (!timeAfterYear) {
1973 eatCFWS(scursor, send, isCRLF);
1974 if (scursor == send) {
1975 return false;
1976 }
1977
1978 if (!parseDigits(scursor, send, maybeYear)) {
1979 return false;
1980 }
1981 }
1982
1983 // RFC 2822 4.3 processing:
1984 if (maybeYear < 50) {
1985 maybeYear += 2000;
1986 } else if (maybeYear < 1000) {
1987 maybeYear += 1900;
1988 }
1989 // else keep as is
1990 if (maybeYear < 1900) {
1991 return false; // rfc2822, 3.3
1992 }
1993
1994 maybeDate = QDate(maybeYear, maybeMonth, maybeDay);
1995 maybeTime = QTime(maybeHour, maybeMinute, maybeSecond);
1996
1997 if (!maybeDate.isValid() || !maybeTime.isValid()) {
1998 return false;
1999 }
2000 } else {
2001 maybeDate = QDate(maybeYear, maybeMonth, maybeDay);
2002 maybeTime = QTime(0, 0, 0);
2003 }
2004
2005 result = QDateTime(maybeDate, maybeTime, QTimeZone::fromSecondsAheadOfUtc(secsEastOfGMT));
2006 if (!result.isValid()) {
2007 return false;
2008 }
2009 return true;
2010}
2011
2012namespace {
2013
2014Headers::Base *extractHeader(QByteArrayView head, const qsizetype headerStart, qsizetype &endOfFieldBody)
2015{
2016 Headers::Base *header = {};
2017
2018 auto startOfFieldBody = head.indexOf(':', headerStart);
2019 if (startOfFieldBody < 0) {
2020 return nullptr;
2021 }
2022
2023 const char *rawType = head.constData() + headerStart;
2024 const size_t rawTypeLen = startOfFieldBody - headerStart;
2025
2026 startOfFieldBody++; //skip the ':'
2027 if (startOfFieldBody < head.size() - 1 && head[startOfFieldBody] == ' ') { // skip the space after the ':', if there's any
2028 startOfFieldBody++;
2029 }
2030
2031 bool folded = false;
2032 endOfFieldBody = findHeaderLineEnd(head, startOfFieldBody, &folded);
2033
2034 // We might get an invalid mail without a field name, don't crash on that.
2035 if (rawTypeLen > 0) {
2036 header = HeaderFactory::createHeader(rawType, rawTypeLen);
2037 }
2038 if (!header) {
2039 //qCWarning(KMIME_LOG)() << "Returning Generic header of type" << rawType;
2040 header = new Headers::Generic(rawType, rawTypeLen);
2041 }
2042 if (folded) {
2043 const auto unfoldedBody = unfoldHeader(head.constData() + startOfFieldBody, endOfFieldBody - startOfFieldBody);
2044 header->from7BitString(unfoldedBody);
2045 } else {
2046 header->from7BitString(QByteArrayView(head.constData() + startOfFieldBody, endOfFieldBody - startOfFieldBody));
2047 }
2048
2049 return header;
2050}
2051
2052}
2053
2054std::unique_ptr<KMime::Headers::Base> parseNextHeader(QByteArrayView &head)
2055{
2056 qsizetype endOfFieldBody = 0;
2057 std::unique_ptr<KMime::Headers::Base> header(extractHeader(head, 0, endOfFieldBody));
2058 if (header) {
2059 head = head.mid(endOfFieldBody + 1);
2060 } else {
2061 head = {};
2062 }
2063
2064 return header;
2065}
2066
2067void extractHeaderAndBody(const QByteArray &content, QByteArray &header, QByteArray &body)
2068{
2069 header.clear();
2070 body.clear();
2071
2072 // empty header
2073 if (content.startsWith('\n')) {
2074 body = content.right(content.length() - 1);
2075 return;
2076 }
2077
2078 auto pos = content.indexOf("\n\n", 0);
2079 if (pos > -1) {
2080 header = content.left(++pos); //header *must* end with "\n" !!
2081 body = content.mid(pos + 1);
2082 if (body.startsWith("\n")) {
2083 body = "\n" + body;
2084 }
2085 } else {
2086 header = content;
2087 }
2088}
2089
2090QList<Headers::Base *> parseHeaders(const QByteArray &head) {
2092
2093 qsizetype cursor = 0;
2094 while (cursor < head.size()) {
2095 const auto headerStart = cursor;
2096 qsizetype endOfFieldBody;
2097 if (auto header = extractHeader(head, headerStart, endOfFieldBody)) {
2098 ret << header;
2099 cursor = endOfFieldBody + 1;
2100 } else {
2101 break;
2102 }
2103 }
2104
2105 return ret;
2106}
2107
2108} // namespace HeaderParsing
2109
2110} // namespace KMime
static Codec * codecForName(QByteArrayView name)
virtual Decoder * makeDecoder(NewlineType newline=NewlineLF) const=0
virtual const char * name() const=0
virtual qsizetype maxDecodedSizeFor(qsizetype insize, NewlineType newline=NewlineLF) const=0
Baseclass of all header-classes.
Definition headers.h:97
virtual void from7BitString(QByteArrayView s)=0
Parses the given string.
Represents an arbitrary header, that can contain any header-field.
Definition headers.h:1205
Represents an (email address, display name) pair according RFC 2822, section 3.4.
Definition types.h:38
void setName(const QString &name)
Sets the name.
Definition types.cpp:132
void setAddress(const AddrSpec &addr)
Sets the email address.
Definition types.cpp:114
Q_SCRIPTABLE Q_NOREPLY void start()
This file is part of the API for handling MIME data and defines the various header classes:
KCODECS_EXPORT QString decodeRFC2047String(QByteArrayView src, QByteArray *usedCS, const QByteArray &defaultCS=QByteArray(), CharsetOption option=NoOption)
iterator begin()
void chop(qsizetype n)
void clear()
const char * constData() const const
char * data()
iterator end()
bool endsWith(QByteArrayView bv) const const
QByteArray first(qsizetype n) const const
qsizetype indexOf(QByteArrayView bv, qsizetype from) const const
bool isEmpty() const const
bool isNull() const const
QByteArray left(qsizetype len) const const
qsizetype length() const const
QByteArray mid(qsizetype pos, qsizetype len) const const
void resize(qsizetype newSize, char c)
QByteArray right(qsizetype len) const const
qsizetype size() const const
bool startsWith(QByteArrayView bv) const const
QByteArray toLower() const const
QByteArrayView mid(qsizetype start, qsizetype length) const const
void chop(qsizetype length)
int compare(QByteArrayView bv, Qt::CaseSensitivity cs) const const
const_pointer constData() const const
const_pointer data() const const
bool endsWith(QByteArrayView bv) const const
qsizetype indexOf(QByteArrayView bv, qsizetype from) const const
qsizetype size() const const
QByteArray toByteArray() const const
int day() const const
bool isValid(int year, int month, int day)
int month() const const
bool setDate(int year, int month, int day)
int year() const const
QDate date() const const
QDateTime fromString(QStringView string, QStringView format, QCalendar cal)
bool isValid() const const
void setDate(QDate date)
void append(QList< T > &&value)
void clear()
bool endsWith(QChar c, Qt::CaseSensitivity cs) const const
QString fromLatin1(QByteArrayView str)
bool isNull() const const
QString mid(qsizetype position, qsizetype n) const const
qsizetype size() const const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const const
bool isValid() const const
EncodedData< QByteArrayView > decode(QByteArrayView ba)
CaseInsensitive
QTextStream & dec(QTextStream &stream)
QTextStream & endl(QTextStream &stream)
bool isValid(int h, int m, int s, int ms)
QTimeZone fromSecondsAheadOfUtc(int offset)
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Fri Jul 26 2024 11:51:33 by doxygen 1.11.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.