KMime

kmime_header_parsing.cpp
1/* -*- c++ -*-
2 kmime_header_parsing.cpp
3
4 KMime, the KDE Internet mail/usenet news message library.
5 SPDX-FileCopyrightText: 2001-2002 Marc Mutz <mutz@kde.org>
6
7 SPDX-License-Identifier: LGPL-2.0-or-later
8*/
9
10#include "kmime_header_parsing.h"
11
12#include "kmime_headerfactory_p.h"
13#include "kmime_headers.h"
14#include "kmime_util.h"
15#include "kmime_util_p.h"
16#include "kmime_codecs_p.h"
17#include "kmime_dateformatter.h"
18#include "kmime_debug.h"
19#include "kmime_warning_p.h"
20
21#include <KCodecs>
22
23#include <QMap>
24#include <QStringDecoder>
25#include <QTimeZone>
26
27#include <cassert>
28#include <cctype> // for isdigit
29
30using namespace KMime;
31using namespace KMime::Types;
32
33namespace KMime
34{
35
36 namespace Types
37 {
38 // Optimization to avoid allocating QStrings when the value isn't encoded
39 struct KMIME_EXPORT QStringOrQPair {
40 QStringOrQPair() : qstring(), qpair(nullptr, 0) {}
41 QString qstring;
42 QPair<const char *, int> qpair;
43 };
44 } // namespace Types
45
46namespace HeaderParsing
47{
48
49// parse the encoded-word (scursor points to after the initial '=')
50bool parseEncodedWord(const char *&scursor, const char *const send,
51 QString &result, QByteArray &language,
52 QByteArray &usedCS, const QByteArray &defaultCS)
53{
54 // make sure the caller already did a bit of the work.
55 assert(*(scursor - 1) == '=');
56
57 //
58 // STEP 1:
59 // scan for the charset/language portion of the encoded-word
60 //
61
62 char ch = *scursor++;
63
64 if (ch != '?') {
65 // qCDebug(KMIME_LOG) << "first";
66 //KMIME_WARN_PREMATURE_END_OF( EncodedWord );
67 return false;
68 }
69
70 // remember start of charset (ie. just after the initial "=?") and
71 // language (just after the first '*') fields:
72 const char *charsetStart = scursor;
73 const char *languageStart = nullptr;
74
75 // find delimiting '?' (and the '*' separating charset and language
76 // tags, if any):
77 for (; scursor != send ; scursor++) {
78 if (*scursor == '?') {
79 break;
80 } else if (*scursor == '*' && languageStart == nullptr) {
81 languageStart = scursor + 1;
82 }
83 }
84
85 // not found? can't be an encoded-word!
86 if (scursor == send || *scursor != '?') {
87 // qCDebug(KMIME_LOG) << "second";
88 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
89 return false;
90 }
91
92 // extract the language information, if any (if languageStart is 0,
93 // language will be null, too):
94 QByteArray maybeLanguage(languageStart, scursor - languageStart);
95 // extract charset information (keep in mind: the size given to the
96 // ctor is one off due to the \0 terminator):
97 QByteArray maybeCharset(charsetStart,
98 (languageStart ? languageStart - 1 : scursor) - charsetStart);
99
100 //
101 // STEP 2:
102 // scan for the encoding portion of the encoded-word
103 //
104
105 // remember start of encoding (just _after_ the second '?'):
106 scursor++;
107 const char *encodingStart = scursor;
108
109 // find next '?' (ending the encoding tag):
110 for (; scursor != send ; scursor++) {
111 if (*scursor == '?') {
112 break;
113 }
114 }
115
116 // not found? Can't be an encoded-word!
117 if (scursor == send || *scursor != '?') {
118 // qCDebug(KMIME_LOG) << "third";
119 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
120 return false;
121 }
122
123 // extract the encoding information:
124 QByteArray maybeEncoding(encodingStart, scursor - encodingStart);
125
126 // qCDebug(KMIME_LOG) << "parseEncodedWord: found charset == \"" << maybeCharset
127 // << "\"; language == \"" << maybeLanguage
128 // << "\"; encoding == \"" << maybeEncoding << "\"";
129
130 //
131 // STEP 3:
132 // scan for encoded-text portion of encoded-word
133 //
134
135 // remember start of encoded-text (just after the third '?'):
136 scursor++;
137 const char *encodedTextStart = scursor;
138
139 // find the '?=' sequence (ending the encoded-text):
140 for (; scursor != send ; scursor++) {
141 if (*scursor == '?') {
142 if (scursor + 1 != send) {
143 if (*(scursor + 1) != '=') { // We expect a '=' after the '?', but we got something else; ignore
144 KMIME_WARN << "Stray '?' in q-encoded word, ignoring this.";
145 continue;
146 } else { // yep, found a '?=' sequence
147 scursor += 2;
148 break;
149 }
150 } else { // The '?' is the last char, but we need a '=' after it!
151 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
152 return false;
153 }
154 }
155 }
156
157 if (*(scursor - 2) != '?' || *(scursor - 1) != '=' ||
158 scursor < encodedTextStart + 2) {
159 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
160 return false;
161 }
162
163 // set end sentinel for encoded-text:
164 const char *const encodedTextEnd = scursor - 2;
165
166 //
167 // STEP 4:
168 // setup decoders for the transfer encoding and the charset
169 //
170
171 // try if there's a codec for the encoding found:
172 KCodecs::Codec *codec = KCodecs::Codec::codecForName(maybeEncoding);
173 if (!codec) {
174 KMIME_WARN_UNKNOWN(Encoding, maybeEncoding);
175 return false;
176 }
177
178 // get an instance of a corresponding decoder:
179 KCodecs::Decoder *dec = codec->makeDecoder();
180 assert(dec);
181
182 // try if there's a (text)codec for the charset found:
183 QStringDecoder textCodec;
184 if (maybeCharset.isEmpty()) {
185 textCodec = QStringDecoder(defaultCS.constData());
186 if (!textCodec.isValid()) {
188 }
189 usedCS = cachedCharset(defaultCS);
190 } else {
191 textCodec = QStringDecoder(maybeCharset.constData());
192 if (textCodec.isValid()) { //no suitable codec found => use default charset
193 usedCS = cachedCharset(defaultCS);
194 } else {
196 usedCS = cachedCharset(maybeCharset);
197 }
198 }
199
200 if (!textCodec.isValid()) {
201 KMIME_WARN_UNKNOWN(Charset, maybeCharset);
202 delete dec;
203 return false;
204 };
205
206 // qCDebug(KMIME_LOG) << "mimeName(): \"" << textCodec->name() << "\"";
207
208 // allocate a temporary buffer to store the 8bit text:
209 int encodedTextLength = encodedTextEnd - encodedTextStart;
210 QByteArray buffer;
211 buffer.resize(codec->maxDecodedSizeFor(encodedTextLength));
212 char *bbegin = buffer.data();
213 char *bend = bbegin + buffer.length();
214
215 //
216 // STEP 5:
217 // do the actual decoding
218 //
219
220 if (!dec->decode(encodedTextStart, encodedTextEnd, bbegin, bend)) {
221 KMIME_WARN << codec->name() << "codec lies about its maxDecodedSizeFor("
222 << encodedTextLength << ")\nresult may be truncated";
223 }
224
225 result = textCodec.decode(QByteArrayView(buffer.data(), bbegin - buffer.data()));
226
227 // qCDebug(KMIME_LOG) << "result now: \"" << result << "\"";
228 // cleanup:
229 delete dec;
230 language = maybeLanguage;
231
232 return true;
233}
234
// Advances scursor past any run of space, tab, CR and LF characters,
// stopping at the first other character or at send.
static inline void eatWhiteSpace(const char *&scursor, const char *const send)
{
    for (; scursor != send; ++scursor) {
        switch (*scursor) {
        case ' ':
        case '\n':
        case '\t':
        case '\r':
            break; // whitespace: keep going
        default:
            return;
        }
    }
}
243
244bool parseAtom(const char*&scursor, const char *const send,
245 QByteArray &result, bool allow8Bit)
246{
247 QPair<const char *, int> maybeResult;
248
249 if (parseAtom(scursor, send, maybeResult, allow8Bit)) {
250 result = QByteArray(maybeResult.first, maybeResult.second);
251 return true;
252 }
253
254 return false;
255}
256
257bool parseAtom(const char*&scursor, const char *const send,
258 QPair<const char *, int> &result, bool allow8Bit)
259{
260 bool success = false;
261 const char *start = scursor;
262
263 while (scursor != send) {
264 signed char ch = *scursor++;
265 if (ch > 0 && isAText(ch)) {
266 // AText: OK
267 success = true;
268 } else if (allow8Bit && ch < 0) {
269 // 8bit char: not OK, but be tolerant.
270 KMIME_WARN_8BIT(ch);
271 success = true;
272 } else {
273 // CTL or special - marking the end of the atom:
274 // re-set sursor to point to the offending
275 // char and return:
276 scursor--;
277 break;
278 }
279 }
280 result.first = start;
281 result.second = scursor - start;
282 return success;
283}
284
285bool parseToken(const char*&scursor, const char *const send,
286 QByteArray &result, ParseTokenFlags flags)
287{
288 QPair<const char *, int> maybeResult;
289
290 if (parseToken(scursor, send, maybeResult, flags)) {
291 result = QByteArray(maybeResult.first, maybeResult.second);
292 return true;
293 }
294
295 return false;
296}
297
298bool parseToken(const char*&scursor, const char *const send,
299 QPair<const char *, int> &result, ParseTokenFlags flags)
300{
301 bool success = false;
302 const char *start = scursor;
303
304 while (scursor != send) {
305 signed char ch = *scursor++;
306 if (ch > 0 && isTText(ch)) {
307 // TText: OK
308 success = true;
309 } else if ((flags & ParseTokenAllow8Bit) && ch < 0) {
310 // 8bit char: not OK, but be tolerant.
311 KMIME_WARN_8BIT(ch);
312 success = true;
313 } else if ((flags & ParseTokenRelaxedTText) && ch == '/') {
314 success = true;
315 } else {
316 // CTL or tspecial - marking the end of the atom:
317 // re-set sursor to point to the offending
318 // char and return:
319 scursor--;
320 break;
321 }
322 }
323 result.first = start;
324 result.second = scursor - start;
325 return success;
326}
327
328#define READ_ch_OR_FAIL if ( scursor == send ) { \
329 KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \
330 return false; \
331 } else { \
332 ch = *scursor++; \
333 }
334
335// known issues:
336//
337// - doesn't handle quoted CRLF
338
339bool parseGenericQuotedString(const char *&scursor, const char *const send,
340 QString &result, bool isCRLF,
341 const char openChar, const char closeChar)
342{
343 // We are in a quoted-string or domain-literal or comment and the
344 // cursor points to the first char after the openChar.
345 // We will apply unfolding and quoted-pair removal.
346 // We return when we either encounter the end or unescaped openChar
347 // or closeChar.
348 assert(*(scursor - 1) == openChar || *(scursor - 1) == closeChar);
349
350 while (scursor != send) {
351 char ch = *scursor++;
352
353 if (ch == closeChar || ch == openChar) {
354 // end of quoted-string or another opening char:
355 // let caller decide what to do.
356 return true;
357 }
358
359 switch (ch) {
360 case '\\': // quoted-pair
361 // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5
362 READ_ch_OR_FAIL;
363 KMIME_WARN_IF_8BIT(ch);
364 result += QLatin1Char(ch);
365 break;
366 case '\r':
367 // ###
368 // The case of lonely '\r' is easy to solve, as they're
369 // not part of Unix Line-ending conventions.
370 // But I see a problem if we are given Unix-native
371 // line-ending-mails, where we cannot determine anymore
372 // whether a given '\n' was part of a CRLF or was occurring
373 // on it's own.
374 READ_ch_OR_FAIL;
375 if (ch != '\n') {
376 // CR on it's own...
377 KMIME_WARN_LONE(CR);
378 result += QLatin1Char('\r');
379 scursor--; // points to after the '\r' again
380 } else {
381 // CRLF encountered.
382 // lookahead: check for folding
383 READ_ch_OR_FAIL;
384 if (ch == ' ' || ch == '\t') {
385 // correct folding;
386 // position cursor behind the CRLF WSP (unfolding)
387 // and add the WSP to the result
388 result += QLatin1Char(ch);
389 } else {
390 // this is the "shouldn't happen"-case. There is a CRLF
391 // inside a quoted-string without it being part of FWS.
392 // We take it verbatim.
393 KMIME_WARN_NON_FOLDING(CRLF);
394 result += QLatin1StringView("\r\n");
395 // the cursor is decremented again, so's we need not
396 // duplicate the whole switch here. "ch" could've been
397 // everything (incl. openChar or closeChar).
398 scursor--;
399 }
400 }
401 break;
402 case '\n':
403 // Note: CRLF has been handled above already!
404 // ### LF needs special treatment, depending on whether isCRLF
405 // is true (we can be sure a lonely '\n' was meant this way) or
406 // false ('\n' alone could have meant LF or CRLF in the original
407 // message. This parser assumes CRLF iff the LF is followed by
408 // either WSP (folding) or NULL (premature end of quoted-string;
409 // Should be fixed, since NULL is allowed as per rfc822).
410 READ_ch_OR_FAIL;
411 if (!isCRLF && (ch == ' ' || ch == '\t')) {
412 // folding
413 // correct folding
414 result += QLatin1Char(ch);
415 } else {
416 // non-folding
417 KMIME_WARN_LONE(LF);
418 result += QLatin1Char('\n');
419 // pos is decremented, so's we need not duplicate the whole
420 // switch here. ch could've been everything (incl. <">, "\").
421 scursor--;
422 }
423 break;
424 case '=': {
425 // ### Work around broken clients that send encoded words in quoted-strings
426 // For example, older KMail versions.
427 if (scursor == send) {
428 break;
429 }
430
431 const char *oldscursor = scursor;
432 QString tmp;
433 QByteArray lang;
434 QByteArray charset;
435 if (*scursor++ == '?') {
436 --scursor;
437 if (parseEncodedWord(scursor, send, tmp, lang, charset)) {
438 result += tmp;
439 //qDebug() << " tmp " << tmp;
440 if (scursor == send) {
441 break;
442 } else if (*scursor++ == ' ') { //Workaround Bug 362650 thunderbird add space for each new line
443 if (scursor == send) {
444 --scursor;
445 break;
446 } else if (*scursor++ == '=') {
447 if (scursor == send) {
448 --scursor;
449 --scursor;
450 break;
451 } else if (*scursor++ == '?') {
452 --scursor;
453 --scursor;
454 break;
455 }
456 } else {
457 --scursor;
458 --scursor;
459 }
460 } else {
461 --scursor;
462 }
463
464 break;
465 } else {
466 scursor = oldscursor;
467 }
468 } else {
469 scursor = oldscursor;
470 }
471 // fall through
472 [[fallthrough]];
473 }
474 default:
475 KMIME_WARN_IF_8BIT(ch);
476 result += QLatin1Char(ch);
477 }
478 }
479
480 return false;
481}
482
483// known issues:
484//
485// - doesn't handle encoded-word inside comments.
486
487bool parseComment(const char *&scursor, const char *const send,
488 QString &result, bool isCRLF, bool reallySave)
489{
490 int commentNestingDepth = 1;
491 const char *afterLastClosingParenPos = nullptr;
492 QString maybeCmnt;
493 const char *oldscursor = scursor;
494
495 assert(*(scursor - 1) == '(');
496
497 while (commentNestingDepth) {
498 QString cmntPart;
499 if (parseGenericQuotedString(scursor, send, cmntPart, isCRLF, '(', ')')) {
500 assert(*(scursor - 1) == ')' || *(scursor - 1) == '(');
501 // see the kdoc for above function for the possible conditions
502 // we have to check:
503 switch (*(scursor - 1)) {
504 case ')':
505 if (reallySave) {
506 // add the chunk that's now surely inside the comment.
507 result += maybeCmnt;
508 result += cmntPart;
509 if (commentNestingDepth > 1) {
510 // don't add the outermost ')'...
511 result += QLatin1Char(')');
512 }
513 maybeCmnt.clear();
514 }
515 afterLastClosingParenPos = scursor;
516 --commentNestingDepth;
517 break;
518 case '(':
519 if (reallySave) {
520 // don't add to "result" yet, because we might find that we
521 // are already outside the (broken) comment...
522 maybeCmnt += cmntPart;
523 maybeCmnt += QLatin1Char('(');
524 }
525 ++commentNestingDepth;
526 break;
527 default: assert(0);
528 } // switch
529 } else {
530 // !parseGenericQuotedString, ie. premature end
531 if (afterLastClosingParenPos) {
532 scursor = afterLastClosingParenPos;
533 } else {
534 scursor = oldscursor;
535 }
536 return false;
537 }
538 } // while
539
540 return true;
541}
542
543// known issues: none.
544
545bool parsePhrase(const char *&scursor, const char *const send,
546 QString &result, bool isCRLF)
547{
548 enum {
549 None, Phrase, Atom, EncodedWord, QuotedString
550 } found = None;
551
552 QString tmp;
553 QByteArray lang;
554 QByteArray charset;
555 QPair<const char *, int> tmpAtom;
556 const char *successfullyParsed = nullptr;
557 // only used by the encoded-word branch
558 const char *oldscursor;
559 // used to suppress whitespace between adjacent encoded-words
560 // (rfc2047, 6.2):
561 bool lastWasEncodedWord = false;
562
563 while (scursor != send) {
564 char ch = *scursor++;
565 switch (ch) {
566 case '.': // broken, but allow for intorop's sake
567 if (found == None) {
568 --scursor;
569 return false;
570 } else {
571 if (scursor != send && (*scursor == ' ' || *scursor == '\t')) {
572 result += QLatin1StringView(". ");
573 } else {
574 result += QLatin1Char('.');
575 }
576 successfullyParsed = scursor;
577 }
578 break;
579 case '"': // quoted-string
580 tmp.clear();
581 if (parseGenericQuotedString(scursor, send, tmp, isCRLF, '"', '"')) {
582 successfullyParsed = scursor;
583 assert(*(scursor - 1) == '"');
584 switch (found) {
585 case None:
586 found = QuotedString;
587 break;
588 case Phrase:
589 case Atom:
590 case EncodedWord:
591 case QuotedString:
592 found = Phrase;
593 result += QLatin1Char(' '); // rfc822, 3.4.4
594 break;
595 default:
596 assert(0);
597 }
598 lastWasEncodedWord = false;
599 result += tmp;
600 } else {
601 // premature end of quoted string.
602 // What to do? Return leading '"' as special? Return as quoted-string?
603 // We do the latter if we already found something, else signal failure.
604 if (found == None) {
605 return false;
606 } else {
607 result += QLatin1Char(' '); // rfc822, 3.4.4
608 result += tmp;
609 return true;
610 }
611 }
612 break;
613 case '(': // comment
614 // parse it, but ignore content:
615 tmp.clear();
616 if (parseComment(scursor, send, tmp, isCRLF,
617 false /*don't bother with the content*/)) {
618 successfullyParsed = scursor;
619 lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2
620 } else {
621 if (found == None) {
622 return false;
623 } else {
624 scursor = successfullyParsed;
625 return true;
626 }
627 }
628 break;
629 case '=': // encoded-word
630 tmp.clear();
631 oldscursor = scursor;
632 lang.clear();
633 charset.clear();
634 if (parseEncodedWord(scursor, send, tmp, lang, charset)) {
635 successfullyParsed = scursor;
636 switch (found) {
637 case None:
638 found = EncodedWord;
639 break;
640 case Phrase:
641 case EncodedWord:
642 case Atom:
643 case QuotedString:
644 if (!lastWasEncodedWord) {
645 result += QLatin1Char(' '); // rfc822, 3.4.4
646 }
647 found = Phrase;
648 break;
649 default: assert(0);
650 }
651 lastWasEncodedWord = true;
652 result += tmp;
653 break;
654 } else {
655 // parse as atom:
656 scursor = oldscursor;
657 }
658 [[fallthrough]];
659 // fall though...
660
661 default: //atom
662 scursor--;
663 if (parseAtom(scursor, send, tmpAtom, true /* allow 8bit */)) {
664 successfullyParsed = scursor;
665 switch (found) {
666 case None:
667 found = Atom;
668 break;
669 case Phrase:
670 case Atom:
671 case EncodedWord:
672 case QuotedString:
673 found = Phrase;
674 result += QLatin1Char(' '); // rfc822, 3.4.4
675 break;
676 default:
677 assert(0);
678 }
679 lastWasEncodedWord = false;
680 result += QLatin1StringView(tmpAtom.first, tmpAtom.second);
681 } else {
682 if (found == None) {
683 return false;
684 } else {
685 scursor = successfullyParsed;
686 return true;
687 }
688 }
689 }
690 eatWhiteSpace(scursor, send);
691 }
692
693 return found != None;
694}
695
696bool parseDotAtom(const char *&scursor, const char *const send,
697 QByteArray &result, bool isCRLF)
698{
699 eatCFWS(scursor, send, isCRLF);
700
701 // always points to just after the last atom parsed:
702 const char *successfullyParsed;
703
704 QByteArray tmp;
705 if (!parseAtom(scursor, send, tmp, false /* no 8bit */)) {
706 return false;
707 }
708 result += tmp;
709 successfullyParsed = scursor;
710
711 while (scursor != send) {
712
713 // end of header or no '.' -> return
714 if (scursor == send || *scursor != '.') {
715 return true;
716 }
717 scursor++; // eat '.'
718
719 if (scursor == send || !isAText(*scursor)) {
720 // end of header or no AText, but this time following a '.'!:
721 // reset cursor to just after last successfully parsed char and
722 // return:
723 scursor = successfullyParsed;
724 return true;
725 }
726
727 // try to parse the next atom:
728 QByteArray maybeAtom;
729 if (!parseAtom(scursor, send, maybeAtom, false /*no 8bit*/)) {
730 scursor = successfullyParsed;
731 return true;
732 }
733
734 result += '.';
735 result += maybeAtom;
736 successfullyParsed = scursor;
737 }
738
739 scursor = successfullyParsed;
740 return true;
741}
742
743void eatCFWS(const char *&scursor, const char *const send, bool isCRLF)
744{
745 QString dummy;
746
747 while (scursor != send) {
748 const char *oldscursor = scursor;
749
750 char ch = *scursor++;
751
752 switch (ch) {
753 case ' ':
754 case '\t': // whitespace
755 case '\r':
756 case '\n': // folding
757 continue;
758
759 case '(': // comment
760 if (parseComment(scursor, send, dummy, isCRLF, false /*don't save*/)) {
761 continue;
762 }
763 scursor = oldscursor;
764 return;
765
766 default:
767 scursor = oldscursor;
768 return;
769 }
770 }
771}
772
773bool parseDomain(const char *&scursor, const char *const send,
774 QString &result, bool isCRLF)
775{
776 eatCFWS(scursor, send, isCRLF);
777 if (scursor == send) {
778 return false;
779 }
780
781 // domain := dot-atom / domain-literal / atom *("." atom)
782 //
783 // equivalent to:
784 // domain = dot-atom / domain-literal,
785 // since parseDotAtom does allow CFWS between atoms and dots
786
787 if (*scursor == '[') {
788 // domain-literal:
789 QString maybeDomainLiteral;
790 // eat '[':
791 scursor++;
792 while (parseGenericQuotedString(scursor, send, maybeDomainLiteral,
793 isCRLF, '[', ']')) {
794 if (scursor == send) {
795 // end of header: check for closing ']':
796 if (*(scursor - 1) == ']') {
797 // OK, last char was ']':
798 result = maybeDomainLiteral;
799 return true;
800 } else {
801 // not OK, domain-literal wasn't closed:
802 return false;
803 }
804 }
805 // we hit openChar in parseGenericQuotedString.
806 // include it in maybeDomainLiteral and keep on parsing:
807 if (*(scursor - 1) == '[') {
808 maybeDomainLiteral += QLatin1Char('[');
809 continue;
810 }
811 // OK, real end of domain-literal:
812 result = maybeDomainLiteral;
813 return true;
814 }
815 } else {
816 // dot-atom:
817 QByteArray maybeDotAtom;
818 if (parseDotAtom(scursor, send, maybeDotAtom, isCRLF)) {
819 // Domain may end with '.', if so preserve it'
820 if (scursor != send && *scursor == '.') {
821 maybeDotAtom += '.';
822 scursor++;
823 }
824 result = QString::fromLatin1(maybeDotAtom);
825 return true;
826 }
827 }
828 return false;
829}
830
831bool parseObsRoute(const char *&scursor, const char *const send,
832 QStringList &result, bool isCRLF, bool save)
833{
834 while (scursor != send) {
835 eatCFWS(scursor, send, isCRLF);
836 if (scursor == send) {
837 return false;
838 }
839
840 // empty entry:
841 if (*scursor == ',') {
842 scursor++;
843 if (save) {
844 result.append(QString());
845 }
846 continue;
847 }
848
849 // empty entry ending the list:
850 if (*scursor == ':') {
851 scursor++;
852 if (save) {
853 result.append(QString());
854 }
855 return true;
856 }
857
858 // each non-empty entry must begin with '@':
859 if (*scursor != '@') {
860 return false;
861 } else {
862 scursor++;
863 }
864
865 QString maybeDomain;
866 if (!parseDomain(scursor, send, maybeDomain, isCRLF)) {
867 return false;
868 }
869 if (save) {
870 result.append(maybeDomain);
871 }
872
873 // eat the following (optional) comma:
874 eatCFWS(scursor, send, isCRLF);
875 if (scursor == send) {
876 return false;
877 }
878 if (*scursor == ':') {
879 scursor++;
880 return true;
881 }
882 if (*scursor == ',') {
883 scursor++;
884 }
885 }
886
887 return false;
888}
889
890bool parseAddrSpec(const char *&scursor, const char *const send,
891 AddrSpec &result, bool isCRLF)
892{
893 //
894 // STEP 1:
895 // local-part := dot-atom / quoted-string / word *("." word)
896 //
897 // this is equivalent to:
898 // local-part := word *("." word)
899
900 QString maybeLocalPart;
901 QString tmp;
902 QPair<const char *, int> tmpAtom;
903
904 while (scursor != send) {
905 // first, eat any whitespace
906 eatCFWS(scursor, send, isCRLF);
907
908 char ch = *scursor++;
909 switch (ch) {
910 case '.': // dot
911 maybeLocalPart += QLatin1Char('.');
912 break;
913
914 case '@':
915 goto SAW_AT_SIGN;
916 break;
917
918 case '"': // quoted-string
919 tmp.clear();
920 if (parseGenericQuotedString(scursor, send, tmp, isCRLF, '"', '"')) {
921 maybeLocalPart += tmp;
922 } else {
923 return false;
924 }
925 break;
926
927 default: // atom
928 scursor--; // re-set scursor to point to ch again
929 if (parseAtom(scursor, send, tmpAtom, false /* no 8bit */)) {
930 maybeLocalPart +=
931 QLatin1StringView(tmpAtom.first, tmpAtom.second);
932 } else {
933 return false; // parseAtom can only fail if the first char is non-atext.
934 }
935 break;
936 }
937 }
938
939 return false;
940
941 //
942 // STEP 2:
943 // domain
944 //
945
946SAW_AT_SIGN:
947
948 assert(*(scursor - 1) == '@');
949
950 QString maybeDomain;
951 if (!parseDomain(scursor, send, maybeDomain, isCRLF)) {
952 return false;
953 }
954
955 result.localPart = maybeLocalPart;
956 result.domain = maybeDomain;
957
958 return true;
959}
960
961bool parseAngleAddr(const char *&scursor, const char *const send,
962 AddrSpec &result, bool isCRLF)
963{
964 // first, we need an opening angle bracket:
965 eatCFWS(scursor, send, isCRLF);
966 if (scursor == send || *scursor != '<') {
967 return false;
968 }
969 scursor++; // eat '<'
970
971 eatCFWS(scursor, send, isCRLF);
972 if (scursor == send) {
973 return false;
974 }
975
976 if (*scursor == '@' || *scursor == ',') {
977 // obs-route: parse, but ignore:
978 KMIME_WARN << "obsolete source route found! ignoring.";
979 QStringList dummy;
980 if (!parseObsRoute(scursor, send, dummy,
981 isCRLF, false /* don't save */)) {
982 return false;
983 }
984 // angle-addr isn't complete until after the '>':
985 if (scursor == send) {
986 return false;
987 }
988 }
989
990 // parse addr-spec:
991 AddrSpec maybeAddrSpec;
992 if (!parseAddrSpec(scursor, send, maybeAddrSpec, isCRLF)) {
993 return false;
994 }
995
996 eatCFWS(scursor, send, isCRLF);
997 if (scursor == send || *scursor != '>') {
998 return false;
999 }
1000 scursor++;
1001
1002 result = maybeAddrSpec;
1003 return true;
1004
1005}
1006
1007static QString stripQuotes(const QString &input)
1008{
1009 const QLatin1Char quotes('"');
1010 if (input.startsWith(quotes) && input.endsWith(quotes)) {
1011 QString stripped(input.mid(1, input.size() - 2));
1012 return stripped;
1013 } else {
1014 return input;
1015 }
1016}
1017
1018bool parseMailbox(const char *&scursor, const char *const send,
1019 Mailbox &result, bool isCRLF)
1020{
1021 eatCFWS(scursor, send, isCRLF);
1022 if (scursor == send) {
1023 return false;
1024 }
1025
1026 AddrSpec maybeAddrSpec;
1027 QString maybeDisplayName;
1028
1029 // first, try if it's a vanilla addr-spec:
1030 const char *oldscursor = scursor;
1031 if (parseAddrSpec(scursor, send, maybeAddrSpec, isCRLF)) {
1032 result.setAddress(maybeAddrSpec);
1033 // check for the obsolete form of display-name (as comment):
1034 eatWhiteSpace(scursor, send);
1035 if (scursor != send && *scursor == '(') {
1036 scursor++;
1037 if (!parseComment(scursor, send, maybeDisplayName, isCRLF, true /*keep*/)) {
1038 return false;
1039 }
1040 }
1041 result.setName(stripQuotes(maybeDisplayName));
1042 return true;
1043 }
1044 scursor = oldscursor;
1045
1046 // second, see if there's a display-name:
1047 if (!parsePhrase(scursor, send, maybeDisplayName, isCRLF)) {
1048 // failed: reset cursor, note absent display-name
1049 maybeDisplayName.clear();
1050 scursor = oldscursor;
1051 } else {
1052 // succeeded: eat CFWS
1053 eatCFWS(scursor, send, isCRLF);
1054 if (scursor == send) {
1055 return false;
1056 }
1057 }
1058
1059 // third, parse the angle-addr:
1060 if (!parseAngleAddr(scursor, send, maybeAddrSpec, isCRLF)) {
1061 return false;
1062 }
1063
1064 if (maybeDisplayName.isNull()) {
1065 // check for the obsolete form of display-name (as comment):
1066 eatWhiteSpace(scursor, send);
1067 if (scursor != send && *scursor == '(') {
1068 scursor++;
1069 if (!parseComment(scursor, send, maybeDisplayName, isCRLF, true /*keep*/)) {
1070 return false;
1071 }
1072 }
1073 }
1074
1075 result.setName(stripQuotes(maybeDisplayName));
1076 result.setAddress(maybeAddrSpec);
1077 return true;
1078}
1079
1080bool parseGroup(const char *&scursor, const char *const send,
1081 Address &result, bool isCRLF)
1082{
1083 // group := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS]
1084 //
1085 // equivalent to:
1086 // group := display-name ":" [ obs-mbox-list ] ";"
1087
1088 eatCFWS(scursor, send, isCRLF);
1089 if (scursor == send) {
1090 return false;
1091 }
1092
1093 // get display-name:
1094 QString maybeDisplayName;
1095 if (!parsePhrase(scursor, send, maybeDisplayName, isCRLF)) {
1096 return false;
1097 }
1098
1099 // get ":":
1100 eatCFWS(scursor, send, isCRLF);
1101 if (scursor == send || *scursor != ':') {
1102 return false;
1103 }
1104
1105 // KDE5 TODO: Don't expose displayName as public, but rather add setter for it that
1106 // automatically calls removeBidiControlChars
1107 result.displayName = removeBidiControlChars(maybeDisplayName);
1108
1109 // get obs-mbox-list (may contain empty entries):
1110 scursor++;
1111 while (scursor != send) {
1112 eatCFWS(scursor, send, isCRLF);
1113 if (scursor == send) {
1114 return false;
1115 }
1116
1117 // empty entry:
1118 if (*scursor == ',') {
1119 scursor++;
1120 continue;
1121 }
1122
1123 // empty entry ending the list:
1124 if (*scursor == ';') {
1125 scursor++;
1126 return true;
1127 }
1128
1129 Mailbox maybeMailbox;
1130 if (!parseMailbox(scursor, send, maybeMailbox, isCRLF)) {
1131 return false;
1132 }
1133 result.mailboxList.append(maybeMailbox);
1134
1135 eatCFWS(scursor, send, isCRLF);
1136 // premature end:
1137 if (scursor == send) {
1138 return false;
1139 }
1140 // regular end of the list:
1141 if (*scursor == ';') {
1142 scursor++;
1143 return true;
1144 }
1145 // eat regular list entry separator:
1146 if (*scursor == ',') {
1147 scursor++;
1148 }
1149 }
1150 return false;
1151}
1152
1153bool parseAddress(const char *&scursor, const char *const send,
1154 Address &result, bool isCRLF)
1155{
1156 // address := mailbox / group
1157
1158 eatCFWS(scursor, send, isCRLF);
1159 if (scursor == send) {
1160 return false;
1161 }
1162
1163 // first try if it's a single mailbox:
1164 Mailbox maybeMailbox;
1165 const char *oldscursor = scursor;
1166 if (parseMailbox(scursor, send, maybeMailbox, isCRLF)) {
1167 // yes, it is:
1168 result.displayName.clear();
1169 result.mailboxList.append(maybeMailbox);
1170 return true;
1171 }
1172 scursor = oldscursor;
1173
1174 Address maybeAddress;
1175
1176 // no, it's not a single mailbox. Try if it's a group:
1177 if (!parseGroup(scursor, send, maybeAddress, isCRLF)) {
1178 return false;
1179 }
1180
1181 result = maybeAddress;
1182 return true;
1183}
1184
1185bool parseAddressList(const char *&scursor, const char *const send,
1186 AddressList &result, bool isCRLF)
1187{
1188 while (scursor != send) {
1189 eatCFWS(scursor, send, isCRLF);
1190 // end of header: this is OK.
1191 if (scursor == send) {
1192 return true;
1193 }
1194 // empty entry: ignore:
1195 if (*scursor == ',') {
1196 scursor++;
1197 continue;
1198 }
1199 // broken clients might use ';' as list delimiter, accept that as well
1200 if (*scursor == ';') {
1201 scursor++;
1202 continue;
1203 }
1204
1205 // parse one entry
1206 Address maybeAddress;
1207 if (!parseAddress(scursor, send, maybeAddress, isCRLF)) {
1208 return false;
1209 }
1210 result.append(maybeAddress);
1211
1212 eatCFWS(scursor, send, isCRLF);
1213 // end of header: this is OK.
1214 if (scursor == send) {
1215 return true;
1216 }
1217 // comma separating entries: eat it.
1218 if (*scursor == ',') {
1219 scursor++;
1220 }
1221 }
1222 return true;
1223}
1224
1225static bool parseParameter(const char *&scursor, const char *const send,
1226 QPair<QString, QStringOrQPair> &result, bool isCRLF)
1227{
1228 // parameter = regular-parameter / extended-parameter
1229 // regular-parameter = regular-parameter-name "=" value
1230 // extended-parameter =
1231 // value = token / quoted-string
1232 //
1233 // note that rfc2231 handling is out of the scope of this function.
1234 // Therefore we return the attribute as QByteArray and the value as
1235 // (start,length) tuple if we see that the value is encoded
1236 // (trailing asterisk), for parseParameterList to decode...
1237
1238 eatCFWS(scursor, send, isCRLF);
1239 if (scursor == send) {
1240 return false;
1241 }
1242
1243 //
1244 // parse the parameter name:
1245 //
1246 QByteArray tmpAttr;
1247 if (!parseToken(scursor, send, tmpAttr, ParseTokenNoFlag)) {
1248 return false;
1249 }
1250 // FIXME: we could use QMap<QByteArray, ...> in the API for parameters
1251 QString maybeAttribute = QString::fromLatin1(tmpAttr);
1252
1253 eatCFWS(scursor, send, isCRLF);
1254 // premature end: not OK (haven't seen '=' yet).
1255 if (scursor == send || *scursor != '=') {
1256 return false;
1257 }
1258 scursor++; // eat '='
1259
1260 eatCFWS(scursor, send, isCRLF);
1261 if (scursor == send) {
1262 // don't choke on attribute=, meaning the value was omitted:
1263 if (maybeAttribute.endsWith(QLatin1Char('*'))) {
1264 KMIME_WARN << "attribute ends with \"*\", but value is empty!"
1265 "Chopping away \"*\".";
1266 maybeAttribute.chop(1);
1267 }
1268 result = qMakePair(maybeAttribute.toLower(), QStringOrQPair());
1269 return true;
1270 }
1271
1272 const char *oldscursor = scursor;
1273
1274 //
1275 // parse the parameter value:
1276 //
1277 QStringOrQPair maybeValue;
1278 if (*scursor == '"') {
1279 // value is a quoted-string:
1280 scursor++;
1281 if (maybeAttribute.endsWith(QLatin1Char('*'))) {
1282 // attributes ending with "*" designate extended-parameters,
1283 // which cannot have quoted-strings as values. So we remove the
1284 // trailing "*" to not confuse upper layers.
1285 KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string!"
1286 "Chopping away \"*\".";
1287 maybeAttribute.chop(1);
1288 }
1289
1290 if (!parseGenericQuotedString(scursor, send, maybeValue.qstring, isCRLF)) {
1291 scursor = oldscursor;
1292 result = qMakePair(maybeAttribute.toLower(), QStringOrQPair());
1293 return false; // this case needs further processing by upper layers!!
1294 }
1295 } else {
1296 // value is a token:
1297 if (!parseToken(scursor, send, maybeValue.qpair, ParseTokenRelaxedTText)) {
1298 scursor = oldscursor;
1299 result = qMakePair(maybeAttribute.toLower(), QStringOrQPair());
1300 return false; // this case needs further processing by upper layers!!
1301 }
1302 }
1303
1304 result = qMakePair(maybeAttribute.toLower(), maybeValue);
1305 return true;
1306}
1307
1308static bool parseRawParameterList(const char *&scursor, const char *const send,
1310 bool isCRLF)
1311{
1312 // we use parseParameter() consecutively to obtain a map of raw
1313 // attributes to raw values. "Raw" here means that we don't do
1314 // rfc2231 decoding and concatenation. This is left to
1315 // parseParameterList(), which will call this function.
1316 //
1317 // The main reason for making this chunk of code a separate
1318 // (private) method is that we can deal with broken parameters
1319 // _here_ and leave the rfc2231 handling solely to
1320 // parseParameterList(), which will still be enough work.
1321 while (scursor != send) {
1322 eatCFWS(scursor, send, isCRLF);
1323 // empty entry ending the list: OK.
1324 if (scursor == send) {
1325 return true;
1326 }
1327 // empty list entry: ignore.
1328 if (*scursor == ';') {
1329 scursor++;
1330 continue;
1331 }
1332 QPair<QString, QStringOrQPair> maybeParameter;
1333 if (!parseParameter(scursor, send, maybeParameter, isCRLF)) {
1334 // we need to do a bit of work if the attribute is not
1335 // NULL. These are the cases marked with "needs further
1336 // processing" in parseParameter(). Specifically, parsing of the
1337 // token or the quoted-string, which should represent the value,
1338 // failed. We take the easy way out and simply search for the
1339 // next ';' to start parsing again. (Another option would be to
1340 // take the text between '=' and ';' as value)
1341 if (maybeParameter.first.isNull()) {
1342 return false;
1343 }
1344 while (scursor != send) {
1345 if (*scursor++ == ';') {
1346 goto IS_SEMICOLON;
1347 }
1348 }
1349 // scursor == send case: end of list.
1350 return true;
1351 IS_SEMICOLON:
1352 // *scursor == ';' case: parse next entry.
1353 continue;
1354 }
1355 // successful parsing brings us here:
1356 result.insert(maybeParameter.first, maybeParameter.second);
1357
1358 eatCFWS(scursor, send, isCRLF);
1359 // end of header: ends list.
1360 if (scursor == send) {
1361 return true;
1362 }
1363 // regular separator: eat it.
1364 if (*scursor == ';') {
1365 scursor++;
1366 }
1367 }
1368 return true;
1369}
1370
1371static void decodeRFC2231Value(KCodecs::Codec *&rfc2231Codec,
1372 QStringDecoder &textcodec,
1373 bool isContinuation, QString &value,
1374 QPair<const char *, int> &source, QByteArray &charset)
1375{
1376 //
1377 // parse the raw value into (charset,language,text):
1378 //
1379
1380 const char *decBegin = source.first;
1381 const char *decCursor = decBegin;
1382 const char *decEnd = decCursor + source.second;
1383
1384 if (!isContinuation) {
1385 // find the first single quote
1386 while (decCursor != decEnd) {
1387 if (*decCursor == '\'') {
1388 break;
1389 } else {
1390 decCursor++;
1391 }
1392 }
1393
1394 if (decCursor == decEnd) {
1395 // there wasn't a single single quote at all!
1396 // take the whole value to be in latin-1:
1397 KMIME_WARN << "No charset in extended-initial-value."
1398 "Assuming \"iso-8859-1\".";
1399 value += QString::fromLatin1(decBegin, source.second);
1400 return;
1401 }
1402
1403 charset = QByteArray(decBegin, decCursor - decBegin);
1404
1405 const char *oldDecCursor = ++decCursor;
1406 // find the second single quote (we ignore the language tag):
1407 while (decCursor != decEnd) {
1408 if (*decCursor == '\'') {
1409 break;
1410 } else {
1411 decCursor++;
1412 }
1413 }
1414 if (decCursor == decEnd) {
1415 KMIME_WARN << "No language in extended-initial-value."
1416 "Trying to recover.";
1417 decCursor = oldDecCursor;
1418 } else {
1419 decCursor++;
1420 }
1421
1422 // decCursor now points to the start of the
1423 // "extended-other-values":
1424
1425 //
1426 // get the decoders:
1427 //
1428
1429 textcodec = QStringDecoder(charset.constData());
1430 if (!textcodec.isValid()) {
1431 KMIME_WARN_UNKNOWN(Charset, charset);
1432 }
1433 }
1434
1435 if (!rfc2231Codec) {
1436 rfc2231Codec = KCodecs::Codec::codecForName("x-kmime-rfc2231");
1437 assert(rfc2231Codec);
1438 }
1439
1440 if (!textcodec.isValid()) {
1441 value += QString::fromLatin1(decCursor, decEnd - decCursor);
1442 return;
1443 }
1444
1445 KCodecs::Decoder *dec = rfc2231Codec->makeDecoder();
1446 assert(dec);
1447
1448 //
1449 // do the decoding:
1450 //
1451
1452 QByteArray buffer;
1453 buffer.resize(rfc2231Codec->maxDecodedSizeFor(decEnd - decCursor));
1454 QByteArray::Iterator bit = buffer.begin();
1455 QByteArray::ConstIterator bend = buffer.end();
1456
1457 if (!dec->decode(decCursor, decEnd, bit, bend)) {
1458 KMIME_WARN << rfc2231Codec->name()
1459 << "codec lies about its maxDecodedSizeFor()"
1460 << Qt::endl
1461 << "result may be truncated";
1462 }
1463
1464 value += textcodec.decode(QByteArrayView(buffer.begin(), bit - buffer.begin()));
1465
1466 // qCDebug(KMIME_LOG) << "value now: \"" << value << "\"";
1467 // cleanup:
1468 delete dec;
1469}
1470
1471// known issues:
1472// - permutes rfc2231 continuations when the total number of parts
1473// exceeds 10 (other-sections then becomes *xy, ie. two digits)
1474
1475bool parseParameterListWithCharset(const char *&scursor,
1476 const char *const send,
1477 QMap<QString, QString> &result,
1478 QByteArray &charset, bool isCRLF)
1479{
1480// parse the list into raw attribute-value pairs:
1481 QMap<QString, QStringOrQPair> rawParameterList;
1482 if (!parseRawParameterList(scursor, send, rawParameterList, isCRLF)) {
1483 return false;
1484 }
1485
1486 if (rawParameterList.isEmpty()) {
1487 return true;
1488 }
1489
1490 // decode rfc 2231 continuations and alternate charset encoding:
1491
1492 // NOTE: this code assumes that what QMapIterator delivers is sorted
1493 // by the key!
1494
1495 KCodecs::Codec *rfc2231Codec = nullptr;
1496 QStringDecoder textcodec;
1497 QString attribute;
1498 QString value;
1499 enum Mode {
1500 NoMode = 0x0, Continued = 0x1, Encoded = 0x2
1501 };
1502
1503 enum EncodingMode {
1504 NoEncoding,
1505 RFC2047,
1506 RFC2231
1507 };
1508
1510 QMap<QString, QStringOrQPair>::Iterator end = rawParameterList.end();
1511
1512 for (it = rawParameterList.begin() ; it != end ; ++it) {
1513 if (attribute.isNull() || !it.key().startsWith(attribute)) {
1514 //
1515 // new attribute:
1516 //
1517
1518 // store the last attribute/value pair in the result map now:
1519 if (!attribute.isNull()) {
1520 result.insert(attribute, value);
1521 }
1522 // and extract the information from the new raw attribute:
1523 value.clear();
1524 attribute = it.key();
1525 int mode = NoMode;
1526 EncodingMode encodingMode = NoEncoding;
1527
1528 // is the value rfc2331-encoded?
1529 if (attribute.endsWith(QLatin1Char('*'))) {
1530 attribute.chop(1);
1531 mode |= Encoded;
1532 encodingMode = RFC2231;
1533 }
1534 // is the value rfc2047-encoded?
1535 if (!(*it).qstring.isNull() &&
1536 (*it).qstring.contains(QLatin1StringView("=?"))) {
1537 mode |= Encoded;
1538 encodingMode = RFC2047;
1539 }
1540 // is the value continued?
1541 if (attribute.endsWith(QLatin1StringView("*0"))) {
1542 attribute.chop(2);
1543 mode |= Continued;
1544 }
1545 //
1546 // decode if necessary:
1547 //
1548 if (mode & Encoded) {
1549 if (encodingMode == RFC2231) {
1550 decodeRFC2231Value(rfc2231Codec, textcodec,
1551 false, /* isn't continuation */
1552 value, (*it).qpair, charset);
1553 } else if (encodingMode == RFC2047) {
1554 value += KCodecs::decodeRFC2047String((*it).qstring.toLatin1(), &charset);
1555 }
1556 } else {
1557 // not encoded.
1558 if ((*it).qpair.first) {
1559 value += QString::fromLatin1((*it).qpair.first, (*it).qpair.second);
1560 } else {
1561 value += (*it).qstring;
1562 }
1563 }
1564
1565 //
1566 // shortcut-processing when the value isn't encoded:
1567 //
1568
1569 if (!(mode & Continued)) {
1570 // save result already:
1571 result.insert(attribute, value);
1572 // force begin of a new attribute:
1573 attribute.clear();
1574 }
1575 } else { // it.key().startsWith( attribute )
1576 //
1577 // continuation
1578 //
1579
1580 // ignore the section and trust QMap to have sorted the keys:
1581 if (it.key().endsWith(QLatin1Char('*'))) {
1582 // encoded
1583 decodeRFC2231Value(rfc2231Codec, textcodec,
1584 true, /* is continuation */
1585 value, (*it).qpair, charset);
1586 } else {
1587 // not encoded
1588 if ((*it).qpair.first) {
1589 value += QString::fromLatin1((*it).qpair.first, (*it).qpair.second);
1590 } else {
1591 value += (*it).qstring;
1592 }
1593 }
1594 }
1595 }
1596 // write last attr/value pair:
1597 if (!attribute.isNull()) {
1598 result.insert(attribute, value);
1599 }
1600
1601 return true;
1602}
1603
1604bool parseParameterList(const char *&scursor, const char *const send,
1605 QMap<QString, QString> &result, bool isCRLF)
1606{
1607 QByteArray charset;
1608 return parseParameterListWithCharset(scursor, send, result, charset, isCRLF);
1609}
1610
1611static const char stdDayNames[][4] = {
1612 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
1613};
1614static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames;
1615
1616static bool parseDayName(const char *&scursor, const char *const send)
1617{
1618 // check bounds:
1619 if (send - scursor < 3) {
1620 return false;
1621 }
1622
1623 for (int i = 0 ; i < stdDayNamesLen ; ++i) {
1624 if (qstrnicmp(scursor, stdDayNames[i], 3) == 0) {
1625 scursor += 3;
1626 // qCDebug(KMIME_LOG) << "found" << stdDayNames[i];
1627 return true;
1628 }
1629 }
1630
1631 return false;
1632}
1633
1634static const char stdMonthNames[][4] = {
1635 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
1636 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
1637};
1638static const int stdMonthNamesLen =
1639 sizeof stdMonthNames / sizeof *stdMonthNames;
1640
1641static bool parseMonthName(const char *&scursor, const char *const send,
1642 int &result)
1643{
1644 // check bounds:
1645 if (send - scursor < 3) {
1646 return false;
1647 }
1648
1649 for (result = 0 ; result < stdMonthNamesLen ; ++result) {
1650 if (qstrnicmp(scursor, stdMonthNames[result], 3) == 0) {
1651 scursor += 3;
1652 return true;
1653 }
1654 }
1655
1656 // not found:
1657 return false;
1658}
1659
// Table of alphabetic time zone names and their offset from GMT in
// seconds (positive == east of GMT). The table is searched front to
// back, so for a name listed twice only the first entry would be used.
static const struct {
    const char tzName[5];
    long int secsEastOfGMT;
} timeZones[] = {
    // rfc 822 timezones:
    { "GMT", 0 },
    { "UT", 0 },
    { "EDT", -4 * 3600 },
    { "EST", -5 * 3600 },
    // was mislabelled "MST", which shadowed the real MST entry below
    // and left CDT unknown:
    { "CDT", -5 * 3600 },
    { "CST", -6 * 3600 },
    { "MDT", -6 * 3600 },
    { "MST", -7 * 3600 },
    { "PDT", -7 * 3600 },
    { "PST", -8 * 3600 },
    // common, non-rfc-822 zones:
    { "CET", 1 * 3600 },
    { "MET", 1 * 3600 },
    { "UTC", 0 },
    { "CEST", 2 * 3600 },
    { "BST", 1 * 3600 },
    // rfc 822 military timezones:
    { "Z", 0 },
    { "A", -1 * 3600 },
    { "B", -2 * 3600 },
    { "C", -3 * 3600 },
    { "D", -4 * 3600 },
    { "E", -5 * 3600 },
    { "F", -6 * 3600 },
    { "G", -7 * 3600 },
    { "H", -8 * 3600 },
    { "I", -9 * 3600 },
    // J is not used!
    { "K", -10 * 3600 },
    { "L", -11 * 3600 },
    { "M", -12 * 3600 },
    { "N", 1 * 3600 },
    { "O", 2 * 3600 },
    { "P", 3 * 3600 },
    { "Q", 4 * 3600 },
    { "R", 5 * 3600 },
    { "S", 6 * 3600 },
    { "T", 7 * 3600 },
    { "U", 8 * 3600 },
    { "V", 9 * 3600 },
    { "W", 10 * 3600 },
    { "X", 11 * 3600 },
    { "Y", 12 * 3600 },
};
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
1710
1711static bool parseAlphaNumericTimeZone(const char *&scursor,
1712 const char *const send,
1713 long int &secsEastOfGMT,
1714 bool &timeZoneKnown)
1715{
1716 // allow the timezone to be wrapped in quotes; bug 260761
1717 if (scursor < send && *scursor == '"') {
1718 scursor++;
1719
1720 if (scursor == send) {
1721 return false;
1722 }
1723 }
1724
1725 QPair<const char *, int> maybeTimeZone(nullptr, 0);
1726 if (!parseToken(scursor, send, maybeTimeZone, ParseTokenNoFlag)) {
1727 return false;
1728 }
1729 for (int i = 0 ; i < timeZonesLen ; ++i) {
1730 if (qstrnicmp(timeZones[i].tzName,
1731 maybeTimeZone.first, maybeTimeZone.second) == 0) {
1732 scursor += maybeTimeZone.second;
1733 secsEastOfGMT = timeZones[i].secsEastOfGMT;
1734 timeZoneKnown = true;
1735
1736 if (scursor < send && *scursor == '"') {
1737 scursor++;
1738 }
1739
1740 return true;
1741 }
1742 }
1743
1744 // don't choke just because we don't happen to know the time zone
1745 KMIME_WARN_UNKNOWN(time zone,
1746 QByteArray(maybeTimeZone.first, maybeTimeZone.second));
1747 secsEastOfGMT = 0;
1748 timeZoneKnown = false;
1749 return true;
1750}
1751
// Parses a run of decimal digits starting at scursor and returns the
// number of digits consumed (0 if the input does not start with a digit).
// result receives the parsed number, or 0 if there were no digits.
// scursor is left on the first character that is not a digit.
//
// NOTE: the value is accumulated in an int without overflow check, so
// result is only meaningful for digit runs that fit into an int.
int parseDigits(const char *&scursor, const char *const send, int &result)
{
    result = 0;
    int digits = 0;
    // isdigit() must not be called with a negative value; plain char may
    // be signed, so 8-bit input has to go through unsigned char.
    for (; scursor != send && isdigit(static_cast<unsigned char>(*scursor)) ; scursor++, digits++) {
        result *= 10;
        result += int(*scursor - '0');
    }
    return digits;
}
1763
1764static bool parseTimeOfDay(const char *&scursor, const char *const send,
1765 int &hour, int &min, int &sec, bool isCRLF = false)
1766{
1767 // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ]
1768
1769 //
1770 // 2DIGIT representing "hour":
1771 //
1772 if (!parseDigits(scursor, send, hour)) {
1773 return false;
1774 }
1775
1776 eatCFWS(scursor, send, isCRLF);
1777 if (scursor == send || *scursor != ':') {
1778 return false;
1779 }
1780 scursor++; // eat ':'
1781
1782 eatCFWS(scursor, send, isCRLF);
1783 if (scursor == send) {
1784 return false;
1785 }
1786
1787 //
1788 // 2DIGIT representing "minute":
1789 //
1790 if (!parseDigits(scursor, send, min)) {
1791 return false;
1792 }
1793
1794 eatCFWS(scursor, send, isCRLF);
1795 if (scursor == send) {
1796 return true; // seconds are optional
1797 }
1798
1799 //
1800 // let's see if we have a 2DIGIT representing "second":
1801 //
1802 if (*scursor == ':') {
1803 // yepp, there are seconds:
1804 scursor++; // eat ':'
1805 eatCFWS(scursor, send, isCRLF);
1806 if (scursor == send) {
1807 return false;
1808 }
1809
1810 if (!parseDigits(scursor, send, sec)) {
1811 return false;
1812 }
1813 } else {
1814 sec = 0;
1815 }
1816
1817 return true;
1818}
1819
1820bool parseTime(const char *&scursor, const char *send,
1821 int &hour, int &min, int &sec, long int &secsEastOfGMT,
1822 bool &timeZoneKnown, bool isCRLF)
1823{
1824 // time := time-of-day CFWS ( zone / obs-zone )
1825 //
1826 // obs-zone := "UT" / "GMT" /
1827 // "EST" / "EDT" / ; -0500 / -0400
1828 // "CST" / "CDT" / ; -0600 / -0500
1829 // "MST" / "MDT" / ; -0700 / -0600
1830 // "PST" / "PDT" / ; -0800 / -0700
1831 // "A"-"I" / "a"-"i" /
1832 // "K"-"Z" / "k"-"z"
1833
1834 eatCFWS(scursor, send, isCRLF);
1835 if (scursor == send) {
1836 return false;
1837 }
1838
1839 if (!parseTimeOfDay(scursor, send, hour, min, sec, isCRLF)) {
1840 return false;
1841 }
1842
1843 eatCFWS(scursor, send, isCRLF);
1844 // there might be no timezone but a year following
1845 if ((scursor == send) || isdigit(*scursor)) {
1846 timeZoneKnown = false;
1847 secsEastOfGMT = 0;
1848 return true; // allow missing timezone
1849 }
1850
1851 timeZoneKnown = true;
1852 if (*scursor == '+' || *scursor == '-') {
1853 // remember and eat '-'/'+':
1854 const char sign = *scursor++;
1855 // numerical timezone:
1856 int maybeTimeZone;
1857 const int tzDigits = parseDigits(scursor, send, maybeTimeZone);
1858 if (tzDigits != 4) {
1859 // Allow timezones in 02:00 format
1860 if (tzDigits == 2 && scursor != send && *scursor == ':') {
1861 scursor++;
1862 int maybeTimeZone2;
1863 if (parseDigits(scursor, send, maybeTimeZone2) != 2) {
1864 return false;
1865 }
1866 maybeTimeZone = maybeTimeZone * 100 + maybeTimeZone2;
1867 } else {
1868 return false;
1869 }
1870 }
1871 secsEastOfGMT = 60 * (maybeTimeZone / 100 * 60 + maybeTimeZone % 100);
1872 if (sign == '-') {
1873 secsEastOfGMT *= -1;
1874 if (secsEastOfGMT == 0) {
1875 timeZoneKnown = false; // -0000 means indetermined tz
1876 }
1877 }
1878 } else {
1879 // maybe alphanumeric timezone:
1880 if (!parseAlphaNumericTimeZone(scursor, send, secsEastOfGMT, timeZoneKnown)) {
1881 return false;
1882 }
1883 }
1884 return true;
1885}
1886
1887bool parseQDateTime(const char *&scursor, const char *const send,
1888 QDateTime &result, bool isCRLF)
1889{
1890 eatCFWS(scursor, send, isCRLF);
1891 if (scursor == send) {
1892 return false;
1893 }
1894 // In qt6 yy == 1900 ! => for sure we use 2000 here.
1895 result = QDateTime::fromString(QString::fromLatin1(scursor, 17), QStringLiteral("dd/MM/yy HH:mm:ss"));
1896 QDate resultDate = result.date();
1897 resultDate.setDate(resultDate.year() + 100, resultDate.month(), resultDate.day());
1898 result.setDate(resultDate);
1899 return result.isValid();
1900}
1901
1902bool parseDateTime(const char *&scursor, const char *const send,
1903 QDateTime &result, bool isCRLF)
1904{
1905 // Parsing date-time; strict mode:
1906 //
1907 // date-time := [ [CFWS] day-name [CFWS] "," ] ; wday
1908 // (expanded) [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date
1909 // time
1910 //
1911 // day-name := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
1912 // month-name := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
1913 // "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec"
1914
1915 result = QDateTime();
1916
1917 eatCFWS(scursor, send, isCRLF);
1918 if (scursor == send) {
1919 return false;
1920 }
1921
1922 //
1923 // let's see if there's a day-of-week:
1924 //
1925 if (parseDayName(scursor, send)) {
1926 eatCFWS(scursor, send, isCRLF);
1927 if (scursor == send) {
1928 return false;
1929 }
1930 // day-name should be followed by ',' but we treat it as optional:
1931 if (*scursor == ',') {
1932 scursor++; // eat ','
1933 eatCFWS(scursor, send, isCRLF);
1934 }
1935 }
1936
1937 int maybeMonth = -1;
1938 bool asctimeFormat = false;
1939
1940 // ANSI-C asctime() format is: Wed Jun 30 21:49:08 1993
1941 if (!isdigit(*scursor) && parseMonthName(scursor, send, maybeMonth)) {
1942 asctimeFormat = true;
1943 eatCFWS(scursor, send, isCRLF);
1944 }
1945
1946 //
1947 // 1*2DIGIT representing "day" (of month):
1948 //
1949 int maybeDay;
1950 if (!parseDigits(scursor, send, maybeDay)) {
1951 return false;
1952 }
1953
1954 eatCFWS(scursor, send, isCRLF);
1955 if (scursor == send) {
1956 return false;
1957 }
1958
1959 // ignore ","; bug 54098
1960 if (*scursor == ',') {
1961 scursor++;
1962 }
1963
1964 //
1965 // month-name:
1966 //
1967 if (!asctimeFormat && !parseMonthName(scursor, send, maybeMonth)) {
1968 return false;
1969 }
1970 if (scursor == send) {
1971 return false;
1972 }
1973 assert(maybeMonth >= 0); assert(maybeMonth <= 11);
1974 ++maybeMonth; // 0-11 -> 1-12
1975
1976 eatCFWS(scursor, send, isCRLF);
1977 if (scursor == send) {
1978 return false;
1979 }
1980
1981 // check for "year HH:MM:SS" or only "HH:MM:SS" (or "H:MM:SS")
1982 bool timeAfterYear = true;
1983 if ((send - scursor > 3) && ((scursor[1] == ':') || (scursor[2] == ':'))) {
1984 timeAfterYear = false; // first read time, then year
1985 }
1986
1987 //
1988 // 2*DIGIT representing "year":
1989 //
1990 int maybeYear = 0;
1991
1992 if (timeAfterYear && !parseDigits(scursor, send, maybeYear)) {
1993 return false;
1994 }
1995
1996 eatCFWS(scursor, send, isCRLF);
1997 int maybeHour;
1998 int maybeMinute;
1999 int maybeSecond;
2000 long int secsEastOfGMT = 0;
2001 QDate maybeDate;
2002 QTime maybeTime;
2003 if (scursor != send) {
2004 //
2005 // time
2006 //
2007 bool timeZoneKnown = true;
2008
2009 if (!parseTime(scursor, send,
2010 maybeHour, maybeMinute, maybeSecond,
2011 secsEastOfGMT, timeZoneKnown, isCRLF)) {
2012 return false;
2013 }
2014
2015 // in asctime() the year follows the time
2016 if (!timeAfterYear) {
2017 eatCFWS(scursor, send, isCRLF);
2018 if (scursor == send) {
2019 return false;
2020 }
2021
2022 if (!parseDigits(scursor, send, maybeYear)) {
2023 return false;
2024 }
2025 }
2026
2027 // RFC 2822 4.3 processing:
2028 if (maybeYear < 50) {
2029 maybeYear += 2000;
2030 } else if (maybeYear < 1000) {
2031 maybeYear += 1900;
2032 }
2033 // else keep as is
2034 if (maybeYear < 1900) {
2035 return false; // rfc2822, 3.3
2036 }
2037
2038 maybeDate = QDate(maybeYear, maybeMonth, maybeDay);
2039 maybeTime = QTime(maybeHour, maybeMinute, maybeSecond);
2040
2041 if (!maybeDate.isValid() || !maybeTime.isValid()) {
2042 return false;
2043 }
2044 } else {
2045 maybeDate = QDate(maybeYear, maybeMonth, maybeDay);
2046 maybeTime = QTime(0, 0, 0);
2047 }
2048
2049 result = QDateTime(maybeDate, maybeTime, QTimeZone::fromSecondsAheadOfUtc(secsEastOfGMT));
2050 if (!result.isValid()) {
2051 return false;
2052 }
2053 return true;
2054}
2055
2056namespace {
2057
2058Headers::Base *extractHeader(QByteArrayView head, const int headerStart, int &endOfFieldBody)
2059{
2060 Headers::Base *header = {};
2061
2062 int startOfFieldBody = head.indexOf(':', headerStart);
2063 if (startOfFieldBody < 0) {
2064 return nullptr;
2065 }
2066
2067 const char *rawType = head.constData() + headerStart;
2068 const size_t rawTypeLen = startOfFieldBody - headerStart;
2069
2070 startOfFieldBody++; //skip the ':'
2071 if (startOfFieldBody < head.size() - 1 && head[startOfFieldBody] == ' ') { // skip the space after the ':', if there's any
2072 startOfFieldBody++;
2073 }
2074
2075 bool folded = false;
2076 endOfFieldBody = findHeaderLineEnd(head, startOfFieldBody, &folded);
2077
2078 // We might get an invalid mail without a field name, don't crash on that.
2079 if (rawTypeLen > 0) {
2080 header = HeaderFactory::createHeader(rawType, rawTypeLen);
2081 }
2082 if (!header) {
2083 //qCWarning(KMIME_LOG)() << "Returning Generic header of type" << rawType;
2084 header = new Headers::Generic(rawType, rawTypeLen);
2085 }
2086 if (folded) {
2087 const auto unfoldedBody = unfoldHeader(head.constData() + startOfFieldBody, endOfFieldBody - startOfFieldBody);
2088 header->from7BitString(unfoldedBody);
2089 } else {
2090 header->from7BitString(head.constData() + startOfFieldBody, endOfFieldBody - startOfFieldBody);
2091 }
2092
2093 return header;
2094}
2095
2096}
2097
2098std::unique_ptr<KMime::Headers::Base> parseNextHeader(QByteArrayView &head)
2099{
2100 int endOfFieldBody = 0;
2101 std::unique_ptr<KMime::Headers::Base> header(extractHeader(head, 0, endOfFieldBody));
2102 if (header) {
2103 head = head.mid(endOfFieldBody + 1);
2104 } else {
2105 head = {};
2106 }
2107
2108 return header;
2109}
2110
2111void extractHeaderAndBody(const QByteArray &content, QByteArray &header, QByteArray &body)
2112{
2113 header.clear();
2114 body.clear();
2115
2116 // empty header
2117 if (content.startsWith('\n')) {
2118 body = content.right(content.length() - 1);
2119 return;
2120 }
2121
2122 int pos = content.indexOf("\n\n", 0);
2123 if (pos > -1) {
2124 header = content.left(++pos); //header *must* end with "\n" !!
2125 body = content.mid(pos + 1);
2126 if (body.startsWith("\n")) {
2127 body = "\n" + body;
2128 }
2129 } else {
2130 header = content;
2131 }
2132}
2133
2134QList<Headers::Base *> parseHeaders(const QByteArray &head) {
2136
2137 int cursor = 0;
2138 while (cursor < head.size()) {
2139 const int headerStart = cursor;
2140 int endOfFieldBody;
2141 if (auto header = extractHeader(head, headerStart, endOfFieldBody)) {
2142 ret << header;
2143 cursor = endOfFieldBody + 1;
2144 } else {
2145 break;
2146 }
2147 }
2148
2149 return ret;
2150}
2151
2152} // namespace HeaderParsing
2153
2154} // namespace KMime
static Codec * codecForName(QByteArrayView name)
virtual Decoder * makeDecoder(NewlineType newline=NewlineLF) const=0
virtual const char * name() const=0
virtual qsizetype maxDecodedSizeFor(qsizetype insize, NewlineType newline=NewlineLF) const=0
Baseclass of all header-classes.
virtual void from7BitString(const char *s, size_t len)
Parses the given string.
Represents an arbitrary header, that can contain any header-field.
Represents an (email address, display name) pair according to RFC 2822, section 3.4.
Definition kmime_types.h:38
void setName(const QString &name)
Sets the name.
void setAddress(const AddrSpec &addr)
Sets the email address.
Q_SCRIPTABLE Q_NOREPLY void start()
This file is part of the API for handling MIME data and defines the DateFormatter class.
This file is part of the API for handling MIME data and defines the various header classes:
KCODECS_EXPORT QString decodeRFC2047String(QByteArrayView src, QByteArray *usedCS, const QByteArray &defaultCS=QByteArray(), CharsetOption option=NoOption)
const QList< QKeySequence > & end()
iterator begin()
void clear()
const char * constData() const const
char * data()
iterator end()
qsizetype indexOf(QByteArrayView bv, qsizetype from) const const
bool isEmpty() const const
QByteArray left(qsizetype len) const const
qsizetype length() const const
QByteArray mid(qsizetype pos, qsizetype len) const const
void resize(qsizetype newSize, char c)
QByteArray right(qsizetype len) const const
qsizetype size() const const
bool startsWith(QByteArrayView bv) const const
QByteArrayView mid(qsizetype start, qsizetype length) const const
const_pointer constData() const const
qsizetype indexOf(QByteArrayView bv, qsizetype from) const const
qsizetype size() const const
int day() const const
bool isValid(int year, int month, int day)
int month() const const
bool setDate(int year, int month, int day)
int year() const const
QDate date() const const
QDateTime fromString(QStringView string, QStringView format, QCalendar cal)
bool isValid() const const
void setDate(QDate date)
void append(QList< T > &&value)
iterator begin()
iterator end()
iterator insert(const Key &key, const T &value)
bool isEmpty() const const
Key key(const T &value, const Key &defaultKey) const const
void chop(qsizetype n)
void clear()
bool endsWith(QChar c, Qt::CaseSensitivity cs) const const
QString fromLatin1(QByteArrayView str)
bool isNull() const const
QString mid(qsizetype position, qsizetype n) const const
qsizetype size() const const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const const
QString toLower() const const
bool isValid() const const
EncodedData< QByteArrayView > decode(QByteArrayView ba)
QTextStream & dec(QTextStream &stream)
QTextStream & endl(QTextStream &stream)
bool isValid(int h, int m, int s, int ms)
QTimeZone fromSecondsAheadOfUtc(int offset)
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Sat Apr 27 2024 22:14:42 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.