Md4qt

parser.h
Go to the documentation of this file.
1/*
2 SPDX-FileCopyrightText: 2022-2024 Igor Mironchik <igor.mironchik@gmail.com>
3 SPDX-License-Identifier: MIT
4*/
5
6#ifndef MD4QT_MD_PARSER_HPP_INCLUDED
7#define MD4QT_MD_PARSER_HPP_INCLUDED
8
9// md4qt include.
10#include "doc.h"
11#include "entities_map.h"
12#include "traits.h"
13#include "utils.h"
14
15#ifdef MD4QT_QT_SUPPORT
16
17// Qt include.
18#include <QDir>
19#include <QFile>
20#include <QTextStream>
21
22#endif // MD4QT_QT_SUPPORT
23
24#ifdef MD4QT_ICU_STL_SUPPORT
25
26// C++ include.
27#include <exception>
28
29#endif // MD4QT_ICU_STL_SUPPORT
30
31// C++ include.
32#include <algorithm>
33#include <cassert>
34#include <cmath>
35#include <fstream>
36#include <functional>
37#include <memory>
38#include <set>
39#include <tuple>
40#include <unordered_map>
41#include <vector>
42
43namespace MD
44{
45
//! Starting HTML comment string.
//! The pointer itself is const, so this header-level global can not be
//! re-seated by accident from any translation unit that includes the header.
static const char *const s_startComment = "<!--";
48
//! \return Whether indent \p indent belongs to a list described by the
//! previous indents \p indents.
//!
//! An indent matches a list indent \c v when it is not smaller than \c v and,
//! unless \p codeIndentedBySpaces is set, not greater than \c v + 3.
//! A null or empty \p indents never matches.
inline bool
indentInList(const std::vector<long long int> *indents,
             long long int indent,
             bool codeIndentedBySpaces)
{
    if (!indents || indents->empty()) {
        return false;
    }

    const auto fits = [indent, codeIndentedBySpaces](long long int listIndent) {
        if (indent < listIndent) {
            return false;
        }

        // The upper bound is ignored when code is indented by spaces.
        return codeIndentedBySpaces || indent <= listIndent + 3;
    };

    return std::any_of(indents->cbegin(), indents->cend(), fits);
}
67
//! Skip spaces in \p line starting from position \p i.
//! \return Position of the first non-space character at or after \p i,
//! the length of \p line if only spaces follow, or \p i unchanged if it is
//! already at or past the end of the line.
template<class Trait>
inline long long int
skipSpaces(long long int i, const typename Trait::String &line)
{
    for (const auto end = line.length(); i < end; ++i) {
        if (!line[i].isSpace()) {
            break;
        }
    }

    return i;
}
81
82
83//! Remove spaces at the end of string \p s.
// The string is modified in place; nothing is returned.
// NOTE(review): the embedded listing numbers jump from 85 to 87, so the line
// carrying the function name and parameter list is absent from this extract.
84template<class String>
85inline void
87{
// Start at the last character; i is -1 for an empty string.
88 long long int i = s.length() - 1;
89
// Walk backwards to the last non-space character (i ends at -1 if there is none).
90 for (; i >= 0; --i) {
91 if (!s[i].isSpace()) {
92 break;
93 }
94 }
95
// Only touch the string when at least one trailing space was found.
96 if (i != s.length() - 1) {
97 s.remove(i + 1, s.length() - i - 1);
98 }
99}
100
//! \return Last non-space character position in \p line.
//!
//! The character at position 0 is never inspected: the result is 0 when
//! every character after the first one is a space, and -1 for an empty line.
template<class Trait>
inline long long int
lastNonSpacePos(const typename Trait::String &line)
{
    long long int pos = line.length() - 1;

    for (; pos > 0; --pos) {
        if (!line[pos].isSpace()) {
            break;
        }
    }

    return pos;
}
114
115//! \return Starting sequence of the same characters.
116template<class Trait>
117inline typename Trait::String
118startSequence(const typename Trait::String &line)
119{
120 auto pos = skipSpaces<Trait>(0, line);
121
122 if (pos >= line.length()) {
123 return {};
124 }
125
126 const auto sch = line[pos];
127 const auto start = pos;
128
129 ++pos;
130
131 while (pos < line.length() && line[pos] == sch) {
132 ++pos;
133 }
134
135 return line.sliced(start, pos - start);
136}
137
138//! \return Is string an ordered list.
139template<class Trait>
140inline bool
141isOrderedList(const typename Trait::String &s,
142 int *num = nullptr,
143 int *len = nullptr,
144 typename Trait::Char *delim = nullptr,
145 bool *isFirstLineEmpty = nullptr)
146{
147 long long int p = skipSpaces<Trait>(0, s);
148
149 long long int dp = p;
150
151 for (; p < s.size(); ++p) {
152 if (!s[p].isDigit()) {
153 break;
154 }
155 }
156
157 if (dp != p && p < s.size()) {
158 const auto digits = s.sliced(dp, p - dp);
159
160 if (digits.size() > 9) {
161 return false;
162 }
163
164 const auto i = digits.toInt();
165
166 if (num) {
167 *num = i;
168 }
169
170 if (len) {
171 *len = p - dp;
172 }
173
174 if (s[p] == Trait::latin1ToChar('.') || s[p] == Trait::latin1ToChar(')')) {
175 if (delim) {
176 *delim = s[p];
177 }
178
179 ++p;
180
181 long long int tmp = skipSpaces<Trait>(p, s);
182
183 if (isFirstLineEmpty) {
184 *isFirstLineEmpty = (tmp == s.size());
185 }
186
187 if ((p < s.size() && s[p] == Trait::latin1ToChar(' ')) || p == s.size()) {
188 return true;
189 }
190 }
191 }
192
193 return false;
194}
195
196//
197// RawHtmlBlock
198//
199
200//! Internal structure for pre-storing HTML.
// NOTE(review): the embedded listing numbers skip 202, 207 and 209, so the
// struct header and two members are absent from this extract. findParent()
// below reads m_blocks, which is presumably declared on one of those lines.
201template<class Trait>
// HTML item being collected.
203 std::shared_ptr<RawHtml<Trait>> m_html = {};
// Block that will own the HTML item.
204 std::shared_ptr<Block<Trait>> m_parent = {};
205 std::shared_ptr<Block<Trait>> m_topParent = {};
// Sequence of (block, long long) pairs; findParent() compares the second
// value with an indent.
206 using SequenceOfBlock = std::vector<std::pair<std::shared_ptr<Block<Trait>>, long long int>>;
// Per-block sequences kept for a later adjustment of last positions
// (purpose inferred from the member name — confirm against the users).
208 std::unordered_map<std::shared_ptr<Block<Trait>>, SequenceOfBlock> m_toAdjustLastPos = {};
210 bool m_continueHtml = false;
211 bool m_onLine = false;
212
// Search m_blocks from the last entry to the first and return the block of
// the first entry whose stored value does not exceed \p indent.
// Returns nullptr when no entry qualifies.
213 std::shared_ptr<Block<Trait>>
214 findParent(long long int indent) const
215 {
216 for (auto it = m_blocks.crbegin(), last = m_blocks.crend(); it != last; ++it) {
217 if (indent >= it->second) {
218 return it->first;
219 }
220 }
221
222 return nullptr;
223 }
224}; // struct RawHtmlBlock
225
226//! Reset pre-stored HTML.
// Drops the stored HTML item and parent, sets m_htmlBlockType to -1 and
// clears the m_continueHtml / m_onLine flags. Other members of the object
// (for example m_topParent) are left untouched.
// NOTE(review): the embedded listing numbers skip 228, so the line with the
// function name and its parameter (referred to as "html" in the body) is
// absent from this extract.
227template<class Trait>
229{
230 html.m_html.reset();
231 html.m_parent.reset();
232 html.m_htmlBlockType = -1;
233 html.m_continueHtml = false;
234 html.m_onLine = false;
235}
236
237//
238// MdLineData
239//
240
241//! Internal structure for auxiliary information about a line in Markdown.
// NOTE(review): the embedded listing numbers skip 242 and 247, so the struct
// header and the member that follows the "closed, valid" comment are absent
// from this extract.
// Line number; -1 until it is set.
243 long long int m_lineNumber = -1;
// Pair of (char, bool) stored per key in CommentDataMap.
244 using CommentData = std::pair<char, bool>;
// Ordered map from a long long key to CommentData.
245 using CommentDataMap = std::map<long long int, CommentData>;
246 // std::pair< closed, valid >
248}; // struct MdLineData
249
250//
251// MdBlock
252//
253
254//! Internal structure for block of text in Markdown.
// NOTE(review): the embedded listing numbers skip 260, so one member is
// absent from this extract (other code in this file reads "m_data" from
// MdBlock, which is presumably declared there).
255template<class Trait>
256struct MdBlock {
// One line of the block: its text together with the auxiliary line data.
257 using Line = std::pair<typename Trait::InternalString, MdLineData>;
// Container of lines, using the vector type supplied by the trait.
258 using Data = typename Trait::template Vector<Line>;
259
261 long long int m_emptyLinesBefore = 0;
262 bool m_emptyLineAfter = true;
263}; // struct MdBlock
264
265//
266// StringListStream
267//
268
269//! Wrapper that makes a list of lines behave like a stream.
// The wrapper keeps a reference to the lines (MdBlock<Trait>::Data) and a
// read position; it does not own the data.
// NOTE(review): the embedded listing numbers skip 271 and 274, so the class
// header and the constructor signature are absent from this extract.
270template<class Trait>
272{
273public:
// Constructor initializer list: bind to the lines and start at position 0.
275 : m_stream(stream)
276 , m_pos(0)
277 {
278 }
279
// \return true when every line was already read.
280 bool atEnd() const
281 {
282 return (m_pos >= (long long int)m_stream.size());
283 }
// \return Text of the current line; advances the read position.
// No bounds check is done here, so callers should test atEnd() first.
284 typename Trait::InternalString readLine()
285 {
286 return m_stream.at(m_pos++).first;
287 }
// \return Stored line number of the current line, or size() when the read
// position is at the end.
288 long long int currentLineNumber() const
289 {
290 return (m_pos < size() ? m_stream.at(m_pos).second.m_lineNumber : size());
291 }
// \return Text of the line at index \p pos; the read position is unchanged.
292 typename Trait::InternalString lineAt(long long int pos)
293 {
294 return m_stream.at(pos).first;
295 }
// \return Count of lines.
296 long long int size() const
297 {
298 return m_stream.size();
299 }
300
301private:
// Referenced lines.
302 typename MdBlock<Trait>::Data &m_stream;
// Index of the next line to read.
303 long long int m_pos;
304}; // class StringListStream
305
//! Match the value \p v against the entries already on stack \p s.
//!
//! Every item is ((count, flag), kind). The count of \p v is negated and then
//! consumed by entries of \p s that have the same kind and a positive count,
//! starting from the top of the stack (presumably \p v is a closing delimiter
//! run and the positive entries are opening runs — confirm against callers).
//! Entries that are consumed, and everything above them, are erased from \p s.
//!
//! \param s Stack to match against, modified in place.
//! \param v Item to match; its count is expected to be negative.
//! \param idx Index in \p s of the entry of interest.
//! \return true if the entry at index \p idx gets fully consumed by \p v,
//!         false otherwise.
inline bool
checkStack(std::vector<std::pair<std::pair<long long int, bool>, int>> &s,
           const std::pair<std::pair<long long int, bool>, int> &v,
           size_t idx)
{
    // Keep the full width of the count: storing it in an int would truncate
    // large values and make the comparisons below wrong.
    long long int value = -v.first.first;

    for (long long int i = static_cast<long long int>(s.size()) - 1; i >= 0; --i) {
        if (s[i].second == v.second && s[i].first.first > 0) {
            // Check for rule of multiplies of 3. Look at CommonMark 0.30 example 411.
            if (!((s[i].first.second || v.first.second) &&
                  (s[i].first.first + value) % 3 == 0 &&
                  !(s[i].first.first % 3 == 0 && value % 3 == 0))) {
                if (s[i].first.first - value <= 0) {
                    // This entry is consumed completely.
                    if (i == static_cast<long long int>(idx)) {
                        return true;
                    }

                    value -= s[i].first.first;

                    s.erase(s.cbegin() + i, s.cend());

                    if (value == 0) {
                        break;
                    }
                } else {
                    // This entry is consumed only partially: shrink it and
                    // drop everything above it.
                    s[i].first.first -= value;

                    s.erase(s.cbegin() + i + 1, s.cend());

                    break;
                }
            }
        }
    }

    return false;
}
348
349//! \return Is string a footnote?
350template<class Trait>
351inline bool
352isFootnote(const typename Trait::String &s)
353{
354 long long int p = skipSpaces<Trait>(0, s);
355
356 if (s.size() - p < 5) {
357 return false;
358 }
359
360 if (s[p++] != Trait::latin1ToChar('[')) {
361 return false;
362 }
363
364 if (s[p++] != Trait::latin1ToChar('^')) {
365 return false;
366 }
367
368 if (s[p] == Trait::latin1ToChar(']') || s[p].isSpace()) {
369 return false;
370 }
371
372 for (; p < s.size(); ++p) {
373 if (s[p] == Trait::latin1ToChar(']')) {
374 break;
375 } else if (s[p].isSpace()) {
376 return false;
377 }
378 }
379
380 ++p;
381
382 if (p < s.size() && s[p] == Trait::latin1ToChar(':')) {
383 return true;
384 } else {
385 return false;
386 }
387}
388
389//! \return Is string a code fences?
390template<class Trait>
391inline bool
392isCodeFences(const typename Trait::String &s, bool closing = false)
393{
394 auto p = skipSpaces<Trait>(0, s);
395
396 if (p > 3 || p == s.length()) {
397 return false;
398 }
399
400 const auto ch = s[p];
401
402 if (ch != Trait::latin1ToChar('~') && ch != Trait::latin1ToChar('`')) {
403 return false;
404 }
405
406 bool space = false;
407
408 long long int c = 1;
409 ++p;
410
411 for (; p < s.length(); ++p) {
412 if (s[p].isSpace()) {
413 space = true;
414 } else if (s[p] == ch) {
415 if (space && (closing ? true : ch == Trait::latin1ToChar('`'))) {
416 return false;
417 }
418
419 if (!space) {
420 ++c;
421 }
422 } else if (closing) {
423 return false;
424 } else {
425 break;
426 }
427 }
428
429 if (c < 3) {
430 return false;
431 }
432
433 if (ch == Trait::latin1ToChar('`')) {
434 for (; p < s.length(); ++p) {
435 if (s[p] == Trait::latin1ToChar('`')) {
436 return false;
437 }
438 }
439 }
440
441 return true;
442}
443
//! Read a sequence of characters from position \p i of \p str up to the
//! first space that is not escaped with a backslash.
//!
//! \param endPos If not null and \p i is inside the string, receives the
//!               position of the last character of the sequence.
//! \return The sequence with backslashes kept, or an empty string if \p i is
//!         at or past the end of \p str (\p endPos is not written then).
template<class Trait>
inline typename Trait::String
readEscapedSequence(long long int i,
                    const typename Trait::String &str,
                    long long int *endPos = nullptr)
{
    const auto first = i;

    if (first >= str.length()) {
        return {};
    }

    bool escaped = false;

    for (; i < str.length(); ++i) {
        if (escaped) {
            // Any character right after a backslash belongs to the sequence.
            escaped = false;
        } else if (str[i] == Trait::latin1ToChar('\\')) {
            escaped = true;
        } else if (str[i].isSpace()) {
            break;
        }
    }

    if (endPos) {
        *endPos = i - 1;
    }

    return str.sliced(first, i - first);
}
481
//! Punctuation characters that a backslash can escape, as a string of the
//! given trait.
template<class Trait>
static const typename Trait::String s_canBeEscaped = Trait::latin1ToString(
    "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");
486
487//! Remove backslashes from the string.
488template<class String, class Trait>
489inline String
490removeBackslashes(const String &s)
491{
492 String r = s;
493 bool backslash = false;
494 long long int extra = 0;
495
496 for (long long int i = 0; i < s.length(); ++i) {
497 bool now = false;
498
499 if (s[i] == Trait::latin1ToChar('\\') && !backslash && i != s.length() - 1) {
500 backslash = true;
501 now = true;
502 } else if (s_canBeEscaped<Trait>.contains(s[i]) && backslash) {
503 r.remove(i - extra - 1, 1);
504 ++extra;
505 }
506
507 if (!now) {
508 backslash = false;
509 }
510 }
511
512 return r;
513}
514
515//! \return Is string a start of fenced code?
// A start of code is at most three leading spaces followed by at least three
// '`' or at least three '~' characters.
// \param syntax If not null, the text after the fence is read up to the first
//               unescaped space. NOTE(review): the embedded listing numbers
//               skip 566, so the statement that stores this text is absent
//               from this extract.
// \param delim If not null, receives the start and end columns of the fence.
//              The start column is written even when the function returns false.
// \param syntaxPos If not null (and \p syntax is not null too), receives the
//                  columns of the text read after the fence.
516template<class Trait>
517inline bool
518isStartOfCode(const typename Trait::String &str,
519 typename Trait::String *syntax = nullptr,
520 WithPosition *delim = nullptr,
521 WithPosition *syntaxPos = nullptr)
522{
523 long long int p = skipSpaces<Trait>(0, str);
524
525 if (delim) {
526 delim->setStartColumn(p);
527 }
528
// More than three leading spaces is not a fence.
529 if (p > 3) {
530 return false;
531 }
532
// A fence needs at least three characters.
533 if (str.size() - p < 3) {
534 return false;
535 }
536
// c96 - backtick fence, c126 - tilde fence.
537 const bool c96 = str[p] == Trait::latin1ToChar('`');
538 const bool c126 = str[p] == Trait::latin1ToChar('~');
539
540 if (c96 || c126) {
541 ++p;
// Count of fence characters.
542 long long int c = 1;
543
544 while (p < str.length()) {
545 if (str[p] != (c96 ? Trait::latin1ToChar('`') : Trait::latin1ToChar('~'))) {
546 break;
547 }
548
549 ++c;
550 ++p;
551 }
552
553 if (delim) {
554 delim->setEndColumn(p - 1);
555 }
556
557 if (c < 3) {
558 return false;
559 }
560
561 if (syntax) {
562 p = skipSpaces<Trait>(p, str);
563 long long int endSyntaxPos = p;
564
565 if (p < str.size()) {
567 readEscapedSequence<Trait>(p, str, &endSyntaxPos));
568
569 if (syntaxPos) {
570 syntaxPos->setStartColumn(p);
571 syntaxPos->setEndColumn(endSyntaxPos);
572 }
573 }
574 }
575
576 return true;
577 }
578
579 return false;
580}
581
//! \return Is string \p s a horizontal line?
//!
//! The string must start (no leading spaces are skipped here) with '*', '-'
//! or '_', and contain only that character and spaces, with the character
//! used at least three times.
template<class Trait>
inline bool
isHorizontalLine(const typename Trait::String &s)
{
    if (s.size() < 3) {
        return false;
    }

    const typename Trait::Char marker = s[0];

    const bool known = (marker == Trait::latin1ToChar('*') ||
                        marker == Trait::latin1ToChar('-') ||
                        marker == Trait::latin1ToChar('_'));

    if (!known) {
        return false;
    }

    long long int markers = 1;
    long long int pos = 1;

    for (; pos < s.size(); ++pos) {
        if (s[pos] == marker) {
            ++markers;
        } else if (!s[pos].isSpace()) {
            // Something else than a marker or a space.
            break;
        }
    }

    return (markers >= 3 && pos == s.size());
}
624
625//! \return Is string a column alignment?
626template<class Trait>
627inline bool
628isColumnAlignment(const typename Trait::String &s)
629{
630 long long int p = skipSpaces<Trait>(0, s);
631
632 static const typename Trait::String s_legitime = Trait::latin1ToString(":-");
633
634 if (p >= s.length()) {
635 return false;
636 }
637
638 if (!s_legitime.contains(s[p])) {
639 return false;
640 }
641
642 if (s[p] == Trait::latin1ToChar(':')) {
643 ++p;
644 }
645
646 for (; p < s.size(); ++p) {
647 if (s[p] != Trait::latin1ToChar('-')) {
648 break;
649 }
650 }
651
652 if (p == s.size()) {
653 return true;
654 }
655
656 if (s[p] != Trait::latin1ToChar(':') && !s[p].isSpace()) {
657 return false;
658 }
659
660 ++p;
661
662 for (; p < s.size(); ++p) {
663 if (!s[p].isSpace()) {
664 return false;
665 }
666 }
667
668 return true;
669}
670
671//! Split string \p str into parts using character \p ch as a separator.
// Only the declaration lives here; the implementation is provided by
// explicit specializations for the individual traits.
672template<class Trait>
673typename Trait::StringList
674splitString(const typename Trait::String &str, const typename Trait::Char &ch);
675
676#ifdef MD4QT_ICU_STL_SUPPORT
677
// Specialization of splitString() compiled when MD4QT_ICU_STL_SUPPORT is
// defined; it forwards to the split() method of the string.
// NOTE(review): the embedded listing numbers skip 679-680, so the return type
// and the signature of this specialization are absent from this extract.
678template<>
681{
682 return str.split(ch);
683}
684
685#endif
686
687#ifdef MD4QT_QT_SUPPORT
688
// Specialization of splitString() for QStringTrait; empty parts are skipped
// (Qt::SkipEmptyParts).
// NOTE(review): the embedded listing numbers skip 690, so the line with the
// return type is absent from this extract.
689template<>
691splitString<QStringTrait>(const QString &str, const QChar &ch)
692{
693 return str.split(ch, Qt::SkipEmptyParts);
694}
695
696#endif
697
698//! \return Number of columns?
699template<class Trait>
700inline int
701isTableAlignment(const typename Trait::String &s)
702{
703 const auto columns = splitString<Trait>(s.simplified(), Trait::latin1ToChar('|'));
704
705 for (const auto &c : columns) {
706 if (!isColumnAlignment<Trait>(c)) {
707 return 0;
708 }
709 }
710
711 return columns.size();
712}
713
714//! \return Is given string a HTML comment.
715template<class Trait>
716inline bool
717isHtmlComment(const typename Trait::String &s)
718{
719 auto c = s;
720
721 if (s.startsWith(Trait::latin1ToString(s_startComment))) {
722 c.remove(0, 4);
723 } else {
724 return false;
725 }
726
727 long long int p = -1;
728 bool endFound = false;
729
730 while ((p = c.indexOf(Trait::latin1ToString("--"), p + 1)) > -1) {
731 if (c.size() > p + 2 && c[p + 2] == Trait::latin1ToChar('>')) {
732 if (!endFound) {
733 endFound = true;
734 } else {
735 return false;
736 }
737 } else if (p - 2 >= 0 && c.sliced(p - 2, 4) == Trait::latin1ToString("<!--")) {
738 return false;
739 } else if (c.size() > p + 3 && c.sliced(p, 4) == Trait::latin1ToString("--!>")) {
740 return false;
741 }
742 }
743
744 return endFound;
745}
746
747//! Replace entities in the string with corresponding character.
748template<class Trait>
749inline typename Trait::String
750replaceEntity(const typename Trait::String &s)
751{
752 long long int p1 = 0;
753
754 typename Trait::String res;
755 long long int i = 0;
756
757 while ((p1 = s.indexOf(Trait::latin1ToChar('&'), p1)) != -1) {
758 if (p1 > 0 && s[p1 - 1] == Trait::latin1ToChar('\\')) {
759 ++p1;
760
761 continue;
762 }
763
764 const auto p2 = s.indexOf(Trait::latin1ToChar(';'), p1);
765
766 if (p2 != -1) {
767 const auto en = s.sliced(p1, p2 - p1 + 1);
768
769 if (en.size() > 2 && en[1] == Trait::latin1ToChar('#')) {
770 if (en.size() > 3 && en[2].toLower() == Trait::latin1ToChar('x')) {
771 const auto hex = en.sliced(3, en.size() - 4);
772
773 if (hex.size() <= 6 && hex.size() > 0) {
774 bool ok = false;
775
776 const char32_t c = hex.toInt(&ok, 16);
777
778 if (ok) {
779 res.push_back(s.sliced(i, p1 - i));
780 i = p2 + 1;
781
782 if (c) {
783 Trait::appendUcs4(res, c);
784 } else {
785 res.push_back(typename Trait::Char(0xFFFD));
786 }
787 }
788 }
789 } else {
790 const auto dec = en.sliced(2, en.size() - 3);
791
792 if (dec.size() <= 7 && dec.size() > 0) {
793 bool ok = false;
794
795 const char32_t c = dec.toInt(&ok, 10);
796
797 if (ok) {
798 res.push_back(s.sliced(i, p1 - i));
799 i = p2 + 1;
800
801 if (c) {
802 Trait::appendUcs4(res, c);
803 } else {
804 res.push_back(typename Trait::Char(0xFFFD));
805 }
806 }
807 }
808 }
809 } else {
810 const auto it = s_entityMap<Trait>.find(en);
811
812 if (it != s_entityMap<Trait>.cend()) {
813 res.push_back(s.sliced(i, p1 - i));
814 i = p2 + 1;
815 res.push_back(Trait::utf16ToString(it->second));
816 }
817 }
818 } else {
819 break;
820 }
821
822 p1 = p2 + 1;
823 }
824
825 res.push_back(s.sliced(i, s.size() - i));
826
827 return res;
828}
829
830//! Remove backslashes in block.
// Works on a copy of the data ("d" in the body) and returns that copy.
// NOTE(review): the embedded listing numbers skip 833 and 838, so the line
// with the function name and parameter, and the statement inside the loop,
// are absent from this extract.
831template<class Trait>
832inline typename MdBlock<Trait>::Data
834{
835 auto tmp = d;
836
837 for (auto &line : tmp) {
839 }
840
841 return tmp;
842}
843
844//! Type of the paragraph's optimization.
// NOTE(review): the embedded listing numbers skip 845, 852 and 854, so the
// enum header and the two enumerators described by the last two comments are
// absent from this extract.
846 //! Full optimization.
847 Full,
848 //! Semi optimization, optimization won't concatenate text
849 //! items if style delimiters will be in the middle.
850 Semi,
851 //! Full optimization, but raw text data won't be concatenated (will be untouched).
853 //! Semi optimization, but raw text data won't be concatenated (will be untouched).
855};
856
857//
858// TextPlugin
859//
860
861//! ID of text plugin.
// Unscoped enum with int as the underlying type.
// NOTE(review): the embedded listing numbers skip 864, 866 and 868, so the
// enumerators that belong to the three comments below are absent from this
// extract.
862enum TextPlugin : int {
863 //! Unknown plugin.
865 //! GitHub's autolinks plugin.
867 //! First user defined plugin ID.
869}; // enum TextPlugin
870
871//
872// Style
873//
874
875//! Emphasis type.
// Every enumerator is documented with the delimiter that produces it.
// NOTE(review): the embedded listing numbers skip 886, so the enumerator
// for the "~" delimiter is absent from this extract.
876enum class Style {
877 //! "*"
878 Italic1,
879 //! "_"
880 Italic2,
881 //! "**"
882 Bold1,
883 //! "__"
884 Bold2,
885 //! "~"
887 //! Unknown.
888 Unknown
889};
890
891//! \return Text option from style.
// Both italic styles map to ItalicText and both bold styles map to BoldText;
// anything not listed maps to TextWithoutFormat.
// NOTE(review): the embedded listing numbers skip 893 and 904, so the line
// with the function name and parameter ("s" in the body) and the case label
// that returns StrikethroughText are absent from this extract.
892inline TextOption
894{
895 switch (s) {
896 case Style::Italic1:
897 case Style::Italic2:
898 return ItalicText;
899
900 case Style::Bold1:
901 case Style::Bold2:
902 return BoldText;
903
905 return StrikethroughText;
906
907 default:
908 return TextWithoutFormat;
909 }
910}
911
912//
913// TextPluginFunc
914//
915
// Forward declaration; the structure is defined later in this file.
916template<class Trait>
917struct TextParsingOpts;
918
919//! Functor type for text plugin.
// The functor returns nothing; it receives the paragraph and a string list.
// NOTE(review): the embedded listing numbers skip 922, so one parameter
// between the paragraph and the string list is absent from this extract.
920template<class Trait>
921using TextPluginFunc = std::function<void(std::shared_ptr<Paragraph<Trait>>,
923 const typename Trait::StringList &)>;
924
925//
926// TextPluginsMap
927//
928
929//! Type of the map of text plugins.
// Ordered map with an int key (presumably the ID of the plugin — confirm
// against the code that fills the map). The value is a tuple of the plugin
// functor, a bool flag and a string list; the meaning of the flag is not
// visible from here.
930template<class Trait>
931using TextPluginsMap = std::map<int, std::tuple<TextPluginFunc<Trait>,
932 bool,
933 typename Trait::StringList>>;
934
935//
936// TextParsingOpts
937//
938
939//! Internal structure for auxiliary options for parser.
// NOTE(review): this extract is incomplete. The embedded listing numbers skip
// 941-942, 949-952, 955-956, 980, 983, 1001-1002 and 1004, so the struct
// header, several members (including m_detected, which is read below, and
// m_fr / m_collectRefLinks, which other code in this file reads) and the name
// of the method with the switch are absent.
940template<class Trait>
943 std::shared_ptr<Block<Trait>> m_parent;
944 std::shared_ptr<RawHtml<Trait>> m_tmpHtml;
945 std::shared_ptr<Document<Trait>> m_doc;
// Reference to a list of links to parse; the list is owned by somebody else.
946 typename Trait::StringList &m_linksToParse;
947 typename Trait::String m_workingPath;
948 typename Trait::String m_fileName;
953 std::shared_ptr<Text<Trait>> m_lastText = {};
954 bool m_wasRefLink = false;
957
// Piece of raw text together with its position and line.
958 struct TextData {
959 typename Trait::String m_str;
960 long long int m_pos = -1;
961 long long int m_line = -1;
962 };
963
964 std::vector<TextData> m_rawTextData = {};
965
// Append the strings of raw text items with indexes in (start, end) to the
// item with index \p start and erase the appended items. Nothing is done
// unless there is at least one item between \p start and \p end.
966 inline void
967 concatenateAuxText(long long int start, long long int end)
968 {
969 if (start < end && (end - start > 1)) {
970 for (auto i = start + 1; i < end; ++i) {
971 m_rawTextData[start].m_str += m_rawTextData[i].m_str;
972 }
973
974 m_rawTextData.erase(m_rawTextData.cbegin() + start + 1, m_rawTextData.cbegin() + end);
975 }
976 }
977
// Kind of construct that was detected.
978 enum class Detected { Nothing = 0, Table = 1, HTML = 2, List = 3, Code = 4 }; // enum class Detected
979
981
// \return true if a table, a list or code was detected; false for anything
// else (including HTML).
982 inline bool
984 {
985 switch (m_detected) {
986 case Detected::Table:
987 case Detected::List:
988 case Detected::Code:
989 return true;
990
991 default:
992 return false;
993 }
994 }
995
996 long long int m_line = 0;
997 long long int m_pos = 0;
998 long long int m_startTableLine = -1;
999 long long int m_lastTextLine = -1;
1000 long long int m_lastTextPos = -1;
// Pairs of a style and a long long value.
1003 std::vector<std::pair<Style, long long int>> m_styles = {};
1005 std::shared_ptr<ItemWithOpts<Trait>> m_lastItemWithStyle = nullptr;
1006}; // struct TextParsingOpts
1007
1008//
1009// virginSubstr
1010//
1011
1012//! \return Substring from fragment with given virgin positions.
1013template<class Trait>
1014inline typename Trait::String
1015virginSubstr(const MdBlock<Trait> &fr, const WithPosition &virginPos)
1016{
1017 if (fr.m_data.empty()) {
1018 return {};
1019 }
1020
1021 long long int startLine = virginPos.startLine() < fr.m_data.at(0).second.m_lineNumber ?
1022 (virginPos.endLine() < fr.m_data.at(0).second.m_lineNumber ? -1 : 0) :
1023 virginPos.startLine() - fr.m_data.at(0).second.m_lineNumber;
1024
1025 if (startLine >= static_cast<long long int>(fr.m_data.size()) || startLine < 0) {
1026 return {};
1027 }
1028
1029 auto spos = virginPos.startColumn() - fr.m_data.at(startLine).first.virginPos(0);
1030
1031 if (spos < 0) {
1032 spos = 0;
1033 }
1034
1035 long long int epos = 0;
1036 long long int linesCount = virginPos.endLine() - virginPos.startLine() -
1037 (virginPos.startLine() < fr.m_data.at(0).second.m_lineNumber ?
1038 fr.m_data.at(0).second.m_lineNumber - virginPos.startLine() : 0);
1039
1040 if (startLine + linesCount > static_cast<long long int>(fr.m_data.size())) {
1041 linesCount = fr.m_data.size() - startLine - 1;
1042 epos = fr.m_data.back().first.length();
1043 } else {
1044 epos = virginPos.endColumn() - fr.m_data.at(linesCount + startLine).first.virginPos(0) + 1;
1045 }
1046
1047 if (epos < 0) {
1048 epos = 0;
1049 }
1050
1051 if (epos > fr.m_data.at(linesCount + startLine).first.length()) {
1052 epos = fr.m_data.at(linesCount + startLine).first.length();
1053 }
1054
1055 typename Trait::String str =
1056 (linesCount ? fr.m_data.at(startLine).first.sliced(spos).asString() :
1057 fr.m_data.at(startLine).first.sliced(spos, epos - spos).asString());
1058
1059 long long int i = startLine + 1;
1060
1061 for (; i < startLine + linesCount; ++i) {
1062 str.push_back(Trait::latin1ToString("\n"));
1063 str.push_back(fr.m_data.at(i).first.asString());
1064 }
1065
1066 if (linesCount) {
1067 str.push_back(Trait::latin1ToString("\n"));
1068 str.push_back(fr.m_data.at(i).first.sliced(0, epos).asString());
1069 }
1070
1071 return str;
1072}
1073
1074//
1075// localPosFromVirgin
1076//
1077
1078//! \return Local position ( { column, line } ) in fragment for given virgin position if exists.
1079//! \return { -1, -1 } if there is no given position.
1080template<class Trait>
1081inline std::pair<long long int, long long int>
1082localPosFromVirgin(const MdBlock<Trait> &fr, long long int virginColumn, long long int virginLine)
1083{
1084 if (fr.m_data.empty()) {
1085 return {-1, -1};
1086 }
1087
1088 if (fr.m_data.front().second.m_lineNumber > virginLine ||
1089 fr.m_data.back().second.m_lineNumber < virginLine) {
1090 return {-1, -1};
1091 }
1092
1093 auto line = virginLine - fr.m_data.front().second.m_lineNumber;
1094
1095 if (fr.m_data.at(line).first.isEmpty()) {
1096 return {-1, -1};
1097 }
1098
1099 const auto vzpos = fr.m_data.at(line).first.virginPos(0);
1100
1101 if (vzpos > virginColumn || virginColumn > vzpos + fr.m_data.at(line).first.length() - 1) {
1102 return {-1, -1};
1103 }
1104
1105 return {virginColumn - vzpos, line};
1106}
1107
1108//
1109// GitHubAutolinkPlugin
1110//
1111
/*
    "^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?"
    "(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
*/
//! \return Is the given string \p url a valid email?
//!
//! An optional "mailto:" prefix is skipped. The part before '@' must be
//! non-empty and consist of latin letters, digits and some punctuation. The
//! part after '@' must have at least one dot; every label between dots must
//! have from 1 to 63 latin letters, digits or '-', and can't start or end
//! with '-'.
template<class Trait>
inline bool
isEmail(const typename Trait::String &url)
{
    // Latin letter or digit.
    const auto isAlphaNum = [](const typename Trait::Char &ch) -> bool {
        const auto code = ch.unicode();
        const bool digit = (code >= 48 && code <= 57);
        const bool lower = (code >= 97 && code <= 122);
        const bool upper = (code >= 65 && code <= 90);

        return (digit || lower || upper);
    };

    // Punctuation allowed before '@': ! # $ % & ' * + - . / = ? ^ _ ` { | } ~
    const auto isLocalPunct = [](const typename Trait::Char &ch) -> bool {
        const auto code = ch.unicode();

        return (code == 33 || (code >= 35 && code <= 39) ||
                code == 42 || code == 43 || (code >= 45 && code <= 47) ||
                code == 61 || code == 63 || (code >= 94 && code <= 96) ||
                (code >= 123 && code <= 126));
    };

    const auto hyphen = Trait::latin1ToChar('-');
    const auto at = Trait::latin1ToChar('@');
    const auto dot = Trait::latin1ToChar('.');

    long long int pos = (url.startsWith(Trait::latin1ToString("mailto:")) ? 7 : 0);
    const auto atPos = url.indexOf(at, pos);

    // There must be '@' with something before it.
    if (atPos == -1 || pos == atPos) {
        return false;
    }

    for (; pos < atPos; ++pos) {
        if (!isAlphaNum(url[pos]) && !isLocalPunct(url[pos])) {
            return false;
        }
    }

    // Check one label of the domain in range [from, to).
    const auto isLabel = [&](long long int from, long long int to) -> bool {
        const long long int maxLength = 63;

        if (to - from > maxLength || from + 1 > to || from >= url.length() || to > url.length()) {
            return false;
        }

        if (url[from] == hyphen || url[to - 1] == hyphen) {
            return false;
        }

        for (; from < to; ++from) {
            if (!isAlphaNum(url[from]) && url[from] != hyphen) {
                return false;
            }
        }

        return true;
    };

    long long int dotPos = url.indexOf(dot, atPos + 1);

    // The domain must have at least one dot.
    if (dotPos == -1) {
        return false;
    }

    pos = atPos + 1;

    for (; dotPos != -1; dotPos = url.indexOf(dot, pos)) {
        if (!isLabel(pos, dotPos)) {
            return false;
        }

        pos = dotPos + 1;
    }

    // The label after the last dot.
    return isLabel(pos, url.length());
}
1204
1205//! \return Is the given string a valid URL?
// Only the declaration lives here; the implementation is provided by
// explicit specializations for the individual traits.
1206template<class Trait>
1207inline bool
1208isValidUrl(const typename Trait::String &url);
1209
1210//! \return Is the given string a GitHub autolink?
// Only the declaration lives here; the implementation is provided by
// explicit specializations for the individual traits.
1211template<class Trait>
1212inline bool
1213isGitHubAutolink(const typename Trait::String &url);
1214
1215#ifdef MD4QT_QT_SUPPORT
1216
// Specialization compiled when MD4QT_QT_SUPPORT is defined: the string
// ("url" in the body) is parsed with QUrl in strict mode and is accepted if
// the result is valid and not relative.
// NOTE(review): the embedded listing numbers skip 1219, so the line with the
// function name and parameter is absent from this extract; judging by the
// order of the declarations this is presumably isValidUrl — confirm.
1217template<>
1218inline bool
1220{
1221 const QUrl u(url, QUrl::StrictMode);
1222
1223 return (u.isValid() && !u.isRelative());
1224}
1225
// Specialization compiled when MD4QT_QT_SUPPORT is defined: the string
// ("url" in the body) is parsed with QUrl in strict mode. It is accepted if
// the result is valid and either has both a scheme and a host, or the string
// starts with "www.", is at least 7 characters long and has one more dot
// after the prefix.
// NOTE(review): the embedded listing numbers skip 1228, so the line with the
// function name and parameter is absent from this extract; judging by the
// order of the declarations this is presumably isGitHubAutolink — confirm.
1226template<>
1227inline bool
1229{
1230 const QUrl u(url, QUrl::StrictMode);
1231
1232 return (u.isValid()
1233 && ((!u.scheme().isEmpty() && !u.host().isEmpty())
1234 || (url.startsWith(QStringLiteral("www.")) && url.length() >= 7 &&
1235 url.indexOf(QLatin1Char('.'), 4) != -1)));
1236}
1237
1238#endif
1239
1240#ifdef MD4QT_ICU_STL_SUPPORT
1241
// Specialization compiled when MD4QT_ICU_STL_SUPPORT is defined: the string
// ("url" in the body) is parsed with UrlUri and is accepted if the result is
// valid and not relative.
// NOTE(review): the embedded listing numbers skip 1244, so the line with the
// function name and parameter is absent from this extract; judging by the
// order of the declarations this is presumably isValidUrl — confirm.
1242template<>
1243inline bool
1245{
1246 const UrlUri u(url);
1247
1248 return (u.isValid() && !u.isRelative());
1249}
1250
// Specialization compiled when MD4QT_ICU_STL_SUPPORT is defined: the string
// ("url" in the body) is parsed with UrlUri. It is accepted if the result is
// valid and either has both a scheme and a host, or the string starts with
// "www.", is at least 7 characters long and has one more dot after the prefix.
// NOTE(review): the embedded listing numbers skip 1253, so the line with the
// function name and parameter is absent from this extract; judging by the
// order of the declarations this is presumably isGitHubAutolink — confirm.
1251template<>
1252inline bool
1254{
1255 const UrlUri u(url);
1256
1257 return (u.isValid()
1258 && ((!u.scheme().isEmpty() && !u.host().isEmpty())
1259 || (url.startsWith(UnicodeString("www.")) && url.length() >= 7 &&
1260 url.indexOf(UnicodeChar('.'), 4) != -1)));
1261}
1262
1263#endif
1264
1265//! Process GitHub autolinks for the text with index \p idx.
// The raw text with index \p idx is scanned for words separated by spaces
// (a word also ends at ')' if '(' was seen before the word). For a word that
// is a GitHub autolink or an email, the text item of the paragraph is split
// into the text before the word (if any), a link, and the text after the
// word (if any); the raw text data is updated accordingly.
// \return Index in the raw text data adjusted for removed and inserted items,
//         or \p idx unchanged if it is out of range.
// NOTE(review): the embedded listing numbers skip 1268-1269, 1329 and 1371,
// so the function name with its first parameters ("p" and "po" in the body)
// and two statements are absent from this extract.
1266template<class Trait>
1267inline long long int
1270 long long int idx)
1271{
1272 if (idx < 0 || idx >= (long long int)po.m_rawTextData.size()) {
1273 return idx;
1274 }
1275
// Characters that can precede a word.
1276 static const auto s_delims = Trait::latin1ToString("*_~()<>");
// Working copy of the raw text; it is shortened as links are extracted.
1277 auto s = po.m_rawTextData[idx];
// true while the start of the next word is being looked for.
1278 bool first = true;
// Start position of the current word.
1279 long long int j = 0;
// Character that ends a word besides a space; 0x00 if not set.
1280 auto end = typename Trait::Char(0x00);
1281 bool skipSpace = true;
1282 long long int ret = idx;
1283
1284 while (s.m_str.length()) {
1285 long long int i = 0;
1286 end = typename Trait::Char(0x00);
1287
1288 for (; i < s.m_str.length(); ++i) {
1289 if (first) {
// An opening parenthesis before a word makes ')' an end of the word.
1290 if (s.m_str[i] == Trait::latin1ToChar('(')) {
1291 end = Trait::latin1ToChar(')');
1292 }
1293
// The first character that is not a delimiter and not a space starts a word.
1294 if (s_delims.indexOf(s.m_str[i]) == -1 && !s.m_str[i].isSpace()) {
1295 first = false;
1296 j = i;
1297 }
1298 } else {
// A word ends at a space, at the end character or at the end of the string.
1299 if (s.m_str[i].isSpace() || i == s.m_str.length() - 1 || s.m_str[i] == end) {
// The last character of the string belongs to the word unless it is a space
// or the end character.
1300 auto tmp = s.m_str.sliced(j, i - j +
1301 (i == s.m_str.length() - 1 && s.m_str[i] != end && !s.m_str[i].isSpace() ?
1302 1 : 0));
1303 skipSpace = s.m_str[i].isSpace();
1304
1305 const auto email = isEmail<Trait>(tmp);
1306
1307 if (isGitHubAutolink<Trait>(tmp) || email) {
// Index of the text item in the paragraph.
1308 auto ti = textAtIdx(p, idx);
1309
1310 if (ti >= 0 && ti < static_cast<long long int>(p->items().size())) {
1311 typename ItemWithOpts<Trait>::Styles openStyles, closeStyles;
1312 const auto opts = std::static_pointer_cast<Text<Trait>>(p->items().at(ti))->opts();
1313
// Nothing before the link: the text item is removed and its styles go to the link.
1314 if (j == 0 || s.m_str.sliced(0, j).isEmpty()) {
1315 openStyles = std::static_pointer_cast<ItemWithOpts<Trait>>(p->items().at(ti))->openStyles();
1316 closeStyles = std::static_pointer_cast<ItemWithOpts<Trait>>(p->items().at(ti))->closeStyles();
1317 p->removeItemAt(ti);
1318 po.m_rawTextData.erase(po.m_rawTextData.cbegin() + idx);
1319 --ret;
1320 } else {
// There is text before the link: the text item is shortened to that text.
// Note that this "tmp" shadows the outer "tmp" that holds the link.
1321 const auto tmp = s.m_str.sliced(0, j);
1322
1323 auto t = std::static_pointer_cast<Text<Trait>>(p->items().at(ti));
1324 t->setEndColumn(po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + j - 1));
1325 closeStyles = t->closeStyles();
1326 t->closeStyles() = {};
1327 po.m_rawTextData[idx].m_str = tmp;
1328 ++idx;
1330 ++ti;
1331 }
1332
// Create the link; its text and URL positions are the same as its own position.
1333 std::shared_ptr<Link<Trait>> lnk(new Link<Trait>);
1334 lnk->setStartColumn(po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + j));
1335 lnk->setStartLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1336 lnk->setEndColumn(
1337 po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + i -
1338 (i == s.m_str.length() - 1 && s.m_str[i] != end && !s.m_str[i].isSpace() ?
1339 0 : 1)));
1340 lnk->setEndLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1341 lnk->openStyles() = openStyles;
1342 lnk->setTextPos({lnk->startColumn(), lnk->startLine(), lnk->endColumn(), lnk->endLine()});
1343 lnk->setUrlPos(lnk->textPos());
1344
// Emails get the "mailto:" scheme if they don't have it yet.
1345 if (email && !tmp.toLower().startsWith(Trait::latin1ToString("mailto:"))) {
1346 tmp = Trait::latin1ToString("mailto:") + tmp;
1347 }
1348
// Links that start with "www." get the "http://" scheme.
1349 if (!email && tmp.toLower().startsWith(Trait::latin1ToString("www."))) {
1350 tmp = Trait::latin1ToString("http://") + tmp;
1351 }
1352
1353 lnk->setUrl(tmp);
1354 lnk->setOpts(opts);
1355 p->insertItem(ti, lnk);
1356
// Cut the processed part from the working copy.
1357 s.m_pos += i + (s.m_str[i] == end || s.m_str[i].isSpace() ? 0 : 1);
1358 s.m_str.remove(0, i + (s.m_str[i] == end || s.m_str[i].isSpace() ? 0 : 1));
1359 j = 0;
1360 i = 0;
1361
// Something is left after the link: add a new text item for it.
1362 if (!s.m_str.isEmpty()) {
1363 po.m_rawTextData.insert(po.m_rawTextData.cbegin() + idx, s);
1364 ++ret;
1365
1366 auto t = std::make_shared<Text<Trait>>();
1367 t->setStartColumn(po.m_fr.m_data[s.m_line].first.virginPos(s.m_pos));
1368 t->setStartLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1369 t->setEndLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1370 t->setEndColumn(po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + s.m_str.length() - 1));
1372 t->closeStyles() = closeStyles;
1373 p->insertItem(ti + 1, t);
1374 } else {
// Nothing is left: the link closes the styles.
1375 lnk->closeStyles() = closeStyles;
1376 }
1377
// Restart the scan on the rest of the text.
1378 break;
1379 }
1380 }
1381
// Not a link: the next candidate starts here (after the space, if any).
1382 j = i + (skipSpace ? 1 : 0);
1383 }
1384 }
1385 }
1386
1387 first = true;
1388
// The whole string was scanned.
1389 if (i == s.m_str.length()) {
1390 break;
1391 }
1392 }
1393
1394 return ret;
1395}
1396
1397//! GitHub autolinks plugin.
// Runs processGitHubAutolinkExtension() on every raw text item, unless
// m_collectRefLinks is set. The string list parameter is not used.
// NOTE(review): the embedded listing numbers skip 1400-1401, so the function
// name and its first parameters ("p" and "po" in the body) are absent from
// this extract.
1398template<class Trait>
1399inline void
1402 const typename Trait::StringList &)
1403{
1404 if (!po.m_collectRefLinks) {
1405 long long int i = 0;
1406
1407 while (i >= 0 && i < (long long int)po.m_rawTextData.size()) {
// The call returns the index adjusted for removed and inserted items.
1408 i = processGitHubAutolinkExtension(p, po, i);
1409
1410 ++i;
1411 }
1412 }
1413}
1414
1415//
1416// Parser
1417//
1418
1419//! Markdown parser.
1420template<class Trait>
1421class Parser final
1422{
1423public:
1428
1429 ~Parser() = default;
1430
1431 //! \return Parsed Markdown document.
1432 std::shared_ptr<Document<Trait>>
1433 parse(
1434 //! File name of the Markdown document.
1435 const typename Trait::String &fileName,
1436 //! Should parsing be recursive? If recursive all links to existing Markdown
1437 //! files will be parsed and presented in the returned document.
1438 bool recursive = true,
1439 //! Allowed extensions for Markdown document files. If Markdown file doesn't
1440 //! have given extension it will be ignored.
1441 const typename Trait::StringList &ext = {Trait::latin1ToString("md"), Trait::latin1ToString("markdown")},
1442 //! Make full optimization, or just semi one. In full optimization
1443 //! text items with one style but with some closing delimiters
1444 //! in the middle will be concatenated in one, like in **text* text*,
1445 //! here in full optimization will be "text text" with 2 open/close
1446 //! style delimiters, but one closing delimiter is in the middle.
1447 bool fullyOptimizeParagraphs = true);
1448
1449 //! \return Parsed Markdown document.
1450 std::shared_ptr<Document<Trait>>
1451 parse(
1452 //! Stream to parse.
1453 typename Trait::TextStream &stream,
1454 //! Absolute path to the root folder for the document.
1455 //! This path will be used to resolve local links.
1456 const typename Trait::String &path,
1457 //! This argument needed only for anchor.
1458 const typename Trait::String &fileName,
1459 //! Make full optimization, or just semi one. In full optimization
1460 //! text items with one style but with some closing delimiters
1461 //! in the middle will be concatenated in one, like in **text* text*,
1462 //! here in full optimization will be "text text" with 2 open/close
1463 //! style delimiters, but one closing delimiter is in the middle.
1464 bool fullyOptimizeParagraphs = true);
1465
1466 //! Add text plugin.
//! The plugin function, the \p processInLinks flag and \p userData are stored
//! together in \c m_textPlugins under the key \p id.
//! NOTE(review): the line carrying this method's name is missing from this listing.
1467 void
1469 //! ID of a plugin. Use TextPlugin::UserDefinedPluginID value for start ID.
1470 int id,
1471 //! Function of a plugin, that will be invoked to process raw text.
1472 TextPluginFunc<Trait> plugin,
1473 //! Should this plugin be used in parsing of internals of links?
1474 bool processInLinks,
1475 //! User data that will be passed to plugin function.
1476 const typename Trait::StringList &userData)
1477 {
1478 m_textPlugins.insert({id, {plugin, processInLinks, userData}});
1479 }
1480
1481 //! Remove text plugin.
//! Erases the entry stored under \p id from \c m_textPlugins.
//! NOTE(review): the line carrying this method's name is missing from this listing.
1482 void
1484 //! ID of plugin that should be removed.
1485 int id)
1486 {
1487 m_textPlugins.erase(id);
1488 }
1489
1490private:
1491 void
1492 parseFile(const typename Trait::String &fileName,
1493 bool recursive,
1494 std::shared_ptr<Document<Trait>> doc,
1495 const typename Trait::StringList &ext,
1496 typename Trait::StringList *parentLinks = nullptr);
1497
1498 void
1499 parseStream(typename Trait::TextStream &stream,
1500 const typename Trait::String &workingPath,
1501 const typename Trait::String &fileName,
1502 bool recursive,
1503 std::shared_ptr<Document<Trait>> doc,
1504 const typename Trait::StringList &ext,
1505 typename Trait::StringList *parentLinks = nullptr);
1506
1507 void
1508 clearCache();
1509
//! Classification of a line/block. This is the return type of whatIsTheLine()
//! and the type of ParserContext::m_type, m_lineType and m_prevLineType.
1510 enum class BlockType {
1511 Unknown,
1512 EmptyLine,
1513 Text,
1514 List,
1515 ListWithFirstEmptyLine,
1516 CodeIndentedBySpaces,
1517 Code,
1518 Blockquote,
1519 Heading,
1520 SomethingInList,
1521 FensedCodeInList,
1522 Footnote
1523 }; // enum BlockType
1524
//! Level and indent of a list; both default to -1.
//! Passed to whatIsTheLine() as an optional pointer and stored in
//! ParserContext::m_indent.
1525 struct ListIndent {
1526 long long int m_level = -1;
1527 long long int m_indent = -1;
1528 }; // struct ListIndent
1529
1530 BlockType
1531 whatIsTheLine(typename Trait::InternalString &str,
1532 bool inList = false,
1533 bool inListWithFirstEmptyLine = false,
1534 bool fensedCodeInList = false,
1535 typename Trait::String *startOfCode = nullptr,
1536 ListIndent *indent = nullptr,
1537 bool emptyLinePreceded = false,
1538 bool calcIndent = false,
1539 const std::vector<long long int> *indents = nullptr);
1540
1541 long long int
1542 parseFragment(MdBlock<Trait> &fr,
1543 std::shared_ptr<Block<Trait>> parent,
1544 std::shared_ptr<Document<Trait>> doc,
1545 typename Trait::StringList &linksToParse,
1546 const typename Trait::String &workingPath,
1547 const typename Trait::String &fileName,
1548 bool collectRefLinks,
1549 RawHtmlBlock<Trait> &html);
1550
1551 void
1552 parseText(MdBlock<Trait> &fr,
1553 std::shared_ptr<Block<Trait>> parent,
1554 std::shared_ptr<Document<Trait>> doc,
1555 typename Trait::StringList &linksToParse,
1556 const typename Trait::String &workingPath,
1557 const typename Trait::String &fileName,
1558 bool collectRefLinks,
1559 RawHtmlBlock<Trait> &html);
1560
1561 void
1562 parseBlockquote(MdBlock<Trait> &fr,
1563 std::shared_ptr<Block<Trait>> parent,
1564 std::shared_ptr<Document<Trait>> doc,
1565 typename Trait::StringList &linksToParse,
1566 const typename Trait::String &workingPath,
1567 const typename Trait::String &fileName,
1568 bool collectRefLinks,
1569 RawHtmlBlock<Trait> &html);
1570
1571 long long int
1572 parseList(MdBlock<Trait> &fr,
1573 std::shared_ptr<Block<Trait>> parent,
1574 std::shared_ptr<Document<Trait>> doc,
1575 typename Trait::StringList &linksToParse,
1576 const typename Trait::String &workingPath,
1577 const typename Trait::String &fileName,
1578 bool collectRefLinks,
1579 RawHtmlBlock<Trait> &html);
1580
1581 void
1582 parseCode(MdBlock<Trait> &fr, std::shared_ptr<Block<Trait>> parent, bool collectRefLinks);
1583
1584 void
1585 parseCodeIndentedBySpaces(MdBlock<Trait> &fr,
1586 std::shared_ptr<Block<Trait>> parent,
1587 bool collectRefLinks,
1588 int indent,
1589 const typename Trait::String &syntax,
1590 long long int emptyColumn,
1591 long long int startLine,
1592 bool fensedCode,
1593 const WithPosition &startDelim = {},
1594 const WithPosition &endDelim = {},
1595 const WithPosition &syntaxPos = {});
1596
1597 long long int
1598 parseListItem(MdBlock<Trait> &fr,
1599 std::shared_ptr<Block<Trait>> parent,
1600 std::shared_ptr<Document<Trait>> doc,
1601 typename Trait::StringList &linksToParse,
1602 const typename Trait::String &workingPath,
1603 const typename Trait::String &fileName,
1604 bool collectRefLinks,
1605 RawHtmlBlock<Trait> &html,
1606 std::shared_ptr<ListItem<Trait>> *resItem = nullptr);
1607
1608 void
1609 parseHeading(MdBlock<Trait> &fr,
1610 std::shared_ptr<Block<Trait>> parent,
1611 std::shared_ptr<Document<Trait>> doc,
1612 typename Trait::StringList &linksToParse,
1613 const typename Trait::String &workingPath,
1614 const typename Trait::String &fileName,
1615 bool collectRefLinks);
1616
1617 void
1618 parseFootnote(MdBlock<Trait> &fr,
1619 std::shared_ptr<Block<Trait>> parent,
1620 std::shared_ptr<Document<Trait>> doc,
1621 typename Trait::StringList &linksToParse,
1622 const typename Trait::String &workingPath,
1623 const typename Trait::String &fileName,
1624 bool collectRefLinks);
1625
1626 void
1627 parseTable(MdBlock<Trait> &fr,
1628 std::shared_ptr<Block<Trait>> parent,
1629 std::shared_ptr<Document<Trait>> doc,
1630 typename Trait::StringList &linksToParse,
1631 const typename Trait::String &workingPath,
1632 const typename Trait::String &fileName,
1633 bool collectRefLinks,
1634 int columnsCount);
1635
1636 void
1637 parseParagraph(MdBlock<Trait> &fr,
1638 std::shared_ptr<Block<Trait>> parent,
1639 std::shared_ptr<Document<Trait>> doc,
1640 typename Trait::StringList &linksToParse,
1641 const typename Trait::String &workingPath,
1642 const typename Trait::String &fileName,
1643 bool collectRefLinks,
1644 RawHtmlBlock<Trait> &html);
1645
1646 void
1647 parseFormattedTextLinksImages(MdBlock<Trait> &fr,
1648 std::shared_ptr<Block<Trait>> parent,
1649 std::shared_ptr<Document<Trait>> doc,
1650 typename Trait::StringList &linksToParse,
1651 const typename Trait::String &workingPath,
1652 const typename Trait::String &fileName,
1653 bool collectRefLinks,
1654 bool ignoreLineBreak,
1655 RawHtmlBlock<Trait> &html,
1656 bool inLink);
1657
1658 RawHtmlBlock<Trait>
1659 parse(StringListStream<Trait> &stream,
1660 std::shared_ptr<Block<Trait>> parent,
1661 std::shared_ptr<Document<Trait>> doc,
1662 typename Trait::StringList &linksToParse,
1663 const typename Trait::String &workingPath,
1664 const typename Trait::String &fileName,
1665 bool collectRefLinks,
1666 bool top = false,
1667 bool dontProcessLastFreeHtml = false);
1668
//! Mutable parsing state shared by parseFragment(), eatFootnote(), finishHtml(),
//! makeLineMain(), parseFragmentAndMakeNextLineMain() and readLine().
1669 struct ParserContext {
//! Every block built by parseFragment() is appended here.
1670 typename Trait::template Vector<MdBlock<Trait>> m_splitted;
//! Lines collected for the block being built; cleared once the block is parsed.
1671 typename MdBlock<Trait>::Data m_fragment;
1672 bool m_emptyLineInList = false;
1673 bool m_fensedCodeInList = false;
1674 long long int m_emptyLinesCount = 0;
1675 long long int m_lineCounter = 0;
1676 std::vector<long long int> m_indents;
1677 ListIndent m_indent;
//! Raw HTML state handed to the per-block parseFragment() overload and
//! finalized by finishHtml().
1678 RawHtmlBlock<Trait> m_html;
1679 long long int m_emptyLinesBefore = 0;
//! Stored alongside the line number with every line pushed to m_fragment.
1680 MdLineData::CommentDataMap m_htmlCommentData;
1681 typename Trait::String m_startOfCode;
1682 typename Trait::String m_startOfCodeInList;
1683 BlockType m_type = BlockType::EmptyLine;
1684 BlockType m_lineType = BlockType::Unknown;
1685 BlockType m_prevLineType = BlockType::Unknown;
1686 }; // struct ParserContext
1687
1688 void
1689 parseFragment(ParserContext &ctx,
1690 std::shared_ptr<Block<Trait>> parent,
1691 std::shared_ptr<Document<Trait>> doc,
1692 typename Trait::StringList &linksToParse,
1693 const typename Trait::String &workingPath,
1694 const typename Trait::String &fileName,
1695 bool collectRefLinks);
1696
1697 void
1698 eatFootnote(ParserContext &ctx,
1699 StringListStream<Trait> &stream,
1700 std::shared_ptr<Block<Trait>> parent,
1701 std::shared_ptr<Document<Trait>> doc,
1702 typename Trait::StringList &linksToParse,
1703 const typename Trait::String &workingPath,
1704 const typename Trait::String &fileName,
1705 bool collectRefLinks);
1706
1707 void
1708 finishHtml(ParserContext &ctx,
1709 std::shared_ptr<Block<Trait>> parent,
1710 std::shared_ptr<Document<Trait>> doc,
1711 bool collectRefLinks,
1712 bool top,
1713 bool dontProcessLastFreeHtml);
1714
1715 void
1716 makeLineMain(ParserContext &ctx,
1717 const typename Trait::InternalString &line,
1718 long long int emptyLinesCount,
1719 const ListIndent &currentIndent,
1720 long long int ns,
1721 long long int currentLineNumber);
1722
1723 void
1724 parseFragmentAndMakeNextLineMain(ParserContext &ctx,
1725 std::shared_ptr<Block<Trait>> parent,
1726 std::shared_ptr<Document<Trait>> doc,
1727 typename Trait::StringList &linksToParse,
1728 const typename Trait::String &workingPath,
1729 const typename Trait::String &fileName,
1730 bool collectRefLinks,
1731 const typename Trait::InternalString &line,
1732 const ListIndent &currentIndent,
1733 long long int ns,
1734 long long int currentLineNumber);
1735
1736 bool
1737 isListType(BlockType t);
1738
1739 typename Trait::InternalString
1740 readLine(ParserContext &ctx, StringListStream<Trait> &stream);
1741
1742 std::shared_ptr<Image<Trait>>
1743 makeImage(const typename Trait::String &url,
1744 const typename MdBlock<Trait>::Data &text,
1745 TextParsingOpts<Trait> &po,
1746 bool doNotCreateTextOnFail,
1747 long long int startLine,
1748 long long int startPos,
1749 long long int lastLine,
1750 long long int lastPos,
1751 const WithPosition &textPos,
1752 const WithPosition &urlPos);
1753
1754 std::shared_ptr<Link<Trait>>
1755 makeLink(const typename Trait::String &url,
1756 const typename MdBlock<Trait>::Data &text,
1757 TextParsingOpts<Trait> &po,
1758 bool doNotCreateTextOnFail,
1759 long long int startLine,
1760 long long int startPos,
1761 long long int lastLine,
1762 long long int lastPos,
1763 const WithPosition &textPos,
1764 const WithPosition &urlPos);
1765
//! Description of a single delimiter: its type, where it is (line and position,
//! both -1 by default), its length and a set of flags.
//! Collections of these (\c Delims) are what collectDelimiters() returns.
1766 struct Delimiter {
1767 enum DelimiterType {
1768 // (
1769 ParenthesesOpen,
1770 // )
1771 ParenthesesClose,
1772 // [
1773 SquareBracketsOpen,
1774 // ]
1775 SquareBracketsClose,
1776 // ![
1777 ImageOpen,
1778 // ~~
1779 Strikethrough,
1780 // *
1781 Emphasis1,
1782 // _
1783 Emphasis2,
1784 // `
1785 InlineCode,
1786 // <
1787 Less,
1788 // >
1789 Greater,
1790 // $
1791 Math,
1792 HorizontalLine,
1793 H1,
1794 H2,
1795 Unknown
1796 }; // enum DelimiterType
1797
1798 DelimiterType m_type = Unknown;
1799 long long int m_line = -1;
1800 long long int m_pos = -1;
1801 long long int m_len = 0;
1802 bool m_isWordBefore = false;
// NOTE(review): presumably set when the delimiter is escaped with a
// backslash - confirm in collectDelimiters().
1803 bool m_backslashed = false;
1804 bool m_leftFlanking = false;
1805 bool m_rightFlanking = false;
1806 }; // struct Delimiter
1807
1808 using Delims = typename Trait::template Vector<Delimiter>;
1809
1810 bool
1811 createShortcutImage(const typename MdBlock<Trait>::Data &text,
1812 TextParsingOpts<Trait> &po,
1813 long long int startLine,
1814 long long int startPos,
1815 long long int lastLineForText,
1816 long long int lastPosForText,
1817 typename Delims::const_iterator lastIt,
1818 const typename MdBlock<Trait>::Data &linkText,
1819 bool doNotCreateTextOnFail,
1820 const WithPosition &textPos,
1821 const WithPosition &linkTextPos);
1822
1823 typename Delims::const_iterator
1824 checkForImage(typename Delims::const_iterator it,
1825 typename Delims::const_iterator last,
1826 TextParsingOpts<Trait> &po);
1827
1828 bool
1829 createShortcutLink(const typename MdBlock<Trait>::Data &text,
1830 TextParsingOpts<Trait> &po,
1831 long long int startLine,
1832 long long int startPos,
1833 long long int lastLineForText,
1834 long long int lastPosForText,
1835 typename Delims::const_iterator lastIt,
1836 const typename MdBlock<Trait>::Data &linkText,
1837 bool doNotCreateTextOnFail,
1838 const WithPosition &textPos,
1839 const WithPosition &linkTextPos);
1840
1841 typename Delims::const_iterator
1842 checkForLink(typename Delims::const_iterator it,
1843 typename Delims::const_iterator last,
1844 TextParsingOpts<Trait> &po);
1845
1846 Delims
1847 collectDelimiters(const typename MdBlock<Trait>::Data &fr);
1848
1849 std::pair<typename Trait::String, bool>
1850 readHtmlTag(typename Delims::const_iterator it, TextParsingOpts<Trait> &po);
1851
1852 typename Delims::const_iterator
1853 findIt(typename Delims::const_iterator it,
1854 typename Delims::const_iterator last,
1855 TextParsingOpts<Trait> &po);
1856
1857 void
1858 finishRule1HtmlTag(typename Delims::const_iterator it,
1859 typename Delims::const_iterator last,
1860 TextParsingOpts<Trait> &po,
1861 bool skipFirst);
1862
1863 void
1864 finishRule2HtmlTag(typename Delims::const_iterator it,
1865 typename Delims::const_iterator last,
1866 TextParsingOpts<Trait> &po);
1867
1868 void
1869 finishRule3HtmlTag(typename Delims::const_iterator it,
1870 typename Delims::const_iterator last,
1871 TextParsingOpts<Trait> &po);
1872
1873 void
1874 finishRule4HtmlTag(typename Delims::const_iterator it,
1875 typename Delims::const_iterator last,
1876 TextParsingOpts<Trait> &po);
1877
1878 void
1879 finishRule5HtmlTag(typename Delims::const_iterator it,
1880 typename Delims::const_iterator last,
1881 TextParsingOpts<Trait> &po);
1882
1883 void
1884 finishRule6HtmlTag(typename Delims::const_iterator it,
1885 typename Delims::const_iterator last,
1886 TextParsingOpts<Trait> &po);
1887
1888 typename Parser<Trait>::Delims::const_iterator
1889 finishRule7HtmlTag(typename Delims::const_iterator it,
1890 typename Delims::const_iterator last,
1891 TextParsingOpts<Trait> &po);
1892
1893 typename Delims::const_iterator
1894 finishRawHtmlTag(typename Delims::const_iterator it,
1895 typename Delims::const_iterator last,
1896 TextParsingOpts<Trait> &po,
1897 bool skipFirst);
1898
1899 int
1900 htmlTagRule(typename Delims::const_iterator it,
1901 typename Delims::const_iterator last,
1902 TextParsingOpts<Trait> &po);
1903
1904 typename Delims::const_iterator
1905 checkForRawHtml(typename Delims::const_iterator it,
1906 typename Delims::const_iterator last,
1907 TextParsingOpts<Trait> &po);
1908
1909 typename Delims::const_iterator
1910 checkForMath(typename Delims::const_iterator it,
1911 typename Delims::const_iterator last,
1912 TextParsingOpts<Trait> &po);
1913
1914 typename Delims::const_iterator
1915 checkForAutolinkHtml(typename Delims::const_iterator it,
1916 typename Delims::const_iterator last,
1917 TextParsingOpts<Trait> &po,
1918 bool updatePos);
1919
1920 typename Delims::const_iterator
1921 checkForInlineCode(typename Delims::const_iterator it,
1922 typename Delims::const_iterator last,
1923 TextParsingOpts<Trait> &po);
1924
1925 std::pair<typename MdBlock<Trait>::Data, typename Delims::const_iterator>
1926 readTextBetweenSquareBrackets(typename Delims::const_iterator start,
1927 typename Delims::const_iterator it,
1928 typename Delims::const_iterator last,
1929 TextParsingOpts<Trait> &po,
1930 bool doNotCreateTextOnFail,
1931 WithPosition *pos = nullptr);
1932
1933 std::pair<typename MdBlock<Trait>::Data, typename Delims::const_iterator>
1934 checkForLinkText(typename Delims::const_iterator it,
1935 typename Delims::const_iterator last,
1936 TextParsingOpts<Trait> &po,
1937 WithPosition *pos = nullptr);
1938
1939 std::pair<typename MdBlock<Trait>::Data, typename Delims::const_iterator>
1940 checkForLinkLabel(typename Delims::const_iterator it,
1941 typename Delims::const_iterator last,
1942 TextParsingOpts<Trait> &po,
1943 WithPosition *pos = nullptr);
1944
1945 std::tuple<typename Trait::String, typename Trait::String, typename Delims::const_iterator, bool>
1946 checkForInlineLink(typename Delims::const_iterator it,
1947 typename Delims::const_iterator last,
1948 TextParsingOpts<Trait> &po,
1949 WithPosition *urlPos = nullptr);
1950
1951 inline std::tuple<typename Trait::String, typename Trait::String, typename Delims::const_iterator, bool>
1952 checkForRefLink(typename Delims::const_iterator it,
1953 typename Delims::const_iterator last,
1954 TextParsingOpts<Trait> &po,
1955 WithPosition *urlPos = nullptr);
1956
1957 typename Trait::String
1958 toSingleLine(const typename MdBlock<Trait>::Data &d);
1959
1960 template<class Func>
1961 typename Delims::const_iterator
1962 checkShortcut(typename Delims::const_iterator it,
1963 typename Delims::const_iterator last,
1964 TextParsingOpts<Trait> &po,
1965 Func functor)
1966 {
1967 const auto start = it;
1968
1969 typename MdBlock<Trait>::Data text;
1970
1971 WithPosition labelPos;
1972 std::tie(text, it) = checkForLinkLabel(start, last, po, &labelPos);
1973
1974 if (it != start && !toSingleLine(text).simplified().isEmpty()) {
1975 if ((this->*functor)(text, po, start->m_line, start->m_pos, start->m_line,
1976 start->m_pos + start->m_len, it, {}, false, labelPos, {})) {
1977 return it;
1978 }
1979 }
1980
1981 return start;
1982 }
1983
1984 void
1985 createStyles(std::vector<std::pair<Style, long long int>> &s,
1986 long long int l,
1987 typename Delimiter::DelimiterType t,
1988 long long int &count);
1989
1990 bool
1991 isSequence(typename Delims::const_iterator it,
1992 long long int itLine,
1993 long long int itPos,
1994 typename Delimiter::DelimiterType t);
1995
1996 typename Delims::const_iterator
1997 readSequence(typename Delims::const_iterator it,
1998 typename Delims::const_iterator last,
1999 long long int &line,
2000 long long int &pos,
2001 long long int &len,
2002 typename Delims::const_iterator &current);
2003
2004 int
2005 emphasisToInt(typename Delimiter::DelimiterType t);
2006
2007 std::pair<bool, size_t>
2008 checkEmphasisSequence(const std::vector<std::pair<std::pair<long long int, bool>, int>> &s,
2009 size_t idx);
2010
2011 std::vector<std::pair<std::pair<long long int, bool>, int>>
2012 fixSequence(const std::vector<std::pair<std::pair<long long int, bool>, int>> &s);
2013
2014 std::vector<std::vector<std::pair<std::pair<long long int, bool>, int>>>
2015 closedSequences(const std::vector<std::vector<std::pair<std::pair<long long int, bool>, int>>> &vars,
2016 size_t idx);
2017
2018 std::vector<std::pair<Style, long long int>>
2019 createStyles(const std::vector<std::pair<std::pair<long long int, bool>, int>> &s,
2020 size_t i,
2021 typename Delimiter::DelimiterType t,
2022 long long int &count);
2023
2024 std::tuple<bool, std::vector<std::pair<Style, long long int>>, long long int, long long int>
2025 isStyleClosed(typename Delims::const_iterator it,
2026 typename Delims::const_iterator last,
2027 TextParsingOpts<Trait> &po);
2028
2029 typename Delims::const_iterator
2030 incrementIterator(typename Delims::const_iterator it,
2031 typename Delims::const_iterator last,
2032 long long int count);
2033
2034 typename Delims::const_iterator
2035 checkForStyle(typename Delims::const_iterator first,
2036 typename Delims::const_iterator it,
2037 typename Delims::const_iterator last,
2038 TextParsingOpts<Trait> &po);
2039
2040 bool
2041 isListOrQuoteAfterHtml(TextParsingOpts<Trait> &po);
2042
2043 void
2044 parseTableInParagraph(TextParsingOpts<Trait> &po,
2045 std::shared_ptr<Paragraph<Trait>> parent,
2046 std::shared_ptr<Document<Trait>> doc,
2047 typename Trait::StringList &linksToParse,
2048 const typename Trait::String &workingPath,
2049 const typename Trait::String &fileName,
2050 bool collectRefLinks);
2051
2052 bool
2053 isNewBlockIn(MdBlock<Trait> &fr,
2054 long long int startLine,
2055 long long int endLine);
2056
2057 void
2058 makeInlineCode(long long int startLine,
2059 long long int startPos,
2060 long long int lastLine,
2061 long long int lastPos,
2062 TextParsingOpts<Trait> &po,
2063 typename Delims::const_iterator startDelimIt,
2064 typename Delims::const_iterator endDelimIt);
2065
//! \return OptimizeParagraphType::Full when \c m_fullyOptimizeParagraphs is set.
//! NOTE(review): the return-type line and the line holding the alternative
//! value of the conditional are missing from this listing.
2067 defaultParagraphOptimization() const
2068 {
2069 return (m_fullyOptimizeParagraphs ? OptimizeParagraphType::Full :
2071 }
2072
2073private:
2074 //! Used in tests.
2075 friend struct PrivateAccess;
2076
2077private:
2078 typename Trait::StringList m_parsedFiles;
2079 TextPluginsMap<Trait> m_textPlugins;
2080 bool m_fullyOptimizeParagraphs = true;
2081
2083}; // class Parser
2084
2085//
2086// Parser
2087//
2088
2089template<class Trait>
2090inline std::shared_ptr<Document<Trait>>
2091Parser<Trait>::parse(const typename Trait::String &fileName,
2092 bool recursive,
2093 const typename Trait::StringList &ext,
2094 bool fullyOptimizeParagraphs)
2095{
2096 m_fullyOptimizeParagraphs = fullyOptimizeParagraphs;
2097
2098 std::shared_ptr<Document<Trait>> doc(new Document<Trait>);
2099
2100 parseFile(fileName, recursive, doc, ext);
2101
2102 clearCache();
2103
2104 return doc;
2105}
2106
2107template<class Trait>
2108inline std::shared_ptr<Document<Trait>>
2109Parser<Trait>::parse(typename Trait::TextStream &stream,
2110 const typename Trait::String &path,
2111 const typename Trait::String &fileName,
2112 bool fullyOptimizeParagraphs)
2113{
2114 m_fullyOptimizeParagraphs = fullyOptimizeParagraphs;
2115
2116 std::shared_ptr<Document<Trait>> doc(new Document<Trait>);
2117
2118 parseStream(stream, path, fileName, false, doc, typename Trait::StringList());
2119
2120 clearCache();
2121
2122 return doc;
2123}
2124
2125template<class Trait>
2127
2128#ifdef MD4QT_QT_SUPPORT
2129
2130//! Wrapper for QTextStream.
2131template<>
2133{
2134public:
2136 : m_stream(stream)
2137 , m_lastBuf(false)
2138 , m_pos(0)
2139 {
2140 }
2141
2142 bool
2143 atEnd() const
2144 {
2145 return (m_lastBuf && m_pos == m_buf.size());
2146 }
2147
//! Read the next line of text.
//!
//! Characters are taken from getChar() until a line break or the end of data.
//! "\n", "\r\n" and a lone "\r" all end the line; the break itself is not part
//! of the returned string, and null characters are dropped.
//! NOTE(review): the line carrying this method's name is missing from this listing.
2148 QString
2150 {
2151 QString line;
2152 bool rFound = false;
2153
2154 while (!atEnd()) {
2155 const auto c = getChar();
2156
// A '\r' that is not followed by '\n' already ended the line:
// step back so the character just read stays available.
2157 if (rFound && c != QLatin1Char('\n')) {
2158 --m_pos;
2159
2160 return line;
2161 }
2162
2163 if (c == QLatin1Char('\r')) {
2164 rFound = true;
2165
2166 continue;
2167 } else if (c == QLatin1Char('\n')) {
2168 return line;
2169 }
2170
2171 if (!c.isNull()) {
2172 line.push_back(c);
2173 }
2174 }
2175
2176 return line;
2177 }
2178
2179private:
2180 void
2181 fillBuf()
2182 {
2183 m_buf = m_stream.read(512);
2184
2185 if (m_stream.atEnd()) {
2186 m_lastBuf = true;
2187 }
2188
2189 m_pos = 0;
2190 }
2191
2192 QChar
2193 getChar()
2194 {
2195 if (m_pos < m_buf.size()) {
2196 return m_buf.at(m_pos++);
2197 } else if (!atEnd()) {
2198 fillBuf();
2199
2200 return getChar();
2201 } else {
2202 return QChar();
2203 }
2204 }
2205
2206private:
2207 QTextStream &m_stream;
2208 QString m_buf;
2209 bool m_lastBuf;
2210 long long int m_pos;
2211}; // class TextStream
2212
2213#endif
2214
2215#ifdef MD4QT_ICU_STL_SUPPORT
2216
2217//! Wrapper for std::istream.
2218template<>
2220{
2221public:
2222 TextStream(std::istream &stream)
2223 : m_pos(0)
2224 {
2225 std::vector<unsigned char> content;
2226
2227 stream.seekg(0, std::ios::end);
2228 const auto ssize = stream.tellg();
2229 content.resize((size_t)ssize + 1);
2230 stream.seekg(0, std::ios::beg);
2231 stream.read((char *)&content[0], ssize);
2232 content[(size_t)ssize] = 0;
2233
2234 const auto z = std::count(content.cbegin(), content.cend(), 0);
2235
2236 if (z > 1) {
2237 std::vector<unsigned char> tmp;
2238 tmp.resize(content.size() + (z - 1) * 2);
2239
2240 for (size_t i = 0, j = 0; i < content.size() - 1; ++i, ++j) {
2241 if (content[i] == 0) {
2242 // 0xFFFD - replacement character in UTF-8.
2243 tmp[j++] = 0xEF;
2244 tmp[j++] = 0xBF;
2245 tmp[j] = 0xBD;
2246 } else {
2247 tmp[j] = content[i];
2248 }
2249 }
2250
2251 tmp[tmp.size() - 1] = 0;
2252
2253 std::swap(content, tmp);
2254 }
2255
2256 m_str = UnicodeString::fromUTF8((char *)&content[0]);
2257 }
2258
2259 bool
2260 atEnd() const
2261 {
2262 return m_pos == m_str.size();
2263 }
2264
//! Read the next line of text.
//!
//! Characters are taken from getChar() until a line break or the end of data.
//! "\n", "\r\n" and a lone "\r" all end the line; the break itself is not part
//! of the returned string, and null characters are dropped.
//! NOTE(review): the lines carrying this method's return type and name are
//! missing from this listing.
2267 {
2268 UnicodeString line;
2269
2270 bool rFound = false;
2271
2272 while (!atEnd()) {
2273 const auto c = getChar();
2274
// A '\r' that is not followed by '\n' already ended the line:
// step back so the character just read stays available.
2275 if (rFound && c != UnicodeChar('\n')) {
2276 --m_pos;
2277
2278 return line;
2279 }
2280
2281 if (c == UnicodeChar('\r')) {
2282 rFound = true;
2283
2284 continue;
2285 } else if (c == UnicodeChar('\n')) {
2286 return line;
2287 }
2288
2289 if (!c.isNull()) {
2290 line.push_back(c);
2291 }
2292 }
2293
2294 return line;
2295 }
2296
2297private:
//! \return Character at the current position (the position is then advanced),
//! or a default-constructed UnicodeChar when all the text was consumed.
//! NOTE(review): the line carrying the return type is missing from this listing.
2299 getChar()
2300 {
2301 if (!atEnd()) {
2302 return m_str[m_pos++];
2303 } else {
2304 return UnicodeChar();
2305 }
2306 }
2307
2308private:
2309 UnicodeString m_str;
2310 long long int m_pos;
2311};
2312
2313#endif
2314
2315//! \return Is HTML comment closed?
2316template<class Trait>
2317inline bool
2318checkForEndHtmlComments(const typename Trait::String &line,
2319 long long int pos)
2320{
2321 const long long int e = line.indexOf(Trait::latin1ToString("-->"), pos);
2322
2323 if (e != -1) {
2324 return isHtmlComment<Trait>(line.sliced(0, e + 3));
2325 }
2326
2327 return false;
2328}
2329
2330//! Collect information about HTML comments.
//!
//! For every occurrence of \c s_startComment in \p line an entry keyed by the
//! occurrence's virgin position is inserted into \c res: {0, true} for "<!-->",
//! {1, true} for "<!--->", {2, true} for the other accepted cases and
//! {-1, false} when none of the checks succeeded.
//! NOTE(review): the lines declaring the remaining parameters (\c stream and
//! \c res are used below) and the two conditions guarding the {2, true}
//! insertions are missing from this listing.
2331template<class Trait>
2332inline void
2333checkForHtmlComments(const typename Trait::InternalString &line,
2336{
2337 long long int p = 0, l = stream.currentLineNumber();
2338
2339 const auto &str = line.asString();
2340
2341 while ((p = str.indexOf(Trait::latin1ToString(s_startComment), p)) != -1) {
2342 bool addNegative = false;
2343
// Text from the start of the comment to the end of the line.
2344 auto c = str.sliced(p);
2345
2346 if (c.startsWith(Trait::latin1ToString("<!-->"))) {
2347 res.insert({line.virginPos(p), {0, true}});
2348
2349 p += 5;
2350
2351 continue;
2352 } else if (c.startsWith(Trait::latin1ToString("<!--->"))) {
2353 res.insert({line.virginPos(p), {1, true}});
2354
2355 p += 6;
2356
2357 continue;
2358 }
2359
2361 res.insert({line.virginPos(p), {2, true}});
2362 } else {
2363 addNegative = true;
2364
// Look ahead: following lines of the stream are appended one by one,
// separated by a space.
2365 for (; l < stream.size(); ++l) {
2366 c.push_back(Trait::latin1ToChar(' '));
2367 c.push_back(stream.lineAt(l).asString());
2368
2370 res.insert({line.virginPos(p), {2, true}});
2371
2372 addNegative = false;
2373
2374 break;
2375 }
2376 }
2377 }
2378
2379 if (addNegative) {
2380 res.insert({line.virginPos(p), {-1, false}});
2381 }
2382
2383 ++p;
2384 }
2385}
2386
2387template<class Trait>
2388inline void
2389Parser<Trait>::parseFragment(typename Parser<Trait>::ParserContext &ctx,
2390 std::shared_ptr<Block<Trait>> parent,
2391 std::shared_ptr<Document<Trait>> doc,
2392 typename Trait::StringList &linksToParse,
2393 const typename Trait::String &workingPath,
2394 const typename Trait::String &fileName,
2395 bool collectRefLinks)
2396{
2397 if (!ctx.m_fragment.empty()) {
2398 MdBlock<Trait> block = {ctx.m_fragment, ctx.m_emptyLinesBefore, ctx.m_emptyLinesCount > 0};
2399
2400 ctx.m_emptyLinesBefore = ctx.m_emptyLinesCount;
2401
2402 ctx.m_splitted.push_back(block);
2403
2404 long long int line = 0;
2405
2406 while (line >= 0) {
2407 line = parseFragment(block, parent, doc, linksToParse, workingPath,
2408 fileName, collectRefLinks, ctx.m_html);
2409
2410 assert(line != 0);
2411
2412 if (line > 0) {
2413 if (ctx.m_html.m_html) {
2414 if (!collectRefLinks) {
2415 ctx.m_html.m_parent->appendItem(ctx.m_html.m_html);
2416 }
2417
2418 resetHtmlTag<Trait>(ctx.m_html);
2419 }
2420
2421 const auto it = std::find_if(ctx.m_fragment.cbegin(), ctx.m_fragment.cend(), [line](const auto &d) {
2422 return (d.second.m_lineNumber == line);
2423 });
2424
2425 block.m_data.clear();
2426 std::copy(it, ctx.m_fragment.cend(), std::back_inserter(block.m_data));
2427 block.m_emptyLinesBefore = 0;
2428 }
2429 }
2430
2431 ctx.m_fragment.clear();
2432 }
2433
2434 ctx.m_type = BlockType::EmptyLine;
2435 ctx.m_emptyLineInList = false;
2436 ctx.m_fensedCodeInList = false;
2437 ctx.m_emptyLinesCount = 0;
2438 ctx.m_lineCounter = 0;
2439 ctx.m_indents.clear();
2440 ctx.m_indent = {-1, -1};
2441 ctx.m_startOfCode.clear();
2442 ctx.m_startOfCodeInList.clear();
2443}
2444
//! Replace tabs with spaces (just for internal simpler use).
//!
//! Every tab is expanded to as many spaces as are needed to reach the next
//! column that is a multiple of four (one to four spaces).
template<class Trait>
inline void
replaceTabs(typename Trait::InternalString &s)
{
    const auto tab = Trait::latin1ToChar('\t');
    const auto space = Trait::latin1ToChar(' ');

    long long int total = s.length();
    long long int pos = 0;

    while (pos < total) {
        if (s[pos] == tab) {
            // Distance from the current column to the next tab stop.
            const auto fill = static_cast<unsigned char>(4 - (pos % 4));

            s.replaceOne(pos, 1, typename Trait::String(fill, space));

            // The string grew by fill - 1 characters; skip the inserted spaces.
            total += fill - 1;
            pos += fill;
        } else {
            ++pos;
        }
    }
}
2467
//! Read lines from \p stream while they belong to a footnote.
//!
//! Empty lines and lines accepted by the startsWith() check below are added to
//! the current fragment. A line that starts a new footnote, or any other line
//! that follows an empty line, makes the collected fragment be parsed first;
//! reading stops once such a line is not classified as a footnote.
2468template<class Trait>
2469inline void
2470Parser<Trait>::eatFootnote(typename Parser<Trait>::ParserContext &ctx,
2471 StringListStream<Trait> &stream,
2472 std::shared_ptr<Block<Trait>> parent,
2473 std::shared_ptr<Document<Trait>> doc,
2474 typename Trait::StringList &linksToParse,
2475 const typename Trait::String &workingPath,
2476 const typename Trait::String &fileName,
2477 bool collectRefLinks)
2478{
2479 long long int emptyLinesCount = 0;
2480 bool wasEmptyLine = false;
2481
2482 while (!stream.atEnd()) {
2483 const auto currentLineNumber = stream.currentLineNumber();
2484
2485 auto line = readLine(ctx, stream);
2486
2487 replaceTabs<Trait>(line);
2488
// Position of the first non-space character (equals the length for a blank line).
2489 const auto ns = skipSpaces<Trait>(0, line.asString());
2490
// NOTE(review): whitespace in this listing appears to be collapsed, so the
// literal below may originally have held more than one space - confirm
// against the real source before relying on it.
2491 if (ns == line.length() || line.asString().startsWith(Trait::latin1ToString(" "))) {
2492 if (ns == line.length()) {
2493 ++emptyLinesCount;
2494 wasEmptyLine = true;
2495 } else {
2496 emptyLinesCount = 0;
2497 }
2498
2499 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2500 } else if (!wasEmptyLine) {
// No empty line seen so far: either a new footnote starts here, or the
// line simply continues the current fragment.
2501 if (isFootnote<Trait>(line.sliced(ns).asString())) {
2502 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2503
2504 ctx.m_lineType = BlockType::Footnote;
2505
2506 makeLineMain(ctx, line, emptyLinesCount, ctx.m_indent, ns, currentLineNumber);
2507
2508 continue;
2509 } else {
2510 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2511 }
2512 } else {
// The line follows an empty line and did not pass the check above:
// finish the collected fragment and classify the line.
2513 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2514
2515 ctx.m_lineType =
2516 whatIsTheLine(line, false, false, false, &ctx.m_startOfCodeInList, &ctx.m_indent,
2517 ctx.m_lineType == BlockType::EmptyLine, true, &ctx.m_indents);
2518
2519 makeLineMain(ctx, line, emptyLinesCount, ctx.m_indent, ns, currentLineNumber);
2520
// Keep reading only if another footnote started; otherwise the line is
// left as the main line of the next block.
2521 if (ctx.m_type == BlockType::Footnote) {
2522 wasEmptyLine = false;
2523
2524 continue;
2525 } else {
2526 break;
2527 }
2528 }
2529 }
2530
// The stream ended while lines were still collected: parse them now.
2531 if (stream.atEnd() && !ctx.m_fragment.empty()) {
2532 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2533 }
2534}
2535
2536template<class Trait>
2537inline void
2538Parser<Trait>::finishHtml(ParserContext &ctx,
2539 std::shared_ptr<Block<Trait>> parent,
2540 std::shared_ptr<Document<Trait>> doc,
2541 bool collectRefLinks,
2542 bool top,
2543 bool dontProcessLastFreeHtml)
2544{
2545 if (!collectRefLinks || top) {
2546 if (ctx.m_html.m_html->isFreeTag()) {
2547 if (!dontProcessLastFreeHtml) {
2548 if (ctx.m_html.m_parent) {
2549 ctx.m_html.m_parent->appendItem(ctx.m_html.m_html);
2550
2551 updateLastPosInList(ctx.m_html);
2552 } else {
2553 parent->appendItem(ctx.m_html.m_html);
2554 }
2555 }
2556 } else {
2557 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
2558 p->appendItem(ctx.m_html.m_html);
2559 p->setStartColumn(ctx.m_html.m_html->startColumn());
2560 p->setStartLine(ctx.m_html.m_html->startLine());
2561 p->setEndColumn(ctx.m_html.m_html->endColumn());
2562 p->setEndLine(ctx.m_html.m_html->endLine());
2563 doc->appendItem(p);
2564 }
2565 }
2566
2567 if (!dontProcessLastFreeHtml) {
2568 resetHtmlTag(ctx.m_html);
2569 }
2570
2571 ctx.m_html.m_toAdjustLastPos.clear();
2572}
2573
2574template<class Trait>
2575inline void
2576Parser<Trait>::makeLineMain(ParserContext &ctx,
2577 const typename Trait::InternalString &line,
2578 long long int emptyLinesCount,
2579 const ListIndent &currentIndent,
2580 long long int ns,
2581 long long int currentLineNumber)
2582{
2583 if (ctx.m_html.m_htmlBlockType >= 6) {
2584 ctx.m_html.m_continueHtml = (emptyLinesCount <= 0);
2585 }
2586
2587 ctx.m_type = ctx.m_lineType;
2588
2589 switch (ctx.m_type) {
2590 case BlockType::List:
2591 case BlockType::ListWithFirstEmptyLine: {
2592 if (ctx.m_indents.empty())
2593 ctx.m_indents.push_back(currentIndent.m_indent);
2594
2595 ctx.m_indent = currentIndent;
2596 } break;
2597
2598 case BlockType::Code:
2599 ctx.m_startOfCode = startSequence<Trait>(line.asString());
2600 break;
2601
2602 default:
2603 break;
2604 }
2605
2606 if (!line.isEmpty() && ns < line.length()) {
2607 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2608 }
2609
2610 ctx.m_lineCounter = 1;
2611 ctx.m_emptyLinesCount = 0;
2612}
2613
2614template<class Trait>
2615inline void
2616Parser<Trait>::parseFragmentAndMakeNextLineMain(ParserContext &ctx,
2617 std::shared_ptr<Block<Trait>> parent,
2618 std::shared_ptr<Document<Trait>> doc,
2619 typename Trait::StringList &linksToParse,
2620 const typename Trait::String &workingPath,
2621 const typename Trait::String &fileName,
2622 bool collectRefLinks,
2623 const typename Trait::InternalString &line,
2624 const ListIndent &currentIndent,
2625 long long int ns,
2626 long long int currentLineNumber)
2627{
2628 const auto empty = ctx.m_emptyLinesCount;
2629
2630 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2631
2632 makeLineMain(ctx, line, empty, currentIndent, ns, currentLineNumber);
2633}
2634
2635template<class Trait>
2636inline bool
2637Parser<Trait>::isListType(BlockType t)
2638{
2639 switch (t) {
2640 case BlockType::List:
2641 case BlockType::ListWithFirstEmptyLine:
2642 return true;
2643
2644 default:
2645 return false;
2646 }
2647}
2648
2649template<class Trait>
2650typename Trait::InternalString
2651Parser<Trait>::readLine(typename Parser<Trait>::ParserContext &ctx,
2652 StringListStream<Trait> &stream)
2653{
2654 ctx.m_htmlCommentData.clear();
2655
2656 auto line = stream.readLine();
2657
2658 static const char16_t c_zeroReplaceWith[2] = {0xFFFD, 0};
2659
2660 line.replace(typename Trait::Char(0), Trait::utf16ToString(&c_zeroReplaceWith[0]));
2661
2662 checkForHtmlComments(line, stream, ctx.m_htmlCommentData);
2663
2664 return line;
2665}
2666
2667template<class Trait>
2668inline RawHtmlBlock<Trait>
2669Parser<Trait>::parse(StringListStream<Trait> &stream,
2670 std::shared_ptr<Block<Trait>> parent,
2671 std::shared_ptr<Document<Trait>> doc,
2672 typename Trait::StringList &linksToParse,
2673 const typename Trait::String &workingPath,
2674 const typename Trait::String &fileName,
2675 bool collectRefLinks,
2676 bool top,
2677 bool dontProcessLastFreeHtml)
2678{
2679 ParserContext ctx;
2680
2681 while (!stream.atEnd()) {
2682 const auto currentLineNumber = stream.currentLineNumber();
2683
2684 auto line = readLine(ctx, stream);
2685
2686 if (ctx.m_lineType != BlockType::Unknown) {
2687 ctx.m_prevLineType = ctx.m_lineType;
2688 }
2689
2690 ctx.m_lineType = whatIsTheLine(line,
2691 (ctx.m_emptyLineInList || isListType(ctx.m_type)),
2692 ctx.m_prevLineType == BlockType::ListWithFirstEmptyLine,
2693 ctx.m_fensedCodeInList,
2694 &ctx.m_startOfCodeInList,
2695 &ctx.m_indent,
2696 ctx.m_lineType == BlockType::EmptyLine,
2697 true,
2698 &ctx.m_indents);
2699
2700 if (isListType(ctx.m_type) && ctx.m_lineType == BlockType::FensedCodeInList) {
2701 ctx.m_fensedCodeInList = !ctx.m_fensedCodeInList;
2702 }
2703
2704 const auto currentIndent = ctx.m_indent;
2705
2706 const auto ns = skipSpaces<Trait>(0, line.asString());
2707
2708 const auto indentInListValue = indentInList(&ctx.m_indents, ns, true);
2709
2710 if (isListType(ctx.m_lineType) && !ctx.m_fensedCodeInList && ctx.m_indent.m_level > -1) {
2711 if (ctx.m_indent.m_level < (long long int)ctx.m_indents.size()) {
2712 ctx.m_indents.erase(ctx.m_indents.cbegin() + ctx.m_indent.m_level, ctx.m_indents.cend());
2713 }
2714
2715 ctx.m_indents.push_back(ctx.m_indent.m_indent);
2716 }
2717
2718 if (ctx.m_type == BlockType::CodeIndentedBySpaces && ns > 3) {
2719 ctx.m_lineType = BlockType::CodeIndentedBySpaces;
2720 }
2721
2722 if (ctx.m_type == BlockType::ListWithFirstEmptyLine && ctx.m_lineCounter == 2 &&
2723 !isListType(ctx.m_lineType)) {
2724 if (ctx.m_emptyLinesCount > 0) {
2725 parseFragmentAndMakeNextLineMain(ctx,
2726 parent,
2727 doc,
2728 linksToParse,
2729 workingPath,
2730 fileName,
2731 collectRefLinks,
2732 line,
2733 currentIndent,
2734 ns,
2735 currentLineNumber);
2736
2737 continue;
2738 } else {
2739 ctx.m_emptyLineInList = false;
2740 ctx.m_emptyLinesCount = 0;
2741 }
2742 }
2743
2744 if (ctx.m_type == BlockType::ListWithFirstEmptyLine && ctx.m_lineCounter == 2) {
2745 ctx.m_type = BlockType::List;
2746 }
2747
2748 // Footnote.
2749 if (ctx.m_lineType == BlockType::Footnote) {
2750 parseFragmentAndMakeNextLineMain(ctx,
2751 parent,
2752 doc,
2753 linksToParse,
2754 workingPath,
2755 fileName,
2756 collectRefLinks,
2757 line,
2758 currentIndent,
2759 ns,
2760 currentLineNumber);
2761
2762 eatFootnote(ctx, stream, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2763
2764 continue;
2765 }
2766
2767 // First line of the fragment.
2768 if (ns != line.length() && ctx.m_type == BlockType::EmptyLine) {
2769 makeLineMain(ctx, line, ctx.m_emptyLinesCount, currentIndent, ns, currentLineNumber);
2770
2771 continue;
2772 } else if (ns == line.length() && ctx.m_type == BlockType::EmptyLine) {
2773 continue;
2774 }
2775
2776 ++ctx.m_lineCounter;
2777
2778 // Got new empty line.
2779 if (ns == line.length()) {
2780 ++ctx.m_emptyLinesCount;
2781
2782 switch (ctx.m_type) {
2783 case BlockType::Blockquote: {
2784 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2785
2786 continue;
2787 }
2788
2789 case BlockType::Text:
2790 case BlockType::CodeIndentedBySpaces:
2791 continue;
2792 break;
2793
2794 case BlockType::Code: {
2795 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2796 ctx.m_emptyLinesCount = 0;
2797
2798 continue;
2799 }
2800
2801 case BlockType::List:
2802 case BlockType::ListWithFirstEmptyLine: {
2803 ctx.m_emptyLineInList = true;
2804
2805 continue;
2806 }
2807
2808 default:
2809 break;
2810 }
2811 }
2812 // Empty new line in list.
2813 else if (ctx.m_emptyLineInList) {
2814 if (indentInListValue || isListType(ctx.m_lineType) || ctx.m_lineType == BlockType::SomethingInList) {
2815 for (long long int i = 0; i < ctx.m_emptyLinesCount; ++i) {
2816 ctx.m_fragment.push_back({typename Trait::String(),
2817 {currentLineNumber - ctx.m_emptyLinesCount + i, {}}});
2818 }
2819
2820 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2821
2822 ctx.m_emptyLineInList = false;
2823 ctx.m_emptyLinesCount = 0;
2824
2825 continue;
2826 } else {
2827 const auto empty = ctx.m_emptyLinesCount;
2828
2829 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2830
2831 ctx.m_lineType = whatIsTheLine(line, false, false, false, nullptr, nullptr,
2832 true, false, &ctx.m_indents);
2833
2834 makeLineMain(ctx, line, empty, currentIndent, ns, currentLineNumber);
2835
2836 continue;
2837 }
2838 } else if (ctx.m_emptyLinesCount > 0) {
2839 if (ctx.m_type == BlockType::CodeIndentedBySpaces &&
2840 ctx.m_lineType == BlockType::CodeIndentedBySpaces) {
2841 const auto indent = skipSpaces<Trait>(0, ctx.m_fragment.front().first.asString());
2842
2843 for (long long int i = 0; i < ctx.m_emptyLinesCount; ++i) {
2844 ctx.m_fragment.push_back({typename Trait::String(indent, Trait::latin1ToChar(' ')),
2845 {currentLineNumber - ctx.m_emptyLinesCount + i, {}}});
2846 }
2847
2848 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2849 ctx.m_emptyLinesCount = 0;
2850 } else {
2851 parseFragmentAndMakeNextLineMain(ctx,
2852 parent,
2853 doc,
2854 linksToParse,
2855 workingPath,
2856 fileName,
2857 collectRefLinks,
2858 line,
2859 currentIndent,
2860 ns,
2861 currentLineNumber);
2862 }
2863
2864 continue;
2865 }
2866
2867 // Something new and first block is not a code block or a list, blockquote.
2868 if (ctx.m_type != ctx.m_lineType && ctx.m_type != BlockType::Code &&
2869 !isListType(ctx.m_type) && ctx.m_type != BlockType::Blockquote) {
2870 if (ctx.m_type == BlockType::Text && ctx.m_lineType == BlockType::CodeIndentedBySpaces) {
2871 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2872 }
2873 else {
2874 if (ctx.m_type == BlockType::Text && isListType(ctx.m_lineType)) {
2875 if (ctx.m_lineType != BlockType::ListWithFirstEmptyLine) {
2876 int num = 0;
2877
2878 if (isOrderedList<Trait>(line.asString(), &num)) {
2879 if (num != 1) {
2880 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2881
2882 continue;
2883 }
2884 }
2885 } else {
2886 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2887
2888 continue;
2889 }
2890 }
2891
2892 parseFragmentAndMakeNextLineMain(ctx,
2893 parent,
2894 doc,
2895 linksToParse,
2896 workingPath,
2897 fileName,
2898 collectRefLinks,
2899 line,
2900 currentIndent,
2901 ns,
2902 currentLineNumber);
2903 }
2904 }
2905 // End of code block.
2906 else if (ctx.m_type == BlockType::Code && ctx.m_type == ctx.m_lineType &&
2907 !ctx.m_startOfCode.isEmpty() &&
2908 startSequence<Trait>(line.asString()).contains(ctx.m_startOfCode) &&
2909 isCodeFences<Trait>(line.asString(), true)) {
2910 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2911
2912 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2913 }
2914 // Not a continue of list.
2915 else if (ctx.m_type != ctx.m_lineType && isListType(ctx.m_type) &&
2916 ctx.m_lineType != BlockType::SomethingInList &&
2917 ctx.m_lineType != BlockType::FensedCodeInList && !isListType(ctx.m_lineType)) {
2918 parseFragmentAndMakeNextLineMain(ctx,
2919 parent,
2920 doc,
2921 linksToParse,
2922 workingPath,
2923 fileName,
2924 collectRefLinks,
2925 line,
2926 currentIndent,
2927 ns,
2928 currentLineNumber);
2929 } else if (ctx.m_type == BlockType::Heading) {
2930 parseFragmentAndMakeNextLineMain(ctx,
2931 parent,
2932 doc,
2933 linksToParse,
2934 workingPath,
2935 fileName,
2936 collectRefLinks,
2937 line,
2938 currentIndent,
2939 ns,
2940 currentLineNumber);
2941 } else {
2942 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2943 }
2944
2945 ctx.m_emptyLinesCount = 0;
2946 }
2947
2948 if (!ctx.m_fragment.empty()) {
2949 if (ctx.m_type == BlockType::Code) {
2950 ctx.m_fragment.push_back({ctx.m_startOfCode, {-1, {}}});
2951 }
2952
2953 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2954 }
2955
2956 if (top) {
2957 resetHtmlTag(ctx.m_html);
2958
2959 for (long long int i = 0; i < (long long int)ctx.m_splitted.size(); ++i) {
2960 long long int line = 0;
2961
2962 auto &data = ctx.m_splitted[i];
2963
2964 while (line >= 0) {
2965 line = parseFragment(data, parent, doc, linksToParse, workingPath, fileName, false, ctx.m_html);
2966
2967 assert(line != 0);
2968
2969 if (line > 0) {
2970 if (ctx.m_html.m_html) {
2971 ctx.m_html.m_parent->appendItem(ctx.m_html.m_html);
2972
2973 resetHtmlTag<Trait>(ctx.m_html);
2974 }
2975
2976 const auto it = std::find_if(data.m_data.cbegin(), data.m_data.cend(), [line](const auto &d) {
2977 return (d.second.m_lineNumber == line);
2978 });
2979
2980 data.m_data.erase(data.m_data.cbegin(), it);
2981 }
2982 }
2983
2984 if (ctx.m_html.m_htmlBlockType >= 6) {
2985 ctx.m_html.m_continueHtml = (!ctx.m_splitted[i].m_emptyLineAfter);
2986 }
2987
2988 if (ctx.m_html.m_html && !ctx.m_html.m_continueHtml) {
2989 finishHtml(ctx, parent, doc, collectRefLinks, top, dontProcessLastFreeHtml);
2990 } else if (!ctx.m_html.m_html) {
2991 ctx.m_html.m_toAdjustLastPos.clear();
2992 }
2993 }
2994 }
2995
2996 if (ctx.m_html.m_html) {
2997 finishHtml(ctx, parent, doc, collectRefLinks, top, dontProcessLastFreeHtml);
2998 }
2999
3000 return ctx.m_html;
3001}
3002
3003#ifdef MD4QT_QT_SUPPORT
3004
3005template<>
3006inline void
3007Parser<QStringTrait>::parseFile(const QString &fileName,
3008 bool recursive,
3009 std::shared_ptr<Document<QStringTrait>> doc,
3010 const QStringList &ext,
3011 QStringList *parentLinks)
3012{
3013 QFileInfo fi(fileName);
3014
3015 if (fi.exists() && ext.contains(fi.suffix().toLower())) {
3016 QFile f(fileName);
3017
3018 if (f.open(QIODevice::ReadOnly)) {
3019 QTextStream s(f.readAll());
3020 f.close();
3021
3022 parseStream(s, fi.absolutePath(), fi.fileName(), recursive, doc, ext, parentLinks);
3023 }
3024 }
3025}
3026
3027#endif
3028
3029#ifdef MD4QT_ICU_STL_SUPPORT
3030
3031template<>
3032inline void
3033Parser<UnicodeStringTrait>::parseFile(const UnicodeString &fileName,
3034 bool recursive,
3035 std::shared_ptr<Document<UnicodeStringTrait>> doc,
3036 const std::vector<UnicodeString> &ext,
3037 std::vector<UnicodeString> *parentLinks)
3038{
3039 if (UnicodeStringTrait::fileExists(fileName)) {
3040 std::string fn;
3041 fileName.toUTF8String(fn);
3042
3043 try {
3044 auto e = UnicodeString::fromUTF8(std::filesystem::u8path(fn).extension().u8string());
3045
3046 if (!e.isEmpty()) {
3047 e.remove(0, 1);
3048 }
3049
3050 if (std::find(ext.cbegin(), ext.cend(), e.toLower()) != ext.cend()) {
3051 auto path = std::filesystem::canonical(std::filesystem::u8path(fn));
3052 std::ifstream file(path.c_str(), std::ios::in | std::ios::binary);
3053
3054 if (file.good()) {
3055 const auto fileNameS = path.filename().u8string();
3056 auto workingDirectory = path.remove_filename().u8string();
3057
3058 if (!workingDirectory.empty()) {
3059 workingDirectory.erase(workingDirectory.size() - 1, 1);
3060 }
3061
3062 std::replace(workingDirectory.begin(), workingDirectory.end(), '\\', '/');
3063
3064 parseStream(file, UnicodeString::fromUTF8(workingDirectory),
3065 UnicodeString::fromUTF8(fileNameS), recursive, doc, ext, parentLinks);
3066
3067 file.close();
3068 }
3069 }
3070 } catch (const std::exception &) {
3071 }
3072 }
3073}
3074
3075#endif
3076
3077//! Resolve links in the document.
3078template<class Trait>
3079void
3080resolveLinks(typename Trait::StringList &linksToParse,
3081 std::shared_ptr<Document<Trait>> doc)
3082{
3083 for (auto it = linksToParse.begin(), last = linksToParse.end(); it != last; ++it) {
3084 auto nextFileName = *it;
3085
3086 if (nextFileName.startsWith(Trait::latin1ToString("#"))) {
3087 const auto lit = doc->labeledLinks().find(nextFileName);
3088
3089 if (lit != doc->labeledLinks().cend()) {
3090 nextFileName = lit->second->url();
3091 } else {
3092 continue;
3093 }
3094 }
3095
3096 if (Trait::fileExists(nextFileName)) {
3097 *it = Trait::absoluteFilePath(nextFileName);
3098 }
3099 }
3100}
3101
3102template<class Trait>
3103inline void
3104Parser<Trait>::parseStream(typename Trait::TextStream &s,
3105 const typename Trait::String &workingPath,
3106 const typename Trait::String &fileName,
3107 bool recursive,
3108 std::shared_ptr<Document<Trait>> doc,
3109 const typename Trait::StringList &ext,
3110 typename Trait::StringList *parentLinks)
3111{
3112 typename Trait::StringList linksToParse;
3113
3114 const auto path = workingPath.isEmpty() ? typename Trait::String(fileName) :
3115 typename Trait::String(workingPath + Trait::latin1ToString("/") + fileName);
3116
3117 doc->appendItem(std::shared_ptr<Anchor<Trait>>(new Anchor<Trait>(path)));
3118
3119 typename MdBlock<Trait>::Data data;
3120
3121 {
3122 TextStream<Trait> stream(s);
3123
3124 long long int i = 0;
3125
3126 while (!stream.atEnd()) {
3127 data.push_back(std::pair<typename Trait::InternalString, MdLineData>(stream.readLine(), {i}));
3128 ++i;
3129 }
3130 }
3131
3132 StringListStream<Trait> stream(data);
3133
3134 parse(stream, doc, doc, linksToParse, workingPath, fileName, true, true);
3135
3136 m_parsedFiles.push_back(path);
3137
3138 resolveLinks<Trait>(linksToParse, doc);
3139
3140 // Parse all links if parsing is recursive.
3141 if (recursive && !linksToParse.empty()) {
3142 const auto tmpLinks = linksToParse;
3143
3144 while (!linksToParse.empty()) {
3145 auto nextFileName = linksToParse.front();
3146 linksToParse.erase(linksToParse.cbegin());
3147
3148 if (parentLinks) {
3149 const auto pit = std::find(parentLinks->cbegin(), parentLinks->cend(), nextFileName);
3150
3151 if (pit != parentLinks->cend()) {
3152 continue;
3153 }
3154 }
3155
3156 if (nextFileName.startsWith(Trait::latin1ToString("#"))) {
3157 continue;
3158 }
3159
3160 const auto pit = std::find(m_parsedFiles.cbegin(), m_parsedFiles.cend(), nextFileName);
3161
3162 if (pit == m_parsedFiles.cend()) {
3163 if (!doc->isEmpty() && doc->items().back()->type() != ItemType::PageBreak) {
3164 doc->appendItem(std::shared_ptr<PageBreak<Trait>>(new PageBreak<Trait>));
3165 }
3166
3167 parseFile(nextFileName, recursive, doc, ext, &linksToParse);
3168 }
3169 }
3170
3171 if (parentLinks) {
3172 std::copy(tmpLinks.cbegin(), tmpLinks.cend(), std::back_inserter(*parentLinks));
3173 }
3174 }
3175}
3176
//! \return Position of first character in list item.
//!
//! Leading spaces are skipped, then digits if \p ordered is set, then one more
//! character, then spaces. When the end of \p s is reached or more than four
//! spaces were skipped, the position just after the first of those spaces is
//! returned.
template<class Trait>
inline long long int
posOfListItem(const typename Trait::String &s,
              bool ordered)
{
    long long int pos = 0;

    // Indent of the list item.
    while (pos < s.size() && s[pos].isSpace()) {
        ++pos;
    }

    // Number of the ordered list item.
    if (ordered) {
        while (pos < s.size() && s[pos].isDigit()) {
            ++pos;
        }
    }

    // One character after the indent (and the number) is skipped unconditionally.
    ++pos;

    const long long int afterMarker = pos;

    while (pos < s.size() && s[pos].isSpace()) {
        ++pos;
    }

    const long long int spacesCount = pos - afterMarker;

    if (pos == s.length() || spacesCount > 4) {
        pos = afterMarker + 1;
    } else if (spacesCount == 0) {
        ++pos;
    }

    return pos;
}
3219
//! \return Level in indents for the given position.
//!
//! The level is the number of elements of \p indents up to and including the
//! last one that is not greater than \p pos, 0 if there is no such element.
inline long long int
listLevel(const std::vector<long long int> &indents,
          long long int pos)
{
    // Search from the back for the first indent that position reaches.
    const auto reached = std::find_if(indents.crbegin(), indents.crend(), [pos](long long int indent) {
        return pos >= indent;
    });

    return static_cast<long long int>(indents.crend() - reached);
}
3237
3238template<class Trait>
3239inline typename Parser<Trait>::BlockType
3240Parser<Trait>::whatIsTheLine(typename Trait::InternalString &str,
3241 bool inList,
3242 bool inListWithFirstEmptyLine,
3243 bool fensedCodeInList,
3244 typename Trait::String *startOfCode,
3245 ListIndent *indent,
3246 bool emptyLinePreceded,
3247 bool calcIndent,
3248 const std::vector<long long int> *indents)
3249{
3250 replaceTabs<Trait>(str);
3251
3252 const auto first = skipSpaces<Trait>(0, str.asString());
3253
3254 if (first < str.length()) {
3255 auto s = str.sliced(first);
3256
3257 const bool isBlockquote = s.asString().startsWith(Trait::latin1ToString(">"));
3258 const bool indentIn = indentInList(indents, first, false);
3259 bool isHeading = false;
3260
3261 if (first < 4 && isFootnote<Trait>(s.asString())) {
3262 return BlockType::Footnote;
3263 }
3264
3265 if (s.asString().startsWith(Trait::latin1ToString("#")) &&
3266 (indent ? first - indent->m_indent < 4 : first < 4)) {
3267 long long int c = 0;
3268
3269 while (c < s.length() && s[c] == Trait::latin1ToChar('#')) {
3270 ++c;
3271 }
3272
3273 if (c <= 6 && ((c < s.length() && s[c].isSpace()) || c == s.length())) {
3274 isHeading = true;
3275 }
3276 }
3277
3278 if (inList) {
3279 bool isFirstLineEmpty = false;
3280 const auto orderedList = isOrderedList<Trait>(str.asString(), nullptr, nullptr, nullptr,
3281 &isFirstLineEmpty);
3282 const bool fensedCode = isCodeFences<Trait>(s.asString());
3283 const auto codeIndentedBySpaces = emptyLinePreceded && first >= 4 &&
3284 !indentInList(indents, first, true);
3285
3286 if (fensedCodeInList) {
3287 if (indentInList(indents, first, true)) {
3288 if (fensedCode) {
3289 if (startOfCode && startSequence<Trait>(s.asString()).contains(*startOfCode)) {
3290 return BlockType::FensedCodeInList;
3291 }
3292 }
3293
3294 return BlockType::SomethingInList;
3295 }
3296 }
3297
3298 if (fensedCode && indentIn) {
3299 if (startOfCode) {
3300 *startOfCode = startSequence<Trait>(s.asString());
3301 }
3302
3303 return BlockType::FensedCodeInList;
3304 } else if ((((s.asString().startsWith(Trait::latin1ToString("-")) ||
3305 s.asString().startsWith(Trait::latin1ToString("+")) ||
3306 s.asString().startsWith(Trait::latin1ToString("*"))) &&
3307 ((s.length() > 1 && s[1] == Trait::latin1ToChar(' ')) || s.length() == 1)) ||
3308 orderedList) && (first < 4 || indentIn)) {
3309 if (codeIndentedBySpaces) {
3310 return BlockType::CodeIndentedBySpaces;
3311 }
3312
3313 if (indent && calcIndent) {
3314 indent->m_indent = posOfListItem<Trait>(str.asString(), orderedList);
3315 indent->m_level = (indents ? listLevel(*indents, first) : -1);
3316 }
3317
3318 if (s.simplified().length() == 1 || isFirstLineEmpty) {
3319 return BlockType::ListWithFirstEmptyLine;
3320 } else {
3321 return BlockType::List;
3322 }
3323 } else if (indentInList(indents, first, true)) {
3324 return BlockType::SomethingInList;
3325 }
3326 else {
3327 if (!isHeading && !isBlockquote &&
3328 !(fensedCode && first < 4) && !emptyLinePreceded && !inListWithFirstEmptyLine) {
3329 return BlockType::SomethingInList;
3330 }
3331 }
3332 } else {
3333 bool isFirstLineEmpty = false;
3334
3335 const auto orderedList = isOrderedList<Trait>(str.asString(), nullptr, nullptr, nullptr,
3336 &isFirstLineEmpty);
3337 const bool isHLine = first < 4 && isHorizontalLine<Trait>(s.asString());
3338
3339 if (!isHLine &&
3340 (((s.asString().startsWith(Trait::latin1ToString("-")) || s.asString().startsWith(Trait::latin1ToString("+")) ||
3341 s.asString().startsWith(Trait::latin1ToString("*"))) &&
3342 ((s.length() > 1 && s[1] == Trait::latin1ToChar(' ')) || s.length() == 1)) ||
3343 orderedList) && first < 4) {
3344 if (indent && calcIndent) {
3345 indent->m_indent = posOfListItem<Trait>(str.asString(), orderedList);
3346 indent->m_level = (indents ? listLevel(*indents, first) : -1);
3347 }
3348
3349 if (s.simplified().length() == 1 || isFirstLineEmpty) {
3350 return BlockType::ListWithFirstEmptyLine;
3351 } else {
3352 return BlockType::List;
3353 }
3354 }
3355 }
3356
3357 if (str.asString().startsWith(typename Trait::String(4, Trait::latin1ToChar(' ')))) {
3358 return BlockType::CodeIndentedBySpaces;
3359 } else if (isCodeFences<Trait>(str.asString())) {
3360 return BlockType::Code;
3361 } else if (isBlockquote) {
3362 return BlockType::Blockquote;
3363 } else if (isHeading) {
3364 return BlockType::Heading;
3365 }
3366 } else {
3367 return BlockType::EmptyLine;
3368 }
3369
3370 return BlockType::Text;
3371}
3372
3373template<class Trait>
3374inline long long int
3375Parser<Trait>::parseFragment(MdBlock<Trait> &fr,
3376 std::shared_ptr<Block<Trait>> parent,
3377 std::shared_ptr<Document<Trait>> doc,
3378 typename Trait::StringList &linksToParse,
3379 const typename Trait::String &workingPath,
3380 const typename Trait::String &fileName,
3381 bool collectRefLinks,
3382 RawHtmlBlock<Trait> &html)
3383{
3384 if (html.m_continueHtml) {
3385 parseText(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3386 } else {
3387 if (html.m_html) {
3388 if (!collectRefLinks) {
3389 parent->appendItem(html.m_html);
3390 }
3391
3392 resetHtmlTag(html);
3393 }
3394
3395 switch (whatIsTheLine(fr.m_data.front().first)) {
3396 case BlockType::Footnote:
3397 parseFootnote(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
3398 break;
3399
3400 case BlockType::Text:
3401 parseText(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3402 break;
3403
3404 case BlockType::Blockquote:
3405 parseBlockquote(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3406 break;
3407
3408 case BlockType::Code:
3409 parseCode(fr, parent, collectRefLinks);
3410 break;
3411
3412 case BlockType::CodeIndentedBySpaces: {
3413 int indent = 1;
3414
3415 if (fr.m_data.front().first.asString().startsWith(Trait::latin1ToString(" "))) {
3416 indent = 4;
3417 }
3418
3419 parseCodeIndentedBySpaces(fr, parent, collectRefLinks, indent, {}, -1, -1, false);
3420 } break;
3421
3422 case BlockType::Heading:
3423 parseHeading(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
3424 break;
3425
3426 case BlockType::List:
3427 case BlockType::ListWithFirstEmptyLine:
3428 return parseList(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3429
3430 default:
3431 break;
3432 }
3433 }
3434
3435 return -1;
3436}
3437
3438template<class Trait>
3439inline void
3440Parser<Trait>::clearCache()
3441{
3442 m_parsedFiles.clear();
3443}
3444
//! \return Number of columns in table, if the given string is a table header.
//!
//! 0 is returned when \p s has no "|" at all. A "|" at the start and at the end
//! of the simplified string is not counted, as well as a "|" that is preceded
//! by an unescaped backslash.
template<class Trait>
inline int
isTableHeader(const typename Trait::String &s)
{
    if (!s.contains(Trait::latin1ToChar('|'))) {
        return 0;
    }

    const auto tmp = s.simplified();
    const auto p = tmp.startsWith(Trait::latin1ToString("|")) ? 1 : 0;
    const auto n = tmp.size() - p - (tmp.endsWith(Trait::latin1ToString("|")) && tmp.size() > 1 ? 1 : 0);
    const auto v = tmp.sliced(p, n);

    // N separators make N + 1 columns.
    int columns = 1;
    // True when the previous character was a backslash that escapes the current one.
    bool escaped = false;

    for (long long int i = 0; i < v.size(); ++i) {
        if (escaped) {
            escaped = false;
        } else if (v[i] == Trait::latin1ToChar('\\')) {
            escaped = true;
        } else if (v[i] == Trait::latin1ToChar('|')) {
            ++columns;
        }
    }

    return columns;
}
3482
3483template<class Trait>
3484inline void
3485Parser<Trait>::parseText(MdBlock<Trait> &fr,
3486 std::shared_ptr<Block<Trait>> parent,
3487 std::shared_ptr<Document<Trait>> doc,
3488 typename Trait::StringList &linksToParse,
3489 const typename Trait::String &workingPath,
3490 const typename Trait::String &fileName,
3491 bool collectRefLinks,
3492 RawHtmlBlock<Trait> &html)
3493{
3494 const auto h = isTableHeader<Trait>(fr.m_data.front().first.asString());
3495 const auto c = fr.m_data.size() > 1 ? isTableAlignment<Trait>(fr.m_data[1].first.asString()) : 0;
3496
3497 if (c && h && c == h && !html.m_continueHtml) {
3498 parseTable(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, c);
3499
3500 if (!fr.m_data.empty()) {
3501 StringListStream<Trait> stream(fr.m_data);
3502
3503 Parser<Trait>::parse(stream, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
3504 }
3505 } else {
3506 parseParagraph(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3507 }
3508}
3509
3510//! Find and remove heading label.
3511template<class Trait>
3512inline std::pair<typename Trait::String, WithPosition>
3513findAndRemoveHeaderLabel(typename Trait::InternalString &s)
3514{
3515 const auto start = s.asString().indexOf(Trait::latin1ToString("{#"));
3516
3517 if (start >= 0) {
3518 long long int p = start + 2;
3519
3520 for (; p < s.length(); ++p) {
3521 if (s[p] == Trait::latin1ToChar('}')) {
3522 break;
3523 }
3524 }
3525
3526 if (p < s.length() && s[p] == Trait::latin1ToChar('}')) {
3527 WithPosition pos;
3528 pos.setStartColumn(s.virginPos(start));
3529 pos.setEndColumn(s.virginPos(p));
3530
3531 const auto label = s.sliced(start, p - start + 1).asString();
3532 s.remove(start, p - start + 1);
3533 return {label, pos};
3534 }
3535 }
3536
3537 return {};
3538}
3539
//! Convert string to label.
//!
//! Letters, digits, "-" and "_" are kept in lower case, every space character
//! is replaced with "-", all other characters are dropped.
template<class Trait>
inline typename Trait::String
stringToLabel(const typename Trait::String &s)
{
    typename Trait::String label;

    for (long long int i = 0; i < s.length(); ++i) {
        const auto ch = s[i];

        const bool allowed = ch.isLetter() || ch.isDigit() || ch == Trait::latin1ToChar('-') ||
            ch == Trait::latin1ToChar('_');

        if (allowed) {
            label.push_back(ch.toLower());

            continue;
        }

        if (ch.isSpace()) {
            label.push_back(Trait::latin1ToString("-"));
        }
    }

    return label;
}
3560
3561//! Convert Paragraph to label.
3562template<class Trait>
3563inline typename Trait::String
3565{
3566 typename Trait::String l;
3567
3568 if (!p) {
3569 return l;
3570 }
3571
3572 for (auto it = p->items().cbegin(), last = p->items().cend(); it != last; ++it) {
3573 switch ((*it)->type()) {
3574 case ItemType::Text: {
3575 auto t = static_cast<Text<Trait> *>(it->get());
3576 const auto text = t->text();
3577 l.push_back(stringToLabel<Trait>(text));
3578 } break;
3579
3580 case ItemType::Image: {
3581 auto i = static_cast<Image<Trait> *>(it->get());
3582
3583 if (!i->p()->isEmpty()) {
3584 l.push_back(paragraphToLabel(i->p().get()));
3585 } else if (!i->text().isEmpty()) {
3586 l.push_back(stringToLabel<Trait>(i->text()));
3587 }
3588 } break;
3589
3590 case ItemType::Link: {
3591 auto link = static_cast<Link<Trait> *>(it->get());
3592
3593 if (!link->p()->isEmpty()) {
3594 l.push_back(paragraphToLabel(link->p().get()));
3595 } else if (!link->text().isEmpty()) {
3596 l.push_back(stringToLabel<Trait>(link->text()));
3597 }
3598 } break;
3599
3600 case ItemType::Code: {
3601 auto c = static_cast<Code<Trait> *>(it->get());
3602
3603 if (!c->text().isEmpty()) {
3604 l.push_back(stringToLabel<Trait>(c->text()));
3605 }
3606 } break;
3607
3608 default:
3609 break;
3610 }
3611 }
3612
3613 return l;
3614}
3615
3616//! Find and remove closing sequence of "#" in heading.
3617template<class Trait>
3618inline WithPosition
3619findAndRemoveClosingSequence(typename Trait::InternalString &s)
3620{
3621 long long int end = -1;
3622 long long int start = -1;
3623
3624 for (long long int i = s.length() - 1; i >= 0; --i) {
3625 if (!s[i].isSpace() && s[i] != Trait::latin1ToChar('#') && end == -1) {
3626 return {};
3627 }
3628
3629 if (s[i] == Trait::latin1ToChar('#')) {
3630 if (end == -1) {
3631 end = i;
3632 }
3633
3634 if (i - 1 >= 0) {
3635 if (s[i - 1].isSpace()) {
3636 start = i;
3637 break;
3638 } else if (s[i - 1] != Trait::latin1ToChar('#')) {
3639 return {};
3640 }
3641 } else {
3642 start = 0;
3643 }
3644 }
3645 }
3646
3647 WithPosition ret;
3648
3649 if (start != -1 && end != -1) {
3650 ret.setStartColumn(s.virginPos(start));
3651 ret.setEndColumn(s.virginPos(end));
3652
3653 s.remove(start, end - start + 1);
3654 }
3655
3656 return ret;
3657}
3658
3659template<class Trait>
3660inline void
3661Parser<Trait>::parseHeading(MdBlock<Trait> &fr,
3662 std::shared_ptr<Block<Trait>> parent,
3663 std::shared_ptr<Document<Trait>> doc,
3664 typename Trait::StringList &linksToParse,
3665 const typename Trait::String &workingPath,
3666 const typename Trait::String &fileName,
3667 bool collectRefLinks)
3668{
3669 if (!fr.m_data.empty() && !collectRefLinks) {
3670 auto line = fr.m_data.front().first;
3671
3672 std::shared_ptr<Heading<Trait>> h(new Heading<Trait>);
3673 h->setStartColumn(line.virginPos(skipSpaces<Trait>(0, line.asString())));
3674 h->setStartLine(fr.m_data.front().second.m_lineNumber);
3675 h->setEndColumn(line.virginPos(line.length() - 1));
3676 h->setEndLine(h->startLine());
3677
3678 long long int pos = 0;
3679 pos = skipSpaces<Trait>(pos, line.asString());
3680
3681 if (pos > 0) {
3682 line = line.sliced(pos);
3683 }
3684
3685 pos = 0;
3686 int lvl = 0;
3687
3688 while (pos < line.length() && line[pos] == Trait::latin1ToChar('#')) {
3689 ++lvl;
3690 ++pos;
3691 }
3692
3693 WithPosition startDelim = {h->startColumn(), h->startLine(),
3694 line.virginPos(pos - 1), h->startLine()};
3695
3696 pos = skipSpaces<Trait>(pos, line.asString());
3697
3698 if (pos > 0) {
3699 fr.m_data.front().first = line.sliced(pos);
3700 }
3701
3702 auto label = findAndRemoveHeaderLabel<Trait>(fr.m_data.front().first);
3703
3704 typename Heading<Trait>::Delims delims = {startDelim};
3705
3706 auto endDelim = findAndRemoveClosingSequence<Trait>(fr.m_data.front().first);
3707
3708 if (endDelim.startColumn() != -1) {
3709 endDelim.setStartLine(fr.m_data.front().second.m_lineNumber);
3710 endDelim.setEndLine(endDelim.startLine());
3711
3712 delims.push_back(endDelim);
3713 }
3714
3715 h->setDelims(delims);
3716
3717 h->setLevel(lvl);
3718
3719 if (!label.first.isEmpty()) {
3720 h->setLabel(label.first.sliced(1, label.first.length() - 2) + Trait::latin1ToString("/") +
3721 (!workingPath.isEmpty() ? workingPath + Trait::latin1ToString("/") :
3722 Trait::latin1ToString("")) + fileName);
3723
3724 label.second.setStartLine(fr.m_data.front().second.m_lineNumber);
3725 label.second.setEndLine(label.second.startLine());
3726
3727 h->setLabelPos(label.second);
3728 }
3729
3730 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
3731
3732 typename MdBlock<Trait>::Data tmp;
3734 tmp.push_back(fr.m_data.front());
3735 MdBlock<Trait> block = {tmp, 0};
3736
3737 RawHtmlBlock<Trait> html;
3738
3739 parseFormattedTextLinksImages(block, p, doc, linksToParse, workingPath, fileName,
3740 false, false, html, false);
3741
3742 fr.m_data.erase(fr.m_data.cbegin());
3743
3744 if (p->items().size() && p->items().at(0)->type() == ItemType::Paragraph) {
3745 h->setText(std::static_pointer_cast<Paragraph<Trait>>(p->items().at(0)));
3746 } else {
3747 h->setText(p);
3748 }
3749
3750 if (h->isLabeled()) {
3751 doc->insertLabeledHeading(h->label(), h);
3752 } else {
3753 typename Trait::String label = Trait::latin1ToString("#") +
3754 paragraphToLabel(h->text().get());
3755
3756 label += Trait::latin1ToString("/") +
3757 (!workingPath.isEmpty() ? workingPath + Trait::latin1ToString("/") :
3758 Trait::latin1ToString("")) + fileName;
3759
3760 h->setLabel(label);
3761
3762 doc->insertLabeledHeading(label, h);
3763 }
3764
3765 parent->appendItem(h);
3766 }
3767}
3768
//! Prepare data in table cell for parsing.
//!
//! \param s Text of the cell (taken by value).
//! \return Copy of \p s where every "\|" is replaced with "|".
template<class Trait>
inline typename Trait::InternalString
prepareTableData(typename Trait::InternalString s)
{
    const auto escapedPipe = Trait::latin1ToString("\\|");
    const auto plainPipe = Trait::latin1ToString("|");

    s.replace(escapedPipe, plainPipe);

    return s;
}
3778
3779//! Split table's row on cells.
3780template<class Trait>
3781inline std::pair<typename Trait::InternalStringList, std::vector<long long int>>
3782splitTableRow(const typename Trait::InternalString &s)
3783{
3784 typename Trait::InternalStringList res;
3785 std::vector<long long int> columns;
3786
3787 bool backslash = false;
3788 long long int start = 0;
3789
3790 for (long long int i = 0; i < s.length(); ++i) {
3791 bool now = false;
3792
3793 if (s[i] == Trait::latin1ToChar('\\') && !backslash) {
3794 backslash = true;
3795 now = true;
3796 } else if (s[i] == Trait::latin1ToChar('|') && !backslash) {
3797 res.push_back(prepareTableData<Trait>(s.sliced(start, i - start)));
3798 columns.push_back(s.virginPos(i));
3799 start = i + 1;
3800 }
3801
3802 if (!now) {
3803 backslash = false;
3804 }
3805 }
3806
3807 res.push_back(prepareTableData<Trait>(s.sliced(start, s.length() - start)));
3808
3809 return {res, columns};
3810}
3811
3812template<class Trait>
3813inline void
3814Parser<Trait>::parseTable(MdBlock<Trait> &fr,
3815 std::shared_ptr<Block<Trait>> parent,
3816 std::shared_ptr<Document<Trait>> doc,
3817 typename Trait::StringList &linksToParse,
3818 const typename Trait::String &workingPath,
3819 const typename Trait::String &fileName,
3820 bool collectRefLinks,
3821 int columnsCount)
3822{
3823 static const char sep = '|';
3824
3825 if (fr.m_data.size() >= 2) {
3826 std::shared_ptr<Table<Trait>> table(new Table<Trait>);
3827 table->setStartColumn(fr.m_data.front().first.virginPos(0));
3828 table->setStartLine(fr.m_data.front().second.m_lineNumber);
3829 table->setEndColumn(fr.m_data.back().first.virginPos(fr.m_data.back().first.length() - 1));
3830 table->setEndLine(fr.m_data.back().second.m_lineNumber);
3831
3832 auto parseTableRow = [&](const typename MdBlock<Trait>::Line &lineData) -> bool {
3833 const auto &row = lineData.first;
3834
3835 if (row.asString().startsWith(Trait::latin1ToString(" "))) {
3836 return false;
3837 }
3838
3839 auto line = row;
3840 auto p = skipSpaces<Trait>(0, line.asString());
3841
3842 if (p == line.length()) {
3843 return false;
3844 }
3845
3846 if (line[p] == Trait::latin1ToChar(sep)) {
3847 line.remove(0, p + 1);
3848 }
3849
3850 for (p = line.length() - 1; p >= 0; --p) {
3851 if (!line[p].isSpace()) {
3852 break;
3853 }
3854 }
3855
3856 if (p < 0) {
3857 return false;
3858 }
3859
3860 if (line[p] == Trait::latin1ToChar(sep)) {
3861 line.remove(p, line.length() - p);
3862 }
3863
3864 auto columns = splitTableRow<Trait>(line);
3865 columns.second.insert(columns.second.begin(), row.virginPos(0));
3866 columns.second.push_back(row.virginPos(row.length() - 1));
3867
3868 std::shared_ptr<TableRow<Trait>> tr(new TableRow<Trait>);
3869 tr->setStartColumn(row.virginPos(0));
3870 tr->setStartLine(lineData.second.m_lineNumber);
3871 tr->setEndColumn(row.virginPos(row.length() - 1));
3872 tr->setEndLine(lineData.second.m_lineNumber);
3873
3874 int col = 0;
3875
3876 for (auto it = columns.first.begin(), last = columns.first.end(); it != last; ++it, ++col) {
3877 if (col == columnsCount) {
3878 break;
3879 }
3880
3881 std::shared_ptr<TableCell<Trait>> c(new TableCell<Trait>);
3882 c->setStartColumn(columns.second.at(col));
3883 c->setStartLine(lineData.second.m_lineNumber);
3884 c->setEndColumn(columns.second.at(col + 1));
3885 c->setEndLine(lineData.second.m_lineNumber);
3886
3887 if (!it->isEmpty()) {
3888 it->replace(Trait::latin1ToString("&#124;"), Trait::latin1ToChar(sep));
3889
3890 typename MdBlock<Trait>::Data fragment;
3891 fragment.push_back({*it, lineData.second});
3892 MdBlock<Trait> block = {fragment, 0};
3893
3894 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
3895
3896 RawHtmlBlock<Trait> html;
3897
3898 parseFormattedTextLinksImages(block, p, doc, linksToParse, workingPath, fileName,
3899 collectRefLinks, false, html, false);
3900
3901 if (!p->isEmpty()) {
3902 for (auto it = p->items().cbegin(), last = p->items().cend(); it != last; ++it ) {
3903 switch ((*it)->type()) {
3904 case ItemType::Paragraph: {
3905 const auto pp = std::static_pointer_cast<Paragraph<Trait>>(*it);
3906
3907 for (auto it = pp->items().cbegin(), last = pp->items().cend(); it != last; ++it) {
3908 c->appendItem((*it));
3909 }
3910 }
3911 break;
3912
3913 default:
3914 c->appendItem((*it));
3915 break;
3916 }
3917 }
3918 }
3919
3920 if (html.m_html.get()) {
3921 c->appendItem(html.m_html);
3922 }
3923 }
3924
3925 tr->appendCell(c);
3926 }
3927
3928 if (!tr->isEmpty())
3929 table->appendRow(tr);
3930
3931 return true;
3932 };
3933
3934 {
3935 auto fmt = fr.m_data.at(1).first;
3936
3937 auto columns = fmt.split(typename Trait::InternalString(Trait::latin1ToChar(sep)));
3938
3939 for (auto it = columns.begin(), last = columns.end(); it != last; ++it) {
3940 *it = it->simplified();
3941
3942 if (!it->isEmpty()) {
3944
3945 if (it->asString().endsWith(Trait::latin1ToString(":")) &&
3946 it->asString().startsWith(Trait::latin1ToString(":"))) {
3948 } else if (it->asString().endsWith(Trait::latin1ToString(":"))) {
3950 }
3951
3952 table->setColumnAlignment(table->columnsCount(), a);
3953 }
3954 }
3955 }
3956
3957 fr.m_data.erase(fr.m_data.cbegin() + 1);
3958
3959 long long int r = 0;
3960
3961 for (const auto &line : std::as_const(fr.m_data)) {
3962 if (!parseTableRow(line)) {
3963 break;
3964 }
3965
3966 ++r;
3967 }
3968
3969 fr.m_data.erase(fr.m_data.cbegin(), fr.m_data.cbegin() + r);
3970
3971 if (!table->isEmpty() && !collectRefLinks) {
3972 parent->appendItem(table);
3973 }
3974 }
3975}
3976
3977//! \return Is the given string a heading's service sequence?
3978template<class Trait>
3979inline bool
3980isH(const typename Trait::String &s,
3981 const typename Trait::Char &c)
3982{
3983 long long int p = skipSpaces<Trait>(0, s);
3984
3985 if (p > 3) {
3986 return false;
3987 }
3988
3989 const auto start = p;
3990
3991 for (; p < s.size(); ++p) {
3992 if (s[p] != c) {
3993 break;
3994 }
3995 }
3996
3997 if (p - start < 1) {
3998 return false;
3999 }
4000
4001 for (; p < s.size(); ++p) {
4002 if (!s[p].isSpace()) {
4003 return false;
4004 }
4005 }
4006
4007 return true;
4008}
4009
4010//! \return Is the given string a heading's service sequence of level 1?
4011template<class Trait>
4012inline bool
4013isH1(const typename Trait::String &s)
4014{
4015 return isH<Trait>(s, Trait::latin1ToChar('='));
4016}
4017
4018//! \return Is the given string a heading's service sequence of level 2?
4019template<class Trait>
4020inline bool
4021isH2(const typename Trait::String &s)
4022{
4023 return isH<Trait>(s, Trait::latin1ToChar('-'));
4024}
4025
4026//! \return Previous position in the block.
4027template<class Trait>
4028inline std::pair<long long int, long long int>
4030 long long int pos,
4031 long long int line)
4032{
4033 if (pos > 0) {
4034 return {pos - 1, line};
4035 }
4036
4037 for (long long int i = 0; i < static_cast<long long int>(fr.m_data.size()); ++i) {
4038 if (fr.m_data.at(i).second.m_lineNumber == line) {
4039 if (i > 0) {
4040 return {fr.m_data.at(i - 1).first.virginPos(fr.m_data.at(i - 1).first.length() - 1),
4041 line - 1};
4042 }
4043 }
4044 }
4045
4046 return {pos, line};
4047}
4048
4049//! \return Next position in the block.
4050template<class Trait>
4051inline std::pair<long long int, long long int>
4053 long long int pos,
4054 long long int line)
4055{
4056 for (long long int i = 0; i < static_cast<long long int>(fr.m_data.size()); ++i) {
4057 if (fr.m_data.at(i).second.m_lineNumber == line) {
4058 if (fr.m_data.at(i).first.virginPos(fr.m_data.at(i).first.length() - 1) >= pos + 1) {
4059 return {pos + 1, line};
4060 } else if (i + 1 < static_cast<long long int>(fr.m_data.size())) {
4061 return {fr.m_data.at(i + 1).first.virginPos(0), fr.m_data.at(i + 1).second.m_lineNumber};
4062 } else {
4063 return {pos, line};
4064 }
4065 }
4066 }
4067
4068 return {pos, line};
4069}
4070
4071template<class Trait>
4072inline void
4073Parser<Trait>::parseParagraph(MdBlock<Trait> &fr,
4074 std::shared_ptr<Block<Trait>> parent,
4075 std::shared_ptr<Document<Trait>> doc,
4076 typename Trait::StringList &linksToParse,
4077 const typename Trait::String &workingPath,
4078 const typename Trait::String &fileName,
4079 bool collectRefLinks,
4080 RawHtmlBlock<Trait> &html)
4081{
4082 parseFormattedTextLinksImages(fr, parent, doc, linksToParse, workingPath, fileName,
4083 collectRefLinks, false, html, false);
4084}
4085
4086template<class Trait>
4088 static bool
4089 isFreeTag(std::shared_ptr<RawHtml<Trait>> html)
4090 {
4091 return html->isFreeTag();
4092 }
4093
4094 static void
4095 setFreeTag(std::shared_ptr<RawHtml<Trait>> html, bool on)
4096 {
4097 html->setFreeTag(on);
4098 }
4099};
4100
4101template<class Trait>
4102inline typename Parser<Trait>::Delims
4103Parser<Trait>::collectDelimiters(const typename MdBlock<Trait>::Data &fr)
4104{
4105 Delims d;
4106
4107 for (long long int line = 0; line < (long long int)fr.size(); ++line) {
4108 const typename Trait::String &str = fr.at(line).first.asString();
4109 const auto p = skipSpaces<Trait>(0, str);
4110 const auto withoutSpaces = str.sliced(p);
4111
4112 if (isHorizontalLine<Trait>(withoutSpaces) && p < 4) {
4113 d.push_back({Delimiter::HorizontalLine, line, 0, str.length(), false, false, false});
4114 } else if (isH1<Trait>(withoutSpaces) && p < 4) {
4115 d.push_back({Delimiter::H1, line, 0, str.length(), false, false, false});
4116 } else if (isH2<Trait>(withoutSpaces) && p < 4) {
4117 d.push_back({Delimiter::H2, line, 0, str.length(), false, false, false});
4118 } else {
4119 bool backslash = false;
4120 bool word = false;
4121
4122 for (long long int i = p; i < str.size(); ++i) {
4123 bool now = false;
4124
4125 if (str[i] == Trait::latin1ToChar('\\') && !backslash) {
4126 backslash = true;
4127 now = true;
4128 }
4129 // * or _
4130 else if ((str[i] == Trait::latin1ToChar('_') || str[i] == Trait::latin1ToChar('*')) && !backslash) {
4131 typename Trait::String style;
4132
4133 const bool punctBefore = (i > 0 ? str[i - 1].isPunct() || str[i - 1].isSymbol() : true);
4134 const bool uWhitespaceBefore = (i > 0 ? Trait::isUnicodeWhitespace(str[i - 1]) : true);
4135 const bool uWhitespaceOrPunctBefore = uWhitespaceBefore || punctBefore;
4136 const bool alNumBefore = (i > 0 ? str[i - 1].isLetterOrNumber() : false);
4137
4138 const auto ch = str[i];
4139
4140 while (i < str.length() && str[i] == ch) {
4141 style.push_back(str[i]);
4142 ++i;
4143 }
4144
4145 typename Delimiter::DelimiterType dt = Delimiter::Unknown;
4146
4147 if (ch == Trait::latin1ToChar('*')) {
4148 dt = Delimiter::Emphasis1;
4149 } else {
4150 dt = Delimiter::Emphasis2;
4151 }
4152
4153 const bool punctAfter = (i < str.length() ? str[i].isPunct() || str[i].isSymbol() : true);
4154 const bool uWhitespaceAfter = (i < str.length() ? Trait::isUnicodeWhitespace(str[i]) : true);
4155 const bool alNumAfter = (i < str.length() ? str[i].isLetterOrNumber() : false);
4156 const bool leftFlanking = !uWhitespaceAfter && (!punctAfter || (punctAfter && uWhitespaceOrPunctBefore))
4157 && !(ch == Trait::latin1ToChar('_') && alNumBefore && alNumAfter);
4158 const bool rightFlanking = !uWhitespaceBefore && (!punctBefore || (punctBefore && (uWhitespaceAfter || punctAfter)))
4159 && !(ch == Trait::latin1ToChar('_') && alNumBefore && alNumAfter);
4160
4161 if (leftFlanking || rightFlanking) {
4162 for (auto j = 0; j < style.length(); ++j) {
4163 d.push_back({dt, line, i - style.length() + j, 1,
4164 word, false, leftFlanking, rightFlanking});
4165 }
4166
4167 word = false;
4168 } else {
4169 word = true;
4170 }
4171
4172 --i;
4173 }
4174 // ~
4175 else if (str[i] == Trait::latin1ToChar('~') && !backslash) {
4176 typename Trait::String style;
4177
4178 const bool punctBefore = (i > 0 ? str[i - 1].isPunct() || str[i - 1].isSymbol() : true);
4179 const bool uWhitespaceBefore = (i > 0 ? Trait::isUnicodeWhitespace(str[i - 1]) : true);
4180 const bool uWhitespaceOrPunctBefore = uWhitespaceBefore || punctBefore;
4181
4182 while (i < str.length() && str[i] == Trait::latin1ToChar('~')) {
4183 style.push_back(str[i]);
4184 ++i;
4185 }
4186
4187 if (style.length() <= 2) {
4188 const bool punctAfter = (i < str.length() ? str[i].isPunct() || str[i].isSymbol() : true);
4189 const bool uWhitespaceAfter = (i < str.length() ? Trait::isUnicodeWhitespace(str[i]) : true);
4190 const bool leftFlanking = !uWhitespaceAfter && (!punctAfter || (punctAfter && uWhitespaceOrPunctBefore));
4191 const bool rightFlanking = !uWhitespaceBefore && (!punctBefore || (punctBefore && (uWhitespaceAfter || punctAfter)));
4192
4193 if (leftFlanking || rightFlanking) {
4194 d.push_back({Delimiter::Strikethrough,
4195 line,
4196 i - style.length(),
4197 style.length(),
4198 word,
4199 false,
4200 leftFlanking,
4201 rightFlanking});
4202
4203 word = false;
4204 } else {
4205 word = true;
4206 }
4207 } else {
4208 word = true;
4209 }
4210
4211 --i;
4212 }
4213 // [
4214 else if (str[i] == Trait::latin1ToChar('[') && !backslash) {
4215 d.push_back({Delimiter::SquareBracketsOpen, line, i, 1, word, false});
4216
4217 word = false;
4218 }
4219 // !
4220 else if (str[i] == Trait::latin1ToChar('!') && !backslash) {
4221 if (i + 1 < str.length()) {
4222 if (str[i + 1] == Trait::latin1ToChar('[')) {
4223 d.push_back({Delimiter::ImageOpen, line, i, 2, word, false});
4224
4225 ++i;
4226
4227 word = false;
4228 } else {
4229 word = true;
4230 }
4231 } else {
4232 word = true;
4233 }
4234 }
4235 // (
4236 else if (str[i] == Trait::latin1ToChar('(') && !backslash) {
4237 d.push_back({Delimiter::ParenthesesOpen, line, i, 1, word, false});
4238
4239 word = false;
4240 }
4241 // ]
4242 else if (str[i] == Trait::latin1ToChar(']') && !backslash) {
4243 d.push_back({Delimiter::SquareBracketsClose, line, i, 1, word, false});
4244
4245 word = false;
4246 }
4247 // )
4248 else if (str[i] == Trait::latin1ToChar(')') && !backslash) {
4249 d.push_back({Delimiter::ParenthesesClose, line, i, 1, word, false});
4250
4251 word = false;
4252 }
4253 // <
4254 else if (str[i] == Trait::latin1ToChar('<') && !backslash) {
4255 d.push_back({Delimiter::Less, line, i, 1, word, false});
4256
4257 word = false;
4258 }
4259 // >
4260 else if (str[i] == Trait::latin1ToChar('>') && !backslash) {
4261 d.push_back({Delimiter::Greater, line, i, 1, word, false});
4262
4263 word = false;
4264 }
4265 // `
4266 else if (str[i] == Trait::latin1ToChar('`')) {
4267 typename Trait::String code;
4268
4269 while (i < str.length() && str[i] == Trait::latin1ToChar('`')) {
4270 code.push_back(str[i]);
4271 ++i;
4272 }
4273
4274 d.push_back({Delimiter::InlineCode,
4275 line,
4276 i - code.length() - (backslash ? 1 : 0),
4277 code.length() + (backslash ? 1 : 0),
4278 word,
4279 backslash});
4280
4281 word = false;
4282
4283 --i;
4284 }
4285 // $
4286 else if (str[i] == Trait::latin1ToChar('$')) {
4287 typename Trait::String m;
4288
4289 while (i < str.length() && str[i] == Trait::latin1ToChar('$')) {
4290 m.push_back(str[i]);
4291 ++i;
4292 }
4293
4294 if (m.length() <= 2 && !backslash) {
4295 d.push_back({Delimiter::Math, line, i - m.length(), m.length(),
4296 false, false, false, false});
4297 }
4298
4299 word = false;
4300
4301 --i;
4302 } else {
4303 word = true;
4304 }
4305
4306 if (!now) {
4307 backslash = false;
4308 }
4309 }
4310 }
4311 }
4312
4313 return d;
4314}
4315
//! \return Is the given string a line break.
//!
//! A line is a hard line break when it ends with two (or more) spaces or with
//! a backslash. A single trailing space is not a line break.
//!
//! \param s Line to check.
template<class Trait>
inline bool
isLineBreak(const typename Trait::String &s)
{
    // Two spaces: consistent with lineBreakLength(), which reports length 2
    // for a break made of spaces.
    return (s.endsWith(Trait::latin1ToString("  ")) || s.endsWith(Trait::latin1ToString("\\")));
}
4323
//! \return Length of line break.
//!
//! \param s Line that ends with a line break.
//! \return 2 when \p s ends with two spaces, 1 otherwise (i.e. for a line break
//!         made with a backslash).
template<class Trait>
inline long long int
lineBreakLength(const typename Trait::String &s)
{
    // Length 2 is reported only if there are really two spaces at the end.
    return (s.endsWith(Trait::latin1ToString("  ")) ? 2 : 1);
}
4331
//! Remove line break from the end of string.
//!
//! Only a line break made with a backslash is removed, any other string
//! is returned as is.
//!
//! \param s Source string.
//! \return \p s without the last character if it is a backslash, copy of \p s otherwise.
template<class Trait>
inline typename Trait::String
removeLineBreak(const typename Trait::String &s)
{
    if (!s.endsWith(Trait::latin1ToString("\\"))) {
        return s;
    }

    return s.sliced(0, s.size() - 1);
}
4343
4344//! Initialize item with style information and set it as last item.
4345template<class Trait>
4346inline void
4348 std::shared_ptr<ItemWithOpts<Trait>> item)
4349{
4350 item->openStyles() = po.m_openStyles;
4351 po.m_openStyles.clear();
4352 po.m_lastItemWithStyle = item;
4353}
4354
4355//! Make text item.
4356template<class Trait>
4357inline void
4358makeTextObject(const typename Trait::String &text,
4360 long long int startPos,
4361 long long int startLine,
4362 long long int endPos,
4363 long long int endLine,
4364 bool doRemoveSpacesAtEnd = false)
4365{
4366 if (endPos < 0 && endLine - 1 >= 0) {
4367 endPos = po.m_fr.m_data.at(endLine - 1).first.length() - 1;
4368 --endLine;
4369 }
4370
4371 if (endPos == po.m_fr.m_data.at(endLine).first.length() - 1) {
4372 doRemoveSpacesAtEnd = true;
4373 }
4374
4375 auto s = removeBackslashes<typename Trait::String, Trait>(replaceEntity<Trait>(text));
4376
4377 if (doRemoveSpacesAtEnd) {
4378 removeSpacesAtEnd<typename Trait::String>(s);
4379 }
4380
4381 if (startPos == 0) {
4382 if (s.length()) {
4383 const auto p = skipSpaces<Trait>(0, s);
4384
4385 if (p > 0) {
4386 s.remove(0, p);
4387 }
4388 }
4389 }
4390
4391 if (!s.isEmpty()) {
4392 po.m_rawTextData.push_back({text, startPos, startLine});
4393
4394 std::shared_ptr<Text<Trait>> t(new Text<Trait>);
4395 t->setText(s);
4396 t->setOpts(po.m_opts);
4397 t->setStartColumn(po.m_fr.m_data.at(startLine).first.virginPos(startPos));
4398 t->setStartLine(po.m_fr.m_data.at(startLine).second.m_lineNumber);
4399 t->setEndColumn(po.m_fr.m_data.at(endLine).first.virginPos(endPos, true));
4400 t->setEndLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
4401
4402 initLastItemWithOpts<Trait>(po, t);
4403
4404 po.m_parent->setEndColumn(t->endColumn());
4405 po.m_parent->setEndLine(t->endLine());
4406
4407 po.m_wasRefLink = false;
4408 po.m_firstInParagraph = false;
4409 po.m_parent->appendItem(t);
4410
4411 po.m_lastText = t;
4412 } else {
4413 po.m_pos = startPos;
4414 }
4415}
4416
4417//! Make text item with line break.
4418template<class Trait>
4419inline void
4420makeTextObjectWithLineBreak(const typename Trait::String &text,
4422 long long int startPos,
4423 long long int startLine,
4424 long long int endPos,
4425 long long int endLine)
4426{
4427 makeTextObject(text, po, startPos, startLine, endPos, endLine, true);
4428
4429 std::shared_ptr<LineBreak<Trait>> hr(new LineBreak<Trait>);
4430 hr->setText(po.m_fr.m_data.at(endLine).first.asString().sliced(endPos + 1));
4431 hr->setStartColumn(po.m_fr.m_data.at(endLine).first.virginPos(endPos + 1));
4432 hr->setStartLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
4433 hr->setEndColumn(po.m_fr.m_data.at(endLine).first.virginPos(po.m_fr.m_data.at(endLine).first.length() - 1));
4434 hr->setEndLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
4435 po.m_parent->setEndColumn(hr->endColumn());
4436 po.m_parent->setEndLine(hr->endLine());
4437 po.m_wasRefLink = false;
4438 po.m_firstInParagraph = false;
4439 po.m_parent->appendItem(hr);
4440}
4441
4442//! Check for table in paragraph.
4443template<class Trait>
4444inline void
4446 long long int lastLine)
4447{
4448 if (!po.m_opts) {
4449 long long int i = po.m_pos > 0 ? po.m_line + 1 : po.m_line;
4450
4451 for (; i <= lastLine; ++i) {
4452 const auto h = isTableHeader<Trait>(po.m_fr.m_data[i].first.asString());
4453 const auto c = i + 1 < static_cast<long long int>(po.m_fr.m_data.size()) ?
4454 isTableAlignment<Trait>(po.m_fr.m_data[i + 1].first.asString()) : 0;
4455
4456 if (h && c && c == h) {
4458 po.m_startTableLine = i;
4459 po.m_columnsCount = c;
4460 po.m_lastTextLine = i - 1;
4461 po.m_lastTextPos = po.m_fr.m_data[po.m_lastTextLine].first.length();
4462
4463 return;
4464 }
4465 }
4466 }
4467
4468 po.m_lastTextLine = po.m_fr.m_data.size() - 1;
4469 po.m_lastTextPos = po.m_fr.m_data.back().first.length();
4470}
4471
4472//! Make text item.
4473template<class Trait>
4474inline void
4476 // Inclusive. Don't pass lastLine > actual line position with 0 lastPos. Pass as is,
4477 // i.e. if line length is 18 and you need whole line then pass lastLine = index of line,
4478 // and lastPos = 18, or you may crash here if you will pass lastLine = index of line + 1
4479 // and lastPos = 0...
4480 long long int lastLine,
4481 // Not inclusive
4482 long long int lastPos,
4484{
4485 if (po.m_line > lastLine) {
4486 return;
4487 } else if (po.m_line == lastLine && po.m_pos >= lastPos) {
4488 return;
4489 }
4490
4491 typename Trait::String text;
4492
4493 const auto isLastChar = po.m_pos >= po.m_fr.m_data.at(po.m_line).first.length();
4494 long long int startPos = (isLastChar ? 0 : po.m_pos);
4495 long long int startLine = (isLastChar ? po.m_line + 1 : po.m_line);
4496
4497 bool lineBreak =
4498 (!po.m_ignoreLineBreak && po.m_line != (long long int)(po.m_fr.m_data.size() - 1) &&
4499 (po.m_line == lastLine ? (lastPos == po.m_fr.m_data.at(po.m_line).first.length() &&
4500 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString())) :
4501 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString())));
4502
4503 // makeTOWLB
4504 auto makeTOWLB = [&]() {
4505 if (po.m_line != (long long int)(po.m_fr.m_data.size() - 1)) {
4506 const auto &line = po.m_fr.m_data.at(po.m_line).first.asString();
4507
4508 makeTextObjectWithLineBreak(text, po, startPos, startLine,
4509 line.length() - lineBreakLength<Trait>(line) - 1, po.m_line);
4510
4511 startPos = 0;
4512 startLine = po.m_line + 1;
4513
4514 text.clear();
4515 }
4516 }; // makeTOWLB
4517
4518 if (lineBreak) {
4519 text.push_back(removeLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString(po.m_pos)));
4520
4521 makeTOWLB();
4522 } else {
4523 const auto length = (po.m_line == lastLine ?
4524 lastPos - po.m_pos : po.m_fr.m_data.at(po.m_line).first.length() - po.m_pos);
4525 const auto s = po.m_fr.m_data.at(po.m_line).first.virginSubString(po.m_pos, length);
4526 text.push_back(s);
4527
4528 po.m_pos = (po.m_line == lastLine ? lastPos : po.m_fr.m_data.at(po.m_line).first.length());
4529
4530 makeTextObject(text,
4531 po,
4532 startPos,
4533 startLine,
4534 po.m_line == lastLine ? lastPos - 1 : po.m_fr.m_data.at(po.m_line).first.length() - 1,
4535 po.m_line);
4536
4537 text.clear();
4538 }
4539
4540 if (po.m_line != lastLine) {
4541 ++po.m_line;
4542 startPos = 0;
4543 startLine = po.m_line;
4544
4545 for (; po.m_line < lastLine; ++po.m_line) {
4546 lineBreak = (!po.m_ignoreLineBreak && po.m_line != (long long int)(po.m_fr.m_data.size() - 1) &&
4547 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.asString()));
4548
4549 const auto s = (lineBreak ? removeLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString()) :
4550 po.m_fr.m_data.at(po.m_line).first.virginSubString());
4551 text.push_back(s);
4552
4553 if (lineBreak) {
4554 makeTOWLB();
4555 } else {
4556 makeTextObject(text, po, 0, po.m_line,
4557 po.m_fr.m_data.at(po.m_line).first.length() - 1, po.m_line);
4558 }
4559
4560 text.clear();
4561 }
4562
4563 lineBreak = (!po.m_ignoreLineBreak && po.m_line != (long long int)(po.m_fr.m_data.size() - 1) &&
4564 lastPos == po.m_fr.m_data.at(po.m_line).first.length() &&
4565 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.asString()));
4566
4567 auto s = po.m_fr.m_data.at(po.m_line).first.virginSubString(0, lastPos);
4568
4569 po.m_pos = lastPos;
4570
4571 if (!lineBreak) {
4572 text.push_back(s);
4573
4574 makeTextObject(text, po, 0, lastLine, lastPos - 1, lastLine);
4575 } else {
4576 s = removeLineBreak<Trait>(s);
4577 text.push_back(s);
4578
4579 makeTOWLB();
4580 }
4581 }
4582}
4583
4584//! Skip spaces.
4585template<class Trait>
4586inline void
4587skipSpacesInHtml(long long int &l,
4588 long long int &p,
4589 const typename MdBlock<Trait>::Data &fr)
4590{
4591 while (l < (long long int)fr.size()) {
4592 p = skipSpaces<Trait>(p, fr[l].first.asString());
4593
4594 if (p < fr[l].first.length()) {
4595 return;
4596 }
4597
4598 p = 0;
4599 ++l;
4600 }
4601}
4602
4603//! Read HTML attribute value.
4604template<class Trait>
4605inline std::pair<bool, bool>
4607 long long int &p,
4608 const typename MdBlock<Trait>::Data &fr)
4609{
4610 static const typename Trait::String notAllowed = Trait::latin1ToString("\"`=<'");
4611
4612 const auto start = p;
4613
4614 for (; p < fr[l].first.length(); ++p) {
4615 if (fr[l].first[p].isSpace()) {
4616 break;
4617 } else if (notAllowed.contains(fr[l].first[p])) {
4618 return {false, false};
4619 } else if (fr[l].first[p] == Trait::latin1ToChar('>')) {
4620 return {p - start > 0, p - start > 0};
4621 }
4622 }
4623
4624 return {p - start > 0, p - start > 0};
4625}
4626
4627//! Read HTML attribute value.
4628template<class Trait>
4629inline std::pair<bool, bool>
4630readHtmlAttrValue(long long int &l,
4631 long long int &p,
4632 const typename MdBlock<Trait>::Data &fr)
4633{
4634 if (p < fr[l].first.length() && fr[l].first[p] != Trait::latin1ToChar('"') &&
4635 fr[l].first[p] != Trait::latin1ToChar('\'')) {
4636 return readUnquotedHtmlAttrValue<Trait>(l, p, fr);
4637 }
4638
4639 const auto s = fr[l].first[p];
4640
4641 ++p;
4642
4643 if (p >= fr[l].first.length()) {
4644 return {false, false};
4645 }
4646
4647 for (; l < (long long int)fr.size(); ++l) {
4648 bool doBreak = false;
4649
4650 for (; p < fr[l].first.length(); ++p) {
4651 const auto ch = fr[l].first[p];
4652
4653 if (ch == s) {
4654 doBreak = true;
4655
4656 break;
4657 }
4658 }
4659
4660 if (doBreak) {
4661 break;
4662 }
4663
4664 p = 0;
4665 }
4666
4667 if (l >= (long long int)fr.size()) {
4668 return {false, false};
4669 }
4670
4671 if (p >= fr[l].first.length()) {
4672 return {false, false};
4673 }
4674
4675 if (fr[l].first[p] != s) {
4676 return {false, false};
4677 }
4678
4679 ++p;
4680
4681 return {true, true};
4682}
4683
4684//! Read HTML attribute.
4685template<class Trait>
4686inline std::pair<bool, bool>
4687readHtmlAttr(long long int &l,
4688 long long int &p,
4689 const typename MdBlock<Trait>::Data &fr,
4690 bool checkForSpace)
4691{
4692 long long int tl = l, tp = p;
4693
4694 skipSpacesInHtml<Trait>(l, p, fr);
4695
4696 if (l >= (long long int)fr.size()) {
4697 return {false, false};
4698 }
4699
4700 // /
4701 if (p < fr[l].first.length() && fr[l].first[p] == Trait::latin1ToChar('/')) {
4702 return {false, true};
4703 }
4704
4705 // >
4706 if (p < fr[l].first.length() && fr[l].first[p] == Trait::latin1ToChar('>')) {
4707 return {false, true};
4708 }
4709
4710 if (checkForSpace) {
4711 if (tl == l && tp == p) {
4712 return {false, false};
4713 }
4714 }
4715
4716 const auto start = p;
4717
4718 for (; p < fr[l].first.length(); ++p) {
4719 const auto ch = fr[l].first[p];
4720
4721 if (ch.isSpace() || ch == Trait::latin1ToChar('>') || ch == Trait::latin1ToChar('=')) {
4722 break;
4723 }
4724 }
4725
4726 const typename Trait::String name = fr[l].first.asString().sliced(start, p - start).toLower();
4727
4728 if (!name.startsWith(Trait::latin1ToString("_")) && !name.startsWith(Trait::latin1ToString(":")) &&
4729 !name.isEmpty() && !(name[0].unicode() >= 97 && name[0].unicode() <= 122)) {
4730 return {false, false};
4731 }
4732
4733 static const typename Trait::String allowedInName =
4734 Trait::latin1ToString("abcdefghijklmnopqrstuvwxyz0123456789_.:-");
4735
4736 for (long long int i = 1; i < name.length(); ++i) {
4737 if (!allowedInName.contains(name[i])) {
4738 return {false, false};
4739 }
4740 }
4741
4742 // >
4743 if (p < fr[l].first.length() && fr[l].first[p] == Trait::latin1ToChar('>')) {
4744 return {false, true};
4745 }
4746
4747 tl = l;
4748 tp = p;
4749
4750 skipSpacesInHtml<Trait>(l, p, fr);
4751
4752 if (l >= (long long int)fr.size()) {
4753 return {false, false};
4754 }
4755
4756 // =
4757 if (p < fr[l].first.length()) {
4758 if (fr[l].first[p] != Trait::latin1ToChar('=')) {
4759 l = tl;
4760 p = tp;
4761
4762 return {true, true};
4763 } else {
4764 ++p;
4765 }
4766 } else {
4767 return {true, false};
4768 }
4769
4770 skipSpacesInHtml<Trait>(l, p, fr);
4771
4772 if (l >= (long long int)fr.size()) {
4773 return {false, false};
4774 }
4775
4776 return readHtmlAttrValue<Trait>(l, p, fr);
4777}
4778
4779//! \return Is HTML tag at the given position?
4780template<class Trait>
4781inline std::tuple<bool, long long int, long long int, bool, typename Trait::String>
4782isHtmlTag(long long int line, long long int pos, TextParsingOpts<Trait> &po, int rule);
4783
4784//! \return Is after the given position only HTML tags?
4785template<class Trait>
4786inline bool
4788 long long int pos,
4790 int rule)
4791{
4792 static const std::set<typename Trait::String> s_rule1Finish = {Trait::latin1ToString("/pre"),
4793 Trait::latin1ToString("/script"),
4794 Trait::latin1ToString("/style"),
4795 Trait::latin1ToString("/textarea")};
4796
4797 auto p = skipSpaces<Trait>(pos, po.m_fr.m_data[line].first.asString());
4798
4799 while (p < po.m_fr.m_data[line].first.length()) {
4800 bool ok = false;
4801
4802 long long int l;
4803 typename Trait::String tag;
4804
4805 std::tie(ok, l, p, std::ignore, tag) = isHtmlTag(line, p, po, rule);
4806
4807 ++p;
4808
4809 if (rule != 1) {
4810 if (!ok) {
4811 return false;
4812 }
4813
4814 if (l > line) {
4815 return true;
4816 }
4817 } else {
4818 if (s_rule1Finish.find(tag.toLower()) != s_rule1Finish.cend() && l == line) {
4819 return true;
4820 }
4821
4822 if (l > line) {
4823 return false;
4824 }
4825 }
4826
4827 p = skipSpaces<Trait>(p, po.m_fr.m_data[line].first.asString());
4828 }
4829
4830 if (p >= po.m_fr.m_data[line].first.length()) {
4831 return true;
4832 }
4833
4834 return false;
4835}
4836
4837//! \return Is setext heading in the lines?
4838template<class Trait>
4839inline bool
4841 long long int startLine,
4842 long long int endLine)
4843{
4844 for (; startLine <= endLine; ++startLine) {
4845 const auto pos = skipSpaces<Trait>(0, po.m_fr.m_data.at(startLine).first.asString());
4846 const auto line = po.m_fr.m_data.at(startLine).first.asString().sliced(pos);
4847
4848 if ((isH1<Trait>(line) || isH2<Trait>(line)) && pos < 4) {
4849 return true;
4850 }
4851 }
4852
4853 return false;
4854}
4855
4856//! \return Is HTML tag at the given position?
4857template<class Trait>
4858inline std::tuple<bool, long long int, long long int, bool, typename Trait::String>
4859isHtmlTag(long long int line,
4860 long long int pos,
4862 int rule)
4863{
4864 if (po.m_fr.m_data[line].first[pos] != Trait::latin1ToChar('<')) {
4865 return {false, line, pos, false, {}};
4866 }
4867
4868 typename Trait::String tag;
4869
4870 long long int l = line;
4871 long long int p = pos + 1;
4872 bool first = false;
4873
4874 {
4875 const auto tmp = skipSpaces<Trait>(0, po.m_fr.m_data[l].first.asString());
4876 first = (tmp == pos);
4877 }
4878
4879 if (p >= po.m_fr.m_data[l].first.length()) {
4880 return {false, line, pos, first, tag};
4881 }
4882
4883 bool closing = false;
4884
4885 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('/')) {
4886 closing = true;
4887
4888 tag.push_back(Trait::latin1ToChar('/'));
4889
4890 ++p;
4891 }
4892
4893 const auto start = p;
4894
4895 // tag
4896 for (; p < po.m_fr.m_data[l].first.length(); ++p) {
4897 const auto ch = po.m_fr.m_data[l].first[p];
4898
4899 if (ch.isSpace() || ch == Trait::latin1ToChar('>') || ch == Trait::latin1ToChar('/')) {
4900 break;
4901 }
4902 }
4903
4904 tag.push_back(po.m_fr.m_data[l].first.asString().sliced(start, p - start));
4905
4906 if (p < po.m_fr.m_data[l].first.length() && po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('/')) {
4907 if (p + 1 < po.m_fr.m_data[l].first.length() &&
4908 po.m_fr.m_data[l].first[p + 1] == Trait::latin1ToChar('>')) {
4909 long long int tmp = 0;
4910
4911 if (rule == 7) {
4912 tmp = skipSpaces<Trait>(p + 2, po.m_fr.m_data[l].first.asString());
4913 }
4914
4915 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
4916 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 2, po, rule == 1)));
4917
4918 if (!isSetextHeadingBetween(po, line, l)) {
4919 return {true, l, p + 1, onLine, tag};
4920 } else {
4921 return {false, line, pos, first, tag};
4922 }
4923 } else {
4924 return {false, line, pos, first, tag};
4925 }
4926 }
4927
4928 if (p < po.m_fr.m_data[l].first.length() && po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('>')) {
4929 long long int tmp = 0;
4930
4931 if (rule == 7) {
4932 tmp = skipSpaces<Trait>(p + 1, po.m_fr.m_data[l].first.asString());
4933 }
4934
4935 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
4936 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 1, po, rule == 1)));
4937
4938 if (!isSetextHeadingBetween(po, line, l)) {
4939 return {true, l, p, onLine, tag};
4940 } else {
4941 return {false, line, pos, first, tag};
4942 }
4943 }
4944
4945 skipSpacesInHtml<Trait>(l, p, po.m_fr.m_data);
4946
4947 if (l >= (long long int)po.m_fr.m_data.size()) {
4948 return {false, line, pos, first, tag};
4949 }
4950
4951 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('>')) {
4952 long long int tmp = 0;
4953
4954 if (rule == 7) {
4955 tmp = skipSpaces<Trait>(p + 1, po.m_fr.m_data[l].first.asString());
4956 }
4957
4958 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
4959 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 1, po, rule == 1)));
4960
4961 if (!isSetextHeadingBetween(po, line, l)) {
4962 return {true, l, p, onLine, tag};
4963 } else {
4964 return {false, line, pos, first, tag};
4965 }
4966 }
4967
4968 bool attr = true;
4969 bool firstAttr = true;
4970
4971 while (attr) {
4972 bool ok = false;
4973
4974 std::tie(attr, ok) = readHtmlAttr<Trait>(l, p, po.m_fr.m_data, !firstAttr);
4975
4976 firstAttr = false;
4977
4978 if (closing && attr) {
4979 return {false, line, pos, first, tag};
4980 }
4981
4982 if (!ok) {
4983 return {false, line, pos, first, tag};
4984 }
4985 }
4986
4987 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('/')) {
4988 ++p;
4989 } else {
4990 skipSpacesInHtml<Trait>(l, p, po.m_fr.m_data);
4991
4992 if (l >= (long long int)po.m_fr.m_data.size()) {
4993 return {false, line, pos, first, tag};
4994 }
4995 }
4996
4997 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('>')) {
4998 long long int tmp = 0;
4999
5000 if (rule == 7) {
5001 tmp = skipSpaces<Trait>(p + 1, po.m_fr.m_data[l].first.asString());
5002 }
5003
5004 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
5005 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 1, po, rule == 1)));
5006
5007 if (!isSetextHeadingBetween(po, line, l)) {
5008 return {true, l, p, onLine, tag};
5009 } else {
5010 return {false, line, pos, first, tag};
5011 }
5012 }
5013
5014 return {false, line, pos, first, {}};
5015}
5016
5017//! Read HTML tag.
5018template<class Trait>
5019inline std::pair<typename Trait::String, bool>
5020Parser<Trait>::readHtmlTag(typename Delims::const_iterator it,
5021 TextParsingOpts<Trait> &po)
5022{
5023 long long int i = it->m_pos + 1;
5024 const auto start = i;
5025
5026 if (start >= po.m_fr.m_data[it->m_line].first.length()) {
5027 return {{}, false};
5028 }
5029
5030 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5031 const auto ch = po.m_fr.m_data[it->m_line].first[i];
5032
5033 if (ch.isSpace() || ch == Trait::latin1ToChar('>')) {
5034 break;
5035 }
5036 }
5037
5038 return {po.m_fr.m_data[it->m_line].first.asString().sliced(start, i - start),
5039 i < po.m_fr.m_data[it->m_line].first.length() ?
5040 po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('>') : false};
5041}
5042
5043template<class Trait>
5044inline typename Parser<Trait>::Delims::const_iterator
5045Parser<Trait>::findIt(typename Delims::const_iterator it,
5046 typename Delims::const_iterator last,
5047 TextParsingOpts<Trait> &po)
5048{
5049 auto ret = it;
5050
5051 for (; it != last; ++it) {
5052 if ((it->m_line == po.m_line && it->m_pos < po.m_pos) || it->m_line < po.m_line) {
5053 ret = it;
5054 } else {
5055 break;
5056 }
5057 }
5058
5059 return ret;
5060}
5061
5062//! Read HTML data.
//!
//! Appends the text between (\p line, \p pos) and (\p toLine, \p toPos) of the
//! fragment to the raw HTML item being collected in po.m_html.m_html, updates
//! the end position of that item and moves the parsing position
//! (po.m_line, po.m_pos) behind the consumed text.
//!
//! A negative \p toPos means "up to the end of \p toLine"; \p toPos equal to 0
//! means that nothing is taken from \p toLine itself.
//!
//! When \p finish is true the item may be handed over to the parent (or kept in
//! po.m_tmpHtml while reference links are collected); otherwise the HTML is
//! marked as continuing via po.m_html.m_continueHtml.
//!
//! NOTE(review): this listing has gaps. A parameter between \p toPos and
//! \p finish is missing (the body uses `po`, and callers pass one as the fifth
//! argument), and one statement between the first `if` block and
//! `if (finish)` is missing as well - presumably the one that consumes
//! \p onLine, which is otherwise unused. Verify against upstream.
5063template<class Trait>
5064inline void
5065eatRawHtml(long long int line,
5066 long long int pos,
5067 long long int toLine,
5068 long long int toPos,
5070 bool finish,
5071 int htmlRule,
5072 bool onLine,
5073 bool continueEating = false)
5074{
5075 if (line <= toLine) {
 // Start from the text collected so far.
5076 typename Trait::String h = po.m_html.m_html->text();
5077
 // Re-create the empty lines that preceded this fragment, unless this call
 // continues eating the same HTML.
5078 if (!h.isEmpty() && !continueEating) {
5079 for (long long int i = 0; i < po.m_fr.m_emptyLinesBefore; ++i) {
5080 h.push_back(Trait::latin1ToChar('\n'));
5081 }
5082 }
5083
 // Part of the first line: up to toPos when the range ends on this line and
 // toPos is not negative, otherwise up to the end of the line.
5084 const auto first = po.m_fr.m_data[line].first.asString().sliced(
5085 pos,
5086 (line == toLine ? (toPos >= 0 ? toPos - pos : po.m_fr.m_data[line].first.length() - pos) :
5087 po.m_fr.m_data[line].first.length() - pos));
5088
 // Separate from the previous text with a line break when the new text
 // starts on another source line than the one the item currently ends on.
5089 if (!h.isEmpty() && !first.isEmpty() && po.m_html.m_html->endLine() != po.m_fr.m_data[line].second.m_lineNumber) {
5090 h.push_back(Trait::latin1ToChar('\n'));
5091 }
5092
5093 if (!first.isEmpty()) {
5094 h.push_back(first);
5095 }
5096
5097 ++line;
5098
 // Whole lines strictly between the first and the last line.
5099 for (; line < toLine; ++line) {
5100 h.push_back(Trait::latin1ToChar('\n'));
5101 h.push_back(po.m_fr.m_data[line].first.asString());
5102 }
5103
 // Leading part of the last line (only reached for multi-line ranges).
5104 if (line == toLine && toPos != 0) {
5105 h.push_back(Trait::latin1ToChar('\n'));
5106 h.push_back(po.m_fr.m_data[line].first.asString().sliced(0, toPos > 0 ?
5107 toPos : po.m_fr.m_data[line].first.length()));
5108 }
5109
5110 auto endColumn = toPos;
5111 auto endLine = toLine;
5112
 // A range that stops at column 0 really ends at the end of the previous line.
5113 if (endColumn == 0 && endLine > 0) {
5114 --endLine;
5115 endColumn = po.m_fr.m_data.at(endLine).first.length();
5116 }
5117
 // End position is inclusive, hence the "- 1"; positions are translated
 // with virginPos() before being stored in the item.
5118 po.m_html.m_html->setEndColumn(po.m_fr.m_data.at(endLine).first.virginPos(endColumn >= 0 ?
5119 endColumn - 1 : po.m_fr.m_data.at(endLine).first.length() - 1));
5120 po.m_html.m_html->setEndLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
5121
 // Move the parsing position behind the consumed text.
5122 po.m_line = (toPos >= 0 ? toLine : toLine + 1);
5123 po.m_pos = (toPos >= 0 ? toPos : 0);
5124
 // If that position is at the end of a line that is not the last one,
 // continue from the beginning of the next line.
5125 if (po.m_line + 1 < static_cast<long long int>(po.m_fr.m_data.size()) &&
5126 po.m_pos >= po.m_fr.m_data.at(po.m_line).first.length()) {
5127 ++po.m_line;
5128 po.m_pos = 0;
5129 }
5130
5131 po.m_html.m_html->setText(h);
5132 }
5133
 // NOTE(review): a statement is missing from the listing at this point.
5135
5136 if (finish) {
 // The item is emitted when it occupies whole lines, for rule 7, or when
 // the parsing position is still inside the fragment.
5137 if (po.m_html.m_onLine || htmlRule == 7 || po.m_line < (long long int)po.m_fr.m_data.size()) {
5138 if (!po.m_collectRefLinks) {
5139 po.m_parent->appendItem(po.m_html.m_html);
5140 po.m_parent->setEndColumn(po.m_html.m_html->endColumn());
5141 po.m_parent->setEndLine(po.m_html.m_html->endLine());
5142 initLastItemWithOpts<Trait>(po, po.m_html.m_html);
5143 po.m_html.m_html->setOpts(po.m_opts);
5144 po.m_lastText = nullptr;
5145 } else {
 // While collecting reference links nothing is appended to the parent.
5146 po.m_tmpHtml = po.m_html.m_html;
5147 }
5148
5149 resetHtmlTag(po.m_html);
5150 }
5151 } else {
5152 po.m_html.m_continueHtml = true;
5153 }
5154}
5155
5156template<class Trait>
5157inline bool
5158Parser<Trait>::isNewBlockIn(MdBlock<Trait> &fr,
5159 long long int startLine,
5160 long long int endLine)
5161{
5162 for (auto i = startLine + 1; i <= endLine; ++i) {
5163 const auto type = whatIsTheLine(fr.m_data[i].first);
5164
5165 switch (type) {
5166 case Parser<Trait>::BlockType::Footnote:
5167 case Parser<Trait>::BlockType::FensedCodeInList:
5168 case Parser<Trait>::BlockType::SomethingInList:
5169 case Parser<Trait>::BlockType::List:
5170 case Parser<Trait>::BlockType::ListWithFirstEmptyLine:
5171 case Parser<Trait>::BlockType::Code:
5172 case Parser<Trait>::BlockType::Blockquote:
5173 case Parser<Trait>::BlockType::Heading:
5174 case Parser<Trait>::BlockType::EmptyLine:
5175 return true;
5176
5177 default:
5178 break;
5179 }
5180
5181 const auto ns = skipSpaces<Trait>(0, fr.m_data[i].first.asString());
5182
5183 if (ns < 4) {
5184 const auto s = fr.m_data[i].first.asString().sliced(ns);
5185
5186 if (isHorizontalLine<Trait>(s) || isH1<Trait>(s) || isH2<Trait>(s)) {
5187 return true;
5188 }
5189 }
5190 }
5191
5192 return false;
5193}
5194
5195template<class Trait>
5196inline void
5197Parser<Trait>::finishRule1HtmlTag(typename Delims::const_iterator it,
5198 typename Delims::const_iterator last,
5199 TextParsingOpts<Trait> &po,
5200 bool skipFirst)
5201{
5202 static const std::set<typename Trait::String> s_finish = {Trait::latin1ToString("/pre"),
5203 Trait::latin1ToString("/script"),
5204 Trait::latin1ToString("/style"),
5205 Trait::latin1ToString("/textarea")};
5206
5207 if (it != last) {
5208 bool ok = false;
5209 long long int l = -1, p = -1;
5210
5211 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less && skipFirst) {
5212 std::tie(ok, l, p, po.m_html.m_onLine, std::ignore) =
5213 isHtmlTag(it->m_line, it->m_pos, po, 1);
5214 }
5215
5216 if (po.m_html.m_onLine) {
5217 for (it = (skipFirst && it != last ? std::next(it) : it); it != last; ++it) {
5218 if (it->m_type == Delimiter::Less) {
5219 typename Trait::String tag;
5220 bool closed = false;
5221
5222 std::tie(tag, closed) = readHtmlTag(it, po);
5223
5224 if (closed) {
5225 if (s_finish.find(tag.toLower()) != s_finish.cend()) {
5226 eatRawHtml(po.m_line, po.m_pos, it->m_line, -1, po,
5227 true, 1, po.m_html.m_onLine);
5228
5229 return;
5230 }
5231 }
5232 }
5233 }
5234 } else if (ok && !isNewBlockIn(po.m_fr, it->m_line, l)) {
5235 eatRawHtml(po.m_line, po.m_pos, l, p + 1, po, true, 1, false);
5236
5237 return;
5238 } else {
5239 resetHtmlTag(po.m_html);
5240
5241 return;
5242 }
5243 }
5244
5245 if (po.m_html.m_onLine) {
5246 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 1, po.m_html.m_onLine);
5247 } else {
5248 resetHtmlTag(po.m_html);
5249 }
5250}
5251
5252template<class Trait>
5253inline void
5254Parser<Trait>::finishRule2HtmlTag(typename Delims::const_iterator it,
5255 typename Delims::const_iterator last,
5256 TextParsingOpts<Trait> &po)
5257{
5258 if (it != last) {
5259 const auto start = it;
5260
5261 MdLineData::CommentData commentData = {2, true};
5262 bool onLine = po.m_html.m_onLine;
5263
5264 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5265 long long int i = po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos);
5266
5267 commentData = po.m_fr.m_data[it->m_line].second.m_htmlCommentData[i];
5268
5269 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5270 po.m_html.m_onLine = onLine;
5271 }
5272
5273 if (commentData.first != -1 && commentData.second) {
5274 for (; it != last; ++it) {
5275 if (it->m_type == Delimiter::Greater) {
5276 auto p = it->m_pos;
5277
5278 bool doContinue = false;
5279
5280 for (char i = 0; i < commentData.first; ++i) {
5281 if (!(p > 0 && po.m_fr.m_data[it->m_line].first[p - 1] == Trait::latin1ToChar('-'))) {
5282 doContinue = true;
5283
5284 break;
5285 }
5286
5287 --p;
5288 }
5289
5290 if (doContinue) {
5291 continue;
5292 }
5293
5294 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5295 eatRawHtml(po.m_line, po.m_pos, it->m_line,
5296 onLine ? po.m_fr.m_data[it->m_line].first.length() : it->m_pos + 1,
5297 po, true, 2, onLine);
5298 } else {
5299 resetHtmlTag(po.m_html);
5300 }
5301
5302 return;
5303 }
5304 }
5305 }
5306 }
5307
5308 if (po.m_html.m_onLine) {
5309 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 2, po.m_html.m_onLine);
5310 } else {
5311 resetHtmlTag(po.m_html);
5312 }
5313}
5314
5315template<class Trait>
5316inline void
5317Parser<Trait>::finishRule3HtmlTag(typename Delims::const_iterator it,
5318 typename Delims::const_iterator last,
5319 TextParsingOpts<Trait> &po)
5320{
5321 bool onLine = po.m_html.m_onLine;
5322
5323 if (it != last) {
5324 const auto start = it;
5325
5326 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5327 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5328 po.m_html.m_onLine = onLine;
5329 }
5330
5331 for (; it != last; ++it) {
5332 if (it->m_type == Delimiter::Greater) {
5333 if (it->m_pos > 0 && po.m_fr.m_data[it->m_line].first[it->m_pos - 1] == Trait::latin1ToChar('?')) {
5334 long long int i = it->m_pos + 1;
5335
5336 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5337 if (po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('<')) {
5338 break;
5339 }
5340 }
5341
5342 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5343 eatRawHtml(po.m_line, po.m_pos, it->m_line, i, po, true, 3, onLine);
5344 } else {
5345 resetHtmlTag(po.m_html);
5346 }
5347
5348 return;
5349 }
5350 }
5351 }
5352 }
5353
5354 if (po.m_html.m_onLine) {
5355 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 3, onLine);
5356 } else {
5357 resetHtmlTag(po.m_html);
5358 }
5359}
5360
5361template<class Trait>
5362inline void
5363Parser<Trait>::finishRule4HtmlTag(typename Delims::const_iterator it,
5364 typename Delims::const_iterator last,
5365 TextParsingOpts<Trait> &po)
5366{
5367 if (it != last) {
5368 const auto start = it;
5369
5370 bool onLine = po.m_html.m_onLine;
5371
5372 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5373 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5374 po.m_html.m_onLine = onLine;
5375 }
5376
5377 for (; it != last; ++it) {
5378 if (it->m_type == Delimiter::Greater) {
5379 long long int i = it->m_pos + 1;
5380
5381 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5382 if (po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('<')) {
5383 break;
5384 }
5385 }
5386
5387 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5388 eatRawHtml(po.m_line, po.m_pos, it->m_line, i, po, true, 4, onLine);
5389 } else {
5390 resetHtmlTag(po.m_html);
5391 }
5392
5393 return;
5394 }
5395 }
5396 }
5397
5398 if (po.m_html.m_onLine) {
5399 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 4, true);
5400 } else {
5401 resetHtmlTag(po.m_html);
5402 }
5403}
5404
5405template<class Trait>
5406inline void
5407Parser<Trait>::finishRule5HtmlTag(typename Delims::const_iterator it,
5408 typename Delims::const_iterator last,
5409 TextParsingOpts<Trait> &po)
5410{
5411 if (it != last) {
5412 const auto start = it;
5413
5414 bool onLine = po.m_html.m_onLine;
5415
5416 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5417 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5418 po.m_html.m_onLine = onLine;
5419 }
5420
5421 for (; it != last; ++it) {
5422 if (it->m_type == Delimiter::Greater) {
5423 if (it->m_pos > 1 && po.m_fr.m_data[it->m_line].first[it->m_pos - 1] == Trait::latin1ToChar(']') &&
5424 po.m_fr.m_data[it->m_line].first[it->m_pos - 2] == Trait::latin1ToChar(']')) {
5425 long long int i = it->m_pos + 1;
5426
5427 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5428 if (po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('<')) {
5429 break;
5430 }
5431 }
5432
5433 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5434 eatRawHtml(po.m_line, po.m_pos, it->m_line, i, po, true, 5, onLine);
5435 } else {
5436 resetHtmlTag(po.m_html);
5437 }
5438
5439 return;
5440 }
5441 }
5442 }
5443 }
5444
5445 if (po.m_html.m_onLine) {
5446 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 5, true);
5447 } else {
5448 resetHtmlTag(po.m_html);
5449 }
5450}
5451
5452template<class Trait>
5453inline void
5454Parser<Trait>::finishRule6HtmlTag(typename Delims::const_iterator it,
5455 typename Delims::const_iterator last,
5456 TextParsingOpts<Trait> &po)
5457{
5458 po.m_html.m_onLine = (it != last ?
5459 it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()) : true);
5460
5461 if (po.m_html.m_onLine) {
5462 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po,
5463 false, 6, po.m_html.m_onLine);
5464 } else {
5465 const auto nit = std::find_if(std::next(it), last, [](const auto &d) {
5466 return (d.m_type == Delimiter::Greater);
5467 });
5468
5469 if (nit != last && !isNewBlockIn(po.m_fr, it->m_line, nit->m_line)) {
5470 eatRawHtml(po.m_line, po.m_pos, nit->m_line, nit->m_pos + nit->m_len, po,
5471 true, 6, false);
5472 }
5473 }
5474
5475 if (po.m_fr.m_emptyLineAfter && po.m_html.m_html) {
5476 po.m_html.m_continueHtml = false;
5477 }
5478}
5479
5480template<class Trait>
5481inline typename Parser<Trait>::Delims::const_iterator
5482Parser<Trait>::finishRawHtmlTag(typename Delims::const_iterator it,
5483 typename Delims::const_iterator last,
5484 TextParsingOpts<Trait> &po,
5485 bool skipFirst)
5486{
5487 po.m_detected = TextParsingOpts<Trait>::Detected::HTML;
5488
5489 switch (po.m_html.m_htmlBlockType) {
5490 case 1:
5491 finishRule1HtmlTag(it, last, po, skipFirst);
5492 break;
5493
5494 case 2:
5495 finishRule2HtmlTag(it, last, po);
5496 break;
5497
5498 case 3:
5499 finishRule3HtmlTag(it, last, po);
5500 break;
5501
5502 case 4:
5503 finishRule4HtmlTag(it, last, po);
5504 break;
5505
5506 case 5:
5507 finishRule5HtmlTag(it, last, po);
5508 break;
5509
5510 case 6:
5511 finishRule6HtmlTag(it, last, po);
5512 break;
5513
5514 case 7:
5515 return finishRule7HtmlTag(it, last, po);
5516
5517 default:
5518 po.m_detected = TextParsingOpts<Trait>::Detected::Nothing;
5519 break;
5520 }
5521
5522 return findIt(it, last, po);
5523}
5524
5525template<class Trait>
5526inline int
5527Parser<Trait>::htmlTagRule(typename Delims::const_iterator it,
5528 typename Delims::const_iterator last,
5529 TextParsingOpts<Trait> &po)
5530{
5531 MD_UNUSED(last)
5532
5533 typename Trait::String tag;
5534
5535 std::tie(tag, std::ignore) = readHtmlTag(it, po);
5536
5537 if (tag.startsWith(Trait::latin1ToString("![CDATA["))) {
5538 return 5;
5539 }
5540
5541 tag = tag.toLower();
5542
5543 static const typename Trait::String s_validHtmlTagLetters =
5544 Trait::latin1ToString("abcdefghijklmnopqrstuvwxyz0123456789-");
5545
5546 bool closing = false;
5547
5548 if (tag.startsWith(Trait::latin1ToString("/"))) {
5549 tag.remove(0, 1);
5550 closing = true;
5551 }
5552
5553 if (tag.endsWith(Trait::latin1ToString("/"))) {
5554 tag.remove(tag.size() - 1, 1);
5555 }
5556
5557 if (tag.isEmpty()) {
5558 return -1;
5559 }
5560
5561 if (!tag.startsWith(Trait::latin1ToString("!")) &&
5562 !tag.startsWith(Trait::latin1ToString("?")) &&
5563 !(tag[0].unicode() >= 97 && tag[0].unicode() <= 122)) {
5564 return -1;
5565 }
5566
5567 static const std::set<typename Trait::String> s_rule1 = {Trait::latin1ToString("pre"),
5568 Trait::latin1ToString("script"),
5569 Trait::latin1ToString("style"),
5570 Trait::latin1ToString("textarea")};
5571
5572 if (!closing && s_rule1.find(tag) != s_rule1.cend()) {
5573 return 1;
5574 } else if (tag.startsWith(Trait::latin1ToString("!--"))) {
5575 return 2;
5576 } else if (tag.startsWith(Trait::latin1ToString("?"))) {
5577 return 3;
5578 } else if (tag.startsWith(Trait::latin1ToString("!")) && tag.size() > 1 &&
5579 ((tag[1].unicode() >= 65 && tag[1].unicode() <= 90) ||
5580 (tag[1].unicode() >= 97 && tag[1].unicode() <= 122))) {
5581 return 4;
5582 } else {
5583 static const std::set<typename Trait::String> s_rule6 = {
5584 Trait::latin1ToString("address"), Trait::latin1ToString("article"), Trait::latin1ToString("aside"), Trait::latin1ToString("base"),
5585 Trait::latin1ToString("basefont"), Trait::latin1ToString("blockquote"), Trait::latin1ToString("body"), Trait::latin1ToString("caption"),
5586 Trait::latin1ToString("center"), Trait::latin1ToString("col"), Trait::latin1ToString("colgroup"), Trait::latin1ToString("dd"),
5587 Trait::latin1ToString("details"), Trait::latin1ToString("dialog"), Trait::latin1ToString("dir"), Trait::latin1ToString("div"),
5588 Trait::latin1ToString("dl"), Trait::latin1ToString("dt"), Trait::latin1ToString("fieldset"), Trait::latin1ToString("figcaption"),
5589 Trait::latin1ToString("figure"), Trait::latin1ToString("footer"), Trait::latin1ToString("form"), Trait::latin1ToString("frame"),
5590 Trait::latin1ToString("frameset"), Trait::latin1ToString("h1"), Trait::latin1ToString("h2"), Trait::latin1ToString("h3"),
5591 Trait::latin1ToString("h4"), Trait::latin1ToString("h5"), Trait::latin1ToString("h6"), Trait::latin1ToString("head"),
5592 Trait::latin1ToString("header"), Trait::latin1ToString("hr"), Trait::latin1ToString("html"), Trait::latin1ToString("iframe"),
5593 Trait::latin1ToString("legend"), Trait::latin1ToString("li"), Trait::latin1ToString("link"), Trait::latin1ToString("main"),
5594 Trait::latin1ToString("menu"), Trait::latin1ToString("menuitem"), Trait::latin1ToString("nav"), Trait::latin1ToString("noframes"),
5595 Trait::latin1ToString("ol"), Trait::latin1ToString("optgroup"), Trait::latin1ToString("option"), Trait::latin1ToString("p"),
5596 Trait::latin1ToString("param"), Trait::latin1ToString("section"), Trait::latin1ToString("search"), Trait::latin1ToString("summary"),
5597 Trait::latin1ToString("table"), Trait::latin1ToString("tbody"), Trait::latin1ToString("td"), Trait::latin1ToString("tfoot"),
5598 Trait::latin1ToString("th"), Trait::latin1ToString("thead"), Trait::latin1ToString("title"), Trait::latin1ToString("tr"),
5599 Trait::latin1ToString("track"), Trait::latin1ToString("ul")};
5600
5601 for (long long int i = 1; i < tag.size(); ++i) {
5602 if (!s_validHtmlTagLetters.contains(tag[i])) {
5603 return -1;
5604 }
5605 }
5606
5607 if (s_rule6.find(tag) != s_rule6.cend()) {
5608 return 6;
5609 } else {
5610 bool tag = false;
5611
5612 std::tie(tag, std::ignore, std::ignore, std::ignore, std::ignore) =
5613 isHtmlTag(it->m_line, it->m_pos, po, 7);
5614
5615 if (tag) {
5616 return 7;
5617 }
5618 }
5619 }
5620
5621 return -1;
5622}
5623
5624template<class Trait>
5625inline typename Parser<Trait>::Delims::const_iterator
5626Parser<Trait>::checkForRawHtml(typename Delims::const_iterator it,
5627 typename Delims::const_iterator last,
5628 TextParsingOpts<Trait> &po)
5629{
5630 const auto rule = htmlTagRule(it, last, po);
5631
5632 if (rule == -1) {
5633 resetHtmlTag(po.m_html);
5634
5635 po.m_firstInParagraph = false;
5636
5637 return it;
5638 }
5639
5640 po.m_html.m_htmlBlockType = rule;
5641 po.m_html.m_html.reset(new RawHtml<Trait>);
5642 po.m_html.m_html->setStartColumn(po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos));
5643 po.m_html.m_html->setStartLine(po.m_fr.m_data.at(it->m_line).second.m_lineNumber);
5644
5645 return finishRawHtmlTag(it, last, po, true);
5646}
5647
5648template<class Trait>
5649inline typename Parser<Trait>::Delims::const_iterator
5650Parser<Trait>::finishRule7HtmlTag(typename Delims::const_iterator it,
5651 typename Delims::const_iterator last,
5652 TextParsingOpts<Trait> &po)
5653{
5654 if (it != last) {
5655 const auto start = it;
5656 long long int l = -1, p = -1;
5657 bool onLine = false;
5658 bool ok = false;
5659
5660 std::tie(ok, l, p, onLine, std::ignore) = isHtmlTag(it->m_line, it->m_pos, po, 7);
5661
5662 onLine = onLine && it->m_line == 0 && l == start->m_line;
5663
5664 if (ok) {
5665 eatRawHtml(po.m_line, po.m_pos, l, ++p, po, !onLine, 7, onLine);
5666
5667 po.m_html.m_onLine = onLine;
5668
5669 it = findIt(it, last, po);
5670
5671 if (onLine) {
5672 for (; it != last; ++it) {
5673 if (it->m_type == Delimiter::Less) {
5674 const auto rule = htmlTagRule(it, last, po);
5675
5676 if (rule != -1 && rule != 7) {
5677 eatRawHtml(po.m_line, po.m_pos, it->m_line, it->m_pos, po, true, 7, onLine, true);
5678
5679 return std::prev(it);
5680 }
5681 }
5682 }
5683
5684 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 7, onLine, true);
5685
5686 return std::prev(last);
5687 } else {
5688 return it;
5689 }
5690 } else {
5691 return it;
5692 }
5693 } else {
5694 if (po.m_html.m_onLine) {
5695 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, true, 7, true);
5696
5697 return last;
5698 } else {
5699 resetHtmlTag(po.m_html);
5700 }
5701 }
5702
5703 return it;
5704}
5705
5706template<class Trait>
5707inline typename Parser<Trait>::Delims::const_iterator
5708Parser<Trait>::checkForMath(typename Delims::const_iterator it,
5709 typename Delims::const_iterator last,
5710 TextParsingOpts<Trait> &po)
5711{
5712 po.m_wasRefLink = false;
5713 po.m_firstInParagraph = false;
5714
5715 const auto end = std::find_if(std::next(it), last, [&](const auto &d) {
5716 return (d.m_type == Delimiter::Math && d.m_len == it->m_len);
5717 });
5718
5719 if (end != last && end->m_line <= po.m_lastTextLine) {
5720 typename Trait::String math;
5721
5722 if (it->m_line == end->m_line) {
5723 math = po.m_fr.m_data[it->m_line].first.asString().sliced(
5724 it->m_pos + it->m_len, end->m_pos - (it->m_pos + it->m_len));
5725 } else {
5726 math = po.m_fr.m_data[it->m_line].first.asString().sliced(it->m_pos + it->m_len);
5727
5728 for (long long int i = it->m_line + 1; i < end->m_line; ++i) {
5729 math.push_back(Trait::latin1ToChar('\n'));
5730 math.push_back(po.m_fr.m_data[i].first.asString());
5731 }
5732
5733 math.push_back(Trait::latin1ToChar('\n'));
5734 math.push_back(po.m_fr.m_data[end->m_line].first.asString().sliced(0, end->m_pos));
5735 }
5736
5737 if (!po.m_collectRefLinks) {
5738 std::shared_ptr<Math<Trait>> m(new Math<Trait>);
5739
5740 auto startLine = po.m_fr.m_data.at(it->m_line).second.m_lineNumber;
5741 auto startColumn = po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len);
5742
5743 if (it->m_pos + it->m_len >= po.m_fr.m_data.at(it->m_line).first.length()) {
5744 std::tie(startColumn, startLine) = nextPosition(po.m_fr, startColumn, startLine);
5745 }
5746
5747 auto endColumn = po.m_fr.m_data.at(end->m_line).first.virginPos(end->m_pos);
5748 auto endLine = po.m_fr.m_data.at(end->m_line).second.m_lineNumber;
5749
5750 if (endColumn == 0) {
5751 std::tie(endColumn, endLine) = prevPosition(po.m_fr, endColumn, endLine);
5752 } else {
5753 --endColumn;
5754 }
5755
5756 m->setStartColumn(startColumn);
5757 m->setStartLine(startLine);
5758 m->setEndColumn(endColumn);
5759 m->setEndLine(endLine);
5760 m->setInline(it->m_len == 1);
5761 m->setStartDelim({po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos),
5762 po.m_fr.m_data[it->m_line].second.m_lineNumber,
5763 po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos + it->m_len - 1),
5764 po.m_fr.m_data[it->m_line].second.m_lineNumber});
5765 m->setEndDelim({po.m_fr.m_data[end->m_line].first.virginPos(end->m_pos),
5766 po.m_fr.m_data[end->m_line].second.m_lineNumber,
5767 po.m_fr.m_data[end->m_line].first.virginPos(end->m_pos + end->m_len - 1),
5768 po.m_fr.m_data[end->m_line].second.m_lineNumber});
5769 m->setFensedCode(false);
5770
5771 initLastItemWithOpts<Trait>(po, m);
5772
5773 if (math.startsWith(Trait::latin1ToString("`")) &&
5774 math.endsWith(Trait::latin1ToString("`")) &&
5775 !math.endsWith(Trait::latin1ToString("\\`")) &&
5776 math.length() > 1) {
5777 math = math.sliced(1, math.length() - 2);
5778 }
5779
5780 m->setExpr(math);
5781
5782 po.m_parent->appendItem(m);
5783
5784 po.m_pos = end->m_pos + end->m_len;
5785 po.m_line = end->m_line;
5786 po.m_lastText = nullptr;
5787 }
5788
5789 return end;
5790 }
5791
5792 return it;
5793}
5794
5795template<class Trait>
5796inline typename Parser<Trait>::Delims::const_iterator
5797Parser<Trait>::checkForAutolinkHtml(typename Delims::const_iterator it,
5798 typename Delims::const_iterator last,
5799 TextParsingOpts<Trait> &po,
5800 bool updatePos)
5801{
5802 const auto nit = std::find_if(std::next(it), last, [](const auto &d) {
5803 return (d.m_type == Delimiter::Greater);
5804 });
5805
5806 if (nit != last) {
5807 if (nit->m_line == it->m_line) {
5808 const auto url = po.m_fr.m_data.at(it->m_line).first.asString().sliced(
5809 it->m_pos + 1, nit->m_pos - it->m_pos - 1);
5810
5811 bool isUrl = true;
5812
5813 for (long long int i = 0; i < url.size(); ++i) {
5814 if (url[i].isSpace()) {
5815 isUrl = false;
5816
5817 break;
5818 }
5819 }
5820
5821 if (isUrl) {
5822 if (!isValidUrl<Trait>(url) && !isEmail<Trait>(url)) {
5823 isUrl = false;
5824 }
5825 }
5826
5827 if (isUrl) {
5828 if (!po.m_collectRefLinks) {
5829 std::shared_ptr<Link<Trait>> lnk(new Link<Trait>);
5830 lnk->setStartColumn(po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos));
5831 lnk->setStartLine(po.m_fr.m_data.at(it->m_line).second.m_lineNumber);
5832 lnk->setEndColumn(po.m_fr.m_data.at(nit->m_line).first.virginPos(nit->m_pos + nit->m_len - 1));
5833 lnk->setEndLine(po.m_fr.m_data.at(nit->m_line).second.m_lineNumber);
5834 lnk->setUrl(url);
5835 lnk->setOpts(po.m_opts);
5836 lnk->setTextPos({po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos + 1),
5837 po.m_fr.m_data[it->m_line].second.m_lineNumber,
5838 po.m_fr.m_data[nit->m_line].first.virginPos(nit->m_pos - 1),
5839 po.m_fr.m_data[nit->m_line].second.m_lineNumber});
5840 lnk->setUrlPos(lnk->textPos());
5841 po.m_parent->appendItem(lnk);
5842 }
5843
5844 po.m_wasRefLink = false;
5845 po.m_firstInParagraph = false;
5846 po.m_lastText = nullptr;
5847
5848 if (updatePos) {
5849 po.m_pos = nit->m_pos + nit->m_len;
5850 po.m_line = nit->m_line;
5851 }
5852
5853 return nit;
5854 } else {
5855 return checkForRawHtml(it, last, po);
5856 }
5857 } else {
5858 return checkForRawHtml(it, last, po);
5859 }
5860 } else {
5861 return checkForRawHtml(it, last, po);
5862 }
5863}
5864
5865template<class Trait>
5866inline void
5867Parser<Trait>::makeInlineCode(long long int startLine,
5868 long long int startPos,
5869 long long int lastLine,
5870 long long int lastPos,
5871 TextParsingOpts<Trait> &po,
5872 typename Delims::const_iterator startDelimIt,
5873 typename Delims::const_iterator endDelimIt)
5874{
5875 typename Trait::String c;
5876
5877 for (; po.m_line <= lastLine; ++po.m_line) {
5878 c.push_back(po.m_fr.m_data.at(po.m_line).first.asString().sliced(
5879 po.m_pos, (po.m_line == lastLine ? lastPos - po.m_pos :
5880 po.m_fr.m_data.at(po.m_line).first.length() - po.m_pos)));
5881
5882 if (po.m_line < lastLine) {
5883 c.push_back(Trait::latin1ToChar(' '));
5884 }
5885
5886 po.m_pos = 0;
5887 }
5888
5889 po.m_line = lastLine;
5890
5891 if (c[0] == Trait::latin1ToChar(' ') && c[c.size() - 1] == Trait::latin1ToChar(' ') &&
5892 skipSpaces<Trait>(0, c) < c.size()) {
5893 c.remove(0, 1);
5894 c.remove(c.size() - 1, 1);
5895 ++startPos;
5896 --lastPos;
5897 }
5898
5899 if (!c.isEmpty()) {
5900 auto code = std::make_shared<Code<Trait>>(c, false, true);
5901
5902 code->setStartColumn(po.m_fr.m_data.at(startLine).first.virginPos(startPos));
5903 code->setStartLine(po.m_fr.m_data.at(startLine).second.m_lineNumber);
5904 code->setEndColumn(po.m_fr.m_data.at(lastLine).first.virginPos(lastPos - 1));
5905 code->setEndLine(po.m_fr.m_data.at(lastLine).second.m_lineNumber);
5906 code->setStartDelim({po.m_fr.m_data.at(startDelimIt->m_line).first.virginPos(
5907 startDelimIt->m_pos + (startDelimIt->m_backslashed ? 1 : 0)),
5908 po.m_fr.m_data.at(startDelimIt->m_line).second.m_lineNumber,
5909 po.m_fr.m_data.at(startDelimIt->m_line).first.virginPos(
5910 startDelimIt->m_pos + (startDelimIt->m_backslashed ? 1 : 0)) +
5911 startDelimIt->m_len - 1 - (startDelimIt->m_backslashed ? 1 : 0),
5912 po.m_fr.m_data.at(startDelimIt->m_line).second.m_lineNumber});
5913 code->setEndDelim(
5914 {po.m_fr.m_data.at(endDelimIt->m_line).first.virginPos(
5915 endDelimIt->m_pos + (endDelimIt->m_backslashed ? 1 : 0)),
5916 po.m_fr.m_data.at(endDelimIt->m_line).second.m_lineNumber,
5917 po.m_fr.m_data.at(endDelimIt->m_line).first.virginPos(
5918 endDelimIt->m_pos + (endDelimIt->m_backslashed ? 1 : 0) +
5919 endDelimIt->m_len - 1 - (endDelimIt->m_backslashed ? 1 : 0)),
5920 po.m_fr.m_data.at(endDelimIt->m_line).second.m_lineNumber});
5921 code->setOpts(po.m_opts);
5922
5923 initLastItemWithOpts<Trait>(po, code);
5924
5925 po.m_parent->appendItem(code);
5926 }
5927
5928 po.m_wasRefLink = false;
5929 po.m_firstInParagraph = false;
5930 po.m_lastText = nullptr;
5931}
5932
5933template<class Trait>
5934inline typename Parser<Trait>::Delims::const_iterator
5935Parser<Trait>::checkForInlineCode(typename Delims::const_iterator it,
5936 typename Delims::const_iterator last,
5937 TextParsingOpts<Trait> &po)
5938{
5939 const auto len = it->m_len;
5940 const auto start = it;
5941
5942 po.m_wasRefLink = false;
5943 po.m_firstInParagraph = false;
5944
5945 ++it;
5946
5947 for (; it != last; ++it) {
5948 if (it->m_line <= po.m_lastTextLine) {
5949 const auto p = skipSpaces<Trait>(0, po.m_fr.m_data.at(it->m_line).first.asString());
5950 const auto withoutSpaces = po.m_fr.m_data.at(it->m_line).first.asString().sliced(p);
5951
5952 if ((it->m_type == Delimiter::HorizontalLine && withoutSpaces[0] == Trait::latin1ToChar('-')) ||
5953 it->m_type == Delimiter::H1 || it->m_type == Delimiter::H2) {
5954 break;
5955 } else if (it->m_type == Delimiter::InlineCode && (it->m_len - (it->m_backslashed ? 1 : 0)) == len) {
5956 if (!po.m_collectRefLinks) {
5957 makeText(start->m_line, start->m_pos, po);
5958
5959 po.m_pos = start->m_pos + start->m_len;
5960
5961 makeInlineCode(start->m_line, start->m_pos + start->m_len, it->m_line,
5962 it->m_pos + (it->m_backslashed ? 1 : 0), po, start, it);
5963
5964 po.m_line = it->m_line;
5965 po.m_pos = it->m_pos + it->m_len;
5966 }
5967
5968 return it;
5969 }
5970 } else {
5971 break;
5972 }
5973 }
5974
5975 if (!po.m_collectRefLinks) {
5976 makeText(start->m_line, start->m_pos + start->m_len, po);
5977 }
5978
5979 return start;
5980}
5981
5982template<class Trait>
5983inline std::pair<typename MdBlock<Trait>::Data, typename Parser<Trait>::Delims::const_iterator>
5984Parser<Trait>::readTextBetweenSquareBrackets(typename Delims::const_iterator start,
5985 typename Delims::const_iterator it,
5986 typename Delims::const_iterator last,
5987 TextParsingOpts<Trait> &po,
5988 bool doNotCreateTextOnFail,
5989 WithPosition *pos)
5990{
5991 if (it != last && it->m_line <= po.m_lastTextLine) {
5992 if (start->m_line == it->m_line) {
5993 const auto p = start->m_pos + start->m_len;
5994 const auto n = it->m_pos - p;
5995
5996 if (pos) {
5997 long long int startPos, startLine, endPos, endLine;
5998 std::tie(startPos, startLine) = nextPosition(po.m_fr,
5999 po.m_fr.m_data[start->m_line].first.virginPos(
6000 start->m_pos + start->m_len - 1),
6001 po.m_fr.m_data[start->m_line].second.m_lineNumber);
6002 std::tie(endPos, endLine) =
6003 prevPosition(po.m_fr, po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos),
6004 po.m_fr.m_data[it->m_line].second.m_lineNumber);
6005
6006 *pos = {startPos, startLine, endPos, endLine};
6007 }
6008
6009 return {{{po.m_fr.m_data.at(start->m_line).first.sliced(p, n),
6010 {po.m_fr.m_data.at(start->m_line).second.m_lineNumber}}}, it};
6011 } else {
6012 if (it->m_line - start->m_line < 3) {
6013 typename MdBlock<Trait>::Data res;
6014 res.push_back({po.m_fr.m_data.at(start->m_line).first.sliced(
6015 start->m_pos + start->m_len), po.m_fr.m_data.at(start->m_line).second});
6016
6017 long long int i = start->m_line + 1;
6018
6019 for (; i <= it->m_line; ++i) {
6020 if (i == it->m_line) {
6021 res.push_back({po.m_fr.m_data.at(i).first.sliced(0, it->m_pos),
6022 po.m_fr.m_data.at(i).second});
6023 } else {
6024 res.push_back({po.m_fr.m_data.at(i).first, po.m_fr.m_data.at(i).second});
6025 }
6026 }
6027
6028 if (pos) {
6029 long long int startPos, startLine, endPos, endLine;
6030 std::tie(startPos, startLine) = nextPosition(po.m_fr,
6031 po.m_fr.m_data[start->m_line].first.virginPos(
6032 start->m_pos + start->m_len - 1),
6033 po.m_fr.m_data[start->m_line].second.m_lineNumber);
6034 std::tie(endPos, endLine) =
6035 prevPosition(po.m_fr, po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos),
6036 po.m_fr.m_data[it->m_line].second.m_lineNumber);
6037
6038 *pos = {startPos, startLine, endPos, endLine};
6039 }
6040
6041 return {res, it};
6042 } else {
6043 if (!po.m_collectRefLinks && !doNotCreateTextOnFail) {
6044 makeText(start->m_line, start->m_pos + start->m_len, po);
6045 }
6046
6047 return {{}, start};
6048 }
6049 }
6050 } else {
6051 if (!po.m_collectRefLinks && !doNotCreateTextOnFail) {
6052 makeText(start->m_line, start->m_pos + start->m_len, po);
6053 }
6054
6055 return {{}, start};
6056 }
6057}
6058
6059template<class Trait>
6060inline std::pair<typename MdBlock<Trait>::Data, typename Parser<Trait>::Delims::const_iterator>
6061Parser<Trait>::checkForLinkText(typename Delims::const_iterator it,
6062 typename Delims::const_iterator last,
6063 TextParsingOpts<Trait> &po,
6064 WithPosition *pos)
6065{
6066 const auto start = it;
6067
6068 long long int brackets = 0;
6069
6070 const bool collectRefLinks = po.m_collectRefLinks;
6071 po.m_collectRefLinks = true;
6072 long long int l = po.m_line, p = po.m_pos;
6073
6074 for (it = std::next(it); it != last; ++it) {
6075 bool quit = false;
6076
6077 switch (it->m_type) {
6078 case Delimiter::SquareBracketsClose: {
6079 if (!brackets)
6080 quit = true;
6081 else
6082 --brackets;
6083 } break;
6084
6085