Md4qt

parser.h
Go to the documentation of this file.
1/*
2 SPDX-FileCopyrightText: 2022-2025 Igor Mironchik <igor.mironchik@gmail.com>
3 SPDX-License-Identifier: MIT
4*/
5
6#ifndef MD4QT_MD_PARSER_HPP_INCLUDED
7#define MD4QT_MD_PARSER_HPP_INCLUDED
8
9// md4qt include.
10#include "doc.h"
11#include "entities_map.h"
12#include "traits.h"
13#include "utils.h"
14
15#ifdef MD4QT_QT_SUPPORT
16
17// Qt include.
18#include <QDir>
19#include <QFile>
20#include <QTextStream>
21
22#endif // MD4QT_QT_SUPPORT
23
24#ifdef MD4QT_ICU_STL_SUPPORT
25
26// C++ include.
27#include <exception>
28
29#endif // MD4QT_ICU_STL_SUPPORT
30
31// C++ include.
32#include <algorithm>
33#include <cassert>
34#include <cmath>
35#include <fstream>
36#include <functional>
37#include <memory>
38#include <set>
39#include <tuple>
40#include <unordered_map>
41#include <vector>
42
43namespace MD
44{
45
//! Starting HTML comment string ("<!--").
//! Both the characters and the pointer itself are const, so this
//! header-level constant cannot be re-pointed by accident.
static const char *const s_startComment = "<!--";
48
//! \return Does the indent \p indent belong to a list described by the
//! previous indents \p indents?
//!
//! An indent matches a list indent \c v when it is not smaller than \c v and,
//! unless \p codeIndentedBySpaces is set, not larger than \c v + 3.
//! A null or empty \p indents never matches.
inline bool
indentInList(const std::vector<long long int> *indents,
             long long int indent,
             bool codeIndentedBySpaces)
{
    if (!indents || indents->empty()) {
        return false;
    }

    return std::any_of(indents->cbegin(), indents->cend(),
                       [indent, codeIndentedBySpaces](const long long int listIndent) {
                           if (indent < listIndent) {
                               return false;
                           }

                           return codeIndentedBySpaces || indent <= listIndent + 3;
                       });
}
67
//! Skip spaces in \p line starting from position \p i.
//! \return Position of the first non-space character at or after \p i,
//! or the length of the line if only spaces remain. If \p i is already
//! at or past the end of the line it is returned unchanged.
template<class Trait>
inline long long int
skipSpaces(long long int i, const typename Trait::String &line)
{
    for (const auto lineLength = line.length(); i < lineLength; ++i) {
        if (!line[i].isSpace()) {
            break;
        }
    }

    return i;
}
81
//! \return Position of the last non-space character in \p line,
//! or -1 if the line is empty or consists only of spaces.
template<class String>
inline long long int
lastNonSpacePos(const String &line)
{
    long long int pos = line.length() - 1;

    // Walk backwards over trailing spaces.
    for (; pos >= 0; --pos) {
        if (!line[pos].isSpace()) {
            break;
        }
    }

    return pos;
}
95
96//! Remove spaces at the end of string \p s.
//! The position of the last non-space character is found with
//! lastNonSpacePos(); everything after it is removed with s.remove().
//! A string without trailing spaces is left untouched.
// NOTE(review): the declarator line (function name and parameter list,
// listing line 99) is missing from this extracted listing; the body
// operates on a string named `s`.
97template<class String>
98inline void
100{
101 const auto i = lastNonSpacePos(s);
102
// Nothing to do when the last character is already a non-space one.
103 if (i != s.length() - 1) {
104 s.remove(i + 1, s.length() - i - 1);
105 }
106}
107
108//! \return Starting sequence of the same characters.
109template<class Trait>
110inline typename Trait::String
111startSequence(const typename Trait::String &line)
112{
113 auto pos = skipSpaces<Trait>(0, line);
114
115 if (pos >= line.length()) {
116 return {};
117 }
118
119 const auto sch = line[pos];
120 const auto start = pos;
121
122 ++pos;
123
124 while (pos < line.length() && line[pos] == sch) {
125 ++pos;
126 }
127
128 return line.sliced(start, pos - start);
129}
130
131//! \return Is string an ordered list.
132template<class Trait>
133inline bool
134isOrderedList(const typename Trait::String &s,
135 int *num = nullptr,
136 int *len = nullptr,
137 typename Trait::Char *delim = nullptr,
138 bool *isFirstLineEmpty = nullptr)
139{
140 long long int p = skipSpaces<Trait>(0, s);
141
142 long long int dp = p;
143
144 for (; p < s.size(); ++p) {
145 if (!s[p].isDigit()) {
146 break;
147 }
148 }
149
150 if (dp != p && p < s.size()) {
151 const auto digits = s.sliced(dp, p - dp);
152
153 if (digits.size() > 9) {
154 return false;
155 }
156
157 const auto i = digits.toInt();
158
159 if (num) {
160 *num = i;
161 }
162
163 if (len) {
164 *len = p - dp;
165 }
166
167 if (s[p] == Trait::latin1ToChar('.') || s[p] == Trait::latin1ToChar(')')) {
168 if (delim) {
169 *delim = s[p];
170 }
171
172 ++p;
173
174 long long int tmp = skipSpaces<Trait>(p, s);
175
176 if (isFirstLineEmpty) {
177 *isFirstLineEmpty = (tmp == s.size());
178 }
179
180 if ((p < s.size() && s[p] == Trait::latin1ToChar(' ')) || p == s.size()) {
181 return true;
182 }
183 }
184 }
185
186 return false;
187}
188
189//
190// RawHtmlBlock
191//
192
193//! Internal structure for pre-storing HTML.
// NOTE(review): listing lines 195, 200 and 202 are missing from this
// extracted listing. The struct header is one of them; findParent() below
// reads a member named m_blocks that is not declared in the visible lines.
194template<class Trait>
// Pre-stored raw HTML item.
196 std::shared_ptr<RawHtml<Trait>> m_html = {};
// Parent block.
197 std::shared_ptr<Block<Trait>> m_parent = {};
// Top-level parent block.
198 std::shared_ptr<Block<Trait>> m_topParent = {};
// Sequence of ( block, long long int ) pairs; findParent() compares the
// second element of each pair with an indent.
199 using SequenceOfBlock = std::vector<std::pair<std::shared_ptr<Block<Trait>>, long long int>>;
// Per-block sequences; presumably used to adjust last positions later - TODO confirm.
201 std::unordered_map<std::shared_ptr<Block<Trait>>, SequenceOfBlock> m_toAdjustLastPos = {};
203 bool m_continueHtml = false;
204 bool m_onLine = false;
205
// \return First block, searching m_blocks from the back, whose stored
// value is not greater than \p indent; nullptr if there is none.
206 std::shared_ptr<Block<Trait>>
207 findParent(long long int indent) const
208 {
209 for (auto it = m_blocks.crbegin(), last = m_blocks.crend(); it != last; ++it) {
210 if (indent >= it->second) {
211 return it->first;
212 }
213 }
214
215 return nullptr;
216 }
217}; // struct RawHtmlBlock
218
219//
220// MdLineData
221//
222
223//! Internal structure for auxiliary information about a line in Markdown.
// NOTE(review): listing lines 224 and 229 are missing from this extracted
// listing (the struct header and, presumably, the member that the
// "std::pair< closed, valid >" comment below describes).
// Line number; -1 by default.
225 long long int m_lineNumber = -1;
226 using CommentData = std::pair<char, bool>;
// Map from a long long int key to CommentData.
227 using CommentDataMap = std::map<long long int, CommentData>;
228 // std::pair< closed, valid >
230 // May this line break a list?
231 bool m_mayBreakList = false;
232}; // struct MdLineData
233
234//
235// MdBlock
236//
237
238//! Internal structure for block of text in Markdown.
239template<class Trait>
240struct MdBlock {
// One line of the block: the text together with its auxiliary data.
241 using Line = std::pair<typename Trait::InternalString, MdLineData>;
// Container of lines.
242 using Data = typename Trait::template Vector<Line>;
243
// NOTE(review): listing line 244 is missing from this extracted listing;
// other code in this file accesses a member named m_data on MdBlock.
// Count of empty lines before the block.
245 long long int m_emptyLinesBefore = 0;
// Is there an empty line after the block? True by default.
246 bool m_emptyLineAfter = true;
247}; // struct MdBlock
248
//! \return Count of consecutive empty lines directly before iterator \c it.
//!
//! Walks backwards from \c it towards \c begin and counts lines whose text
//! is empty after simplified(); stops at the first non-empty line.
// NOTE(review): the declarator lines (function name and parameter list,
// listing lines 251-252) are missing from this extracted listing; the body
// uses two iterators named `begin` and `it`.
249template<class Trait>
250inline long long int
253{
254 long long int count = 0;
255
256 if (it != begin) {
257 while (it != begin) {
258 it = std::prev(it);
259
260 if (it->first.asString().simplified().isEmpty()) {
261 ++count;
262 } else {
263 break;
264 }
265 }
266 }
267
268 return count;
269}
270
271//
272// StringListStream
273//
274
275//! Wrapper for typename Trait::StringList to be behaved like a stream.
//! The wrapped container is held by reference as typename MdBlock<Trait>::Data
//! (see m_stream below), together with the current read position m_pos.
// NOTE(review): listing lines 277 and 280 are missing from this extracted
// listing (the class header and the constructor signature).
276template<class Trait>
278{
279public:
// Constructor: stores the reference to the data and starts at position 0.
281 : m_stream(stream)
282 , m_pos(0)
283 {
284 }
285
// \return Is the current position at or past the end of the data?
286 bool atEnd() const
287 {
288 return (m_pos >= (long long int)m_stream.size());
289 }
290
// \return The current line and its m_mayBreakList flag, then advances the
// position. No atEnd() check is made here.
291 std::pair<typename Trait::InternalString, bool> readLine()
292 {
293 const std::pair<typename Trait::InternalString, bool> ret =
294 {m_stream.at(m_pos).first, m_stream.at(m_pos).second.m_lineNumber > -2 ? m_stream.at(m_pos).second.m_mayBreakList : m_stream.at(m_pos).second.m_mayBreakList};
295
296 ++m_pos;
297
298 return ret;
299 }
300
// \return Line number of the current line. Past the end it is the line
// number of the first line plus size(); for empty data it is -1.
301 long long int currentLineNumber() const
302 {
303 return (m_pos < size() ? m_stream.at(m_pos).second.m_lineNumber :
304 (size() > 0 ? m_stream.at(0).second.m_lineNumber + size() : -1));
305 }
306
// \return Current position (index) in the data.
307 long long int currentStreamPos() const
308 {
309 return m_pos;
310 }
311
// \return Text of the line at index \p pos.
312 typename Trait::InternalString lineAt(long long int pos)
313 {
314 return m_stream.at(pos).first;
315 }
316
// \return Count of lines.
317 long long int size() const
318 {
319 return m_stream.size();
320 }
321
// Move the position to the line with number \p lineNumber: the position is
// reset to 0 and then shifted by the difference between \p lineNumber and
// the line number reported for position 0.
322 void setLineNumber(long long int lineNumber)
323 {
324 m_pos = 0;
325
326 m_pos += lineNumber - currentLineNumber();
327 }
328
329private:
330 typename MdBlock<Trait>::Data &m_stream;
331 long long int m_pos;
332}; // class StringListStream
333
334//! \return Is string a footnote?
335template<class Trait>
336inline bool
337isFootnote(const typename Trait::String &s)
338{
339 long long int p = skipSpaces<Trait>(0, s);
340
341 if (s.size() - p < 5) {
342 return false;
343 }
344
345 if (s[p++] != Trait::latin1ToChar('[')) {
346 return false;
347 }
348
349 if (s[p++] != Trait::latin1ToChar('^')) {
350 return false;
351 }
352
353 if (s[p] == Trait::latin1ToChar(']') || s[p].isSpace()) {
354 return false;
355 }
356
357 for (; p < s.size(); ++p) {
358 if (s[p] == Trait::latin1ToChar(']')) {
359 break;
360 } else if (s[p].isSpace()) {
361 return false;
362 }
363 }
364
365 ++p;
366
367 if (p < s.size() && s[p] == Trait::latin1ToChar(':')) {
368 return true;
369 } else {
370 return false;
371 }
372}
373
374//! \return Is string a code fences?
375template<class Trait>
376inline bool
377isCodeFences(const typename Trait::String &s, bool closing = false)
378{
379 auto p = skipSpaces<Trait>(0, s);
380
381 if (p > 3 || p == s.length()) {
382 return false;
383 }
384
385 const auto ch = s[p];
386
387 if (ch != Trait::latin1ToChar('~') && ch != Trait::latin1ToChar('`')) {
388 return false;
389 }
390
391 bool space = false;
392
393 long long int c = 1;
394 ++p;
395
396 for (; p < s.length(); ++p) {
397 if (s[p].isSpace()) {
398 space = true;
399 } else if (s[p] == ch) {
400 if (space && (closing ? true : ch == Trait::latin1ToChar('`'))) {
401 return false;
402 }
403
404 if (!space) {
405 ++c;
406 }
407 } else if (closing) {
408 return false;
409 } else {
410 break;
411 }
412 }
413
414 if (c < 3) {
415 return false;
416 }
417
418 if (ch == Trait::latin1ToChar('`')) {
419 for (; p < s.length(); ++p) {
420 if (s[p] == Trait::latin1ToChar('`')) {
421 return false;
422 }
423 }
424 }
425
426 return true;
427}
428
//! Read sequence of characters from position \p i till first space that is
//! not escaped with a backslash.
//!
//! \return The read substring (backslashes are kept as is), or an empty
//! string if \p i is at or past the end of \p str.
//! \p endPos, if given, receives the position of the last read character;
//! it is not touched when an empty string is returned because of \p i.
template<class Trait>
inline typename Trait::String
readEscapedSequence(long long int i,
                    const typename Trait::String &str,
                    long long int *endPos = nullptr)
{
    const auto first = i;

    if (first >= str.length()) {
        return {};
    }

    // True right after a backslash that itself was not escaped.
    bool escaped = false;

    for (; i < str.length(); ++i) {
        if (!escaped && str[i] == Trait::latin1ToChar('\\')) {
            escaped = true;

            continue;
        }

        if (!escaped && str[i].isSpace()) {
            break;
        }

        escaped = false;
    }

    if (endPos) {
        *endPos = i - 1;
    }

    return str.sliced(first, i - first);
}
466
467//! Characters that can be escaped.
//! Variable template: one string per Trait, built with Trait::latin1ToString()
//! from the punctuation characters listed in the literal below.
468template<class Trait>
469static const typename Trait::String s_canBeEscaped =
470 Trait::latin1ToString("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");
471
472//! Remove backslashes from the string.
473template<class String, class Trait>
474inline String
475removeBackslashes(const String &s)
476{
477 String r = s;
478 bool backslash = false;
479 long long int extra = 0;
480
481 for (long long int i = 0; i < s.length(); ++i) {
482 bool now = false;
483
484 if (s[i] == Trait::latin1ToChar('\\') && !backslash && i != s.length() - 1) {
485 backslash = true;
486 now = true;
487 } else if (s_canBeEscaped<Trait>.contains(s[i]) && backslash) {
488 r.remove(i - extra - 1, 1);
489 ++extra;
490 }
491
492 if (!now) {
493 backslash = false;
494 }
495 }
496
497 return r;
498}
499
500//! \return Is string a start of code?
//!
//! The string starts a code block if, after at most 3 leading spaces, it
//! has at least 3 '`' or '~' characters in a row.
//! \p delim, if given, receives the start and end columns of the fence
//! (the start column is set even if the function returns false).
//! \p syntax and \p syntaxPos are handled only if \p syntax is given: the
//! first word after the fence is read with readEscapedSequence() and
//! \p syntaxPos receives its start and end columns.
// NOTE(review): listing line 551 is missing from this extracted listing;
// presumably it is the start of the statement that assigns to *syntax,
// whose tail is visible on listing line 552 - TODO confirm.
501template<class Trait>
502inline bool
503isStartOfCode(const typename Trait::String &str,
504 typename Trait::String *syntax = nullptr,
505 WithPosition *delim = nullptr,
506 WithPosition *syntaxPos = nullptr)
507{
508 long long int p = skipSpaces<Trait>(0, str);
509
510 if (delim) {
511 delim->setStartColumn(p);
512 }
513
// More than 3 spaces of indent - not a start of fenced code.
514 if (p > 3) {
515 return false;
516 }
517
// At least 3 characters are needed for the fence.
518 if (str.size() - p < 3) {
519 return false;
520 }
521
// c96 is a back-tick (code 96), c126 is a tilde (code 126).
522 const bool c96 = str[p] == Trait::latin1ToChar('`');
523 const bool c126 = str[p] == Trait::latin1ToChar('~');
524
525 if (c96 || c126) {
526 ++p;
// Length of the fence.
527 long long int c = 1;
528
529 while (p < str.length()) {
530 if (str[p] != (c96 ? Trait::latin1ToChar('`') : Trait::latin1ToChar('~'))) {
531 break;
532 }
533
534 ++c;
535 ++p;
536 }
537
538 if (delim) {
539 delim->setEndColumn(p - 1);
540 }
541
542 if (c < 3) {
543 return false;
544 }
545
546 if (syntax) {
547 p = skipSpaces<Trait>(p, str);
548 long long int endSyntaxPos = p;
549
550 if (p < str.size()) {
552 readEscapedSequence<Trait>(p, str, &endSyntaxPos));
553
554 if (syntaxPos) {
555 syntaxPos->setStartColumn(p);
556 syntaxPos->setEndColumn(endSyntaxPos);
557 }
558 }
559 }
560
561 return true;
562 }
563
564 return false;
565}
566
//! \return Is string a horizontal line?
//!
//! The string must start (at position 0, leading spaces are not skipped
//! here) with '*', '-' or '_' and contain only that character and spaces,
//! with at least 3 occurrences of the character.
template<class Trait>
inline bool
isHorizontalLine(const typename Trait::String &s)
{
    if (s.size() < 3) {
        return false;
    }

    const typename Trait::Char marker = s[0];

    const bool knownMarker = (marker == Trait::latin1ToChar('*')
        || marker == Trait::latin1ToChar('-')
        || marker == Trait::latin1ToChar('_'));

    if (!knownMarker) {
        return false;
    }

    long long int markers = 1;
    long long int pos = 1;

    for (; pos < s.size(); ++pos) {
        if (s[pos] == marker) {
            ++markers;
        } else if (!s[pos].isSpace()) {
            // Foreign character - this is not a horizontal line.
            break;
        }
    }

    return (markers >= 3 && pos == s.size());
}
609
610//! \return Is string a column alignment?
611template<class Trait>
612inline bool
613isColumnAlignment(const typename Trait::String &s)
614{
615 long long int p = skipSpaces<Trait>(0, s);
616
617 static const typename Trait::String s_legitime = Trait::latin1ToString(":-");
618
619 if (p >= s.length()) {
620 return false;
621 }
622
623 if (!s_legitime.contains(s[p])) {
624 return false;
625 }
626
627 if (s[p] == Trait::latin1ToChar(':')) {
628 ++p;
629 }
630
631 for (; p < s.size(); ++p) {
632 if (s[p] != Trait::latin1ToChar('-')) {
633 break;
634 }
635 }
636
637 if (p == s.size()) {
638 return true;
639 }
640
641 if (s[p] != Trait::latin1ToChar(':') && !s[p].isSpace()) {
642 return false;
643 }
644
645 ++p;
646
647 for (; p < s.size(); ++p) {
648 if (!s[p].isSpace()) {
649 return false;
650 }
651 }
652
653 return true;
654}
655
656//! Split string.
//! \return List of parts of \p str separated by \p ch.
//! Only the declaration is given here; implementations are provided by the
//! explicit specializations below, one per supported backend.
657template<class Trait>
658typename Trait::StringList
659splitString(const typename Trait::String &str, const typename Trait::Char &ch);
660
661#ifdef MD4QT_ICU_STL_SUPPORT
662
// Specialization for the ICU/STL backend: forwards to str.split(ch).
// NOTE(review): the declarator lines (listing lines 664-665) are missing
// from this extracted listing.
663template<>
666{
667 return str.split(ch);
668}
669
670#endif
671
672#ifdef MD4QT_QT_SUPPORT
673
// Specialization for the Qt backend: empty parts are skipped
// (Qt::SkipEmptyParts).
// NOTE(review): the return type line (listing line 675) is missing from
// this extracted listing.
674template<>
676splitString<QStringTrait>(const QString &str, const QChar &ch)
677{
678 return str.split(ch, Qt::SkipEmptyParts);
679}
680
681#endif
682
683//! \return Number of columns?
684template<class Trait>
685inline int
686isTableAlignment(const typename Trait::String &s)
687{
688 const auto columns = splitString<Trait>(s.simplified(), Trait::latin1ToChar('|'));
689
690 for (const auto &c : columns) {
691 if (!isColumnAlignment<Trait>(c)) {
692 return 0;
693 }
694 }
695
696 return columns.size();
697}
698
699//! \return Is given string a HTML comment.
700template<class Trait>
701inline bool
702isHtmlComment(const typename Trait::String &s)
703{
704 auto c = s;
705
706 if (s.startsWith(Trait::latin1ToString(s_startComment))) {
707 c.remove(0, 4);
708 } else {
709 return false;
710 }
711
712 long long int p = -1;
713 bool endFound = false;
714
715 while ((p = c.indexOf(Trait::latin1ToString("--"), p + 1)) > -1) {
716 if (c.size() > p + 2 && c[p + 2] == Trait::latin1ToChar('>')) {
717 if (!endFound) {
718 endFound = true;
719 } else {
720 return false;
721 }
722 } else if (p - 2 >= 0 && c.sliced(p - 2, 4) == Trait::latin1ToString("<!--")) {
723 return false;
724 } else if (c.size() > p + 3 && c.sliced(p, 4) == Trait::latin1ToString("--!>")) {
725 return false;
726 }
727 }
728
729 return endFound;
730}
731
732//! Replace entities in the string with corresponding character.
733template<class Trait>
734inline typename Trait::String
735replaceEntity(const typename Trait::String &s)
736{
737 long long int p1 = 0;
738
739 typename Trait::String res;
740 long long int i = 0;
741
742 while ((p1 = s.indexOf(Trait::latin1ToChar('&'), p1)) != -1) {
743 if (p1 > 0 && s[p1 - 1] == Trait::latin1ToChar('\\')) {
744 ++p1;
745
746 continue;
747 }
748
749 const auto p2 = s.indexOf(Trait::latin1ToChar(';'), p1);
750
751 if (p2 != -1) {
752 const auto en = s.sliced(p1, p2 - p1 + 1);
753
754 if (en.size() > 2 && en[1] == Trait::latin1ToChar('#')) {
755 if (en.size() > 3 && en[2].toLower() == Trait::latin1ToChar('x')) {
756 const auto hex = en.sliced(3, en.size() - 4);
757
758 if (hex.size() <= 6 && hex.size() > 0) {
759 bool ok = false;
760
761 const char32_t c = hex.toInt(&ok, 16);
762
763 if (ok) {
764 res.push_back(s.sliced(i, p1 - i));
765 i = p2 + 1;
766
767 if (c) {
768 Trait::appendUcs4(res, c);
769 } else {
770 res.push_back(typename Trait::Char(0xFFFD));
771 }
772 }
773 }
774 } else {
775 const auto dec = en.sliced(2, en.size() - 3);
776
777 if (dec.size() <= 7 && dec.size() > 0) {
778 bool ok = false;
779
780 const char32_t c = dec.toInt(&ok, 10);
781
782 if (ok) {
783 res.push_back(s.sliced(i, p1 - i));
784 i = p2 + 1;
785
786 if (c) {
787 Trait::appendUcs4(res, c);
788 } else {
789 res.push_back(typename Trait::Char(0xFFFD));
790 }
791 }
792 }
793 }
794 } else {
795 const auto it = s_entityMap<Trait>.find(en);
796
797 if (it != s_entityMap<Trait>.cend()) {
798 res.push_back(s.sliced(i, p1 - i));
799 i = p2 + 1;
800 res.push_back(Trait::utf16ToString(it->second));
801 }
802 }
803 } else {
804 break;
805 }
806
807 p1 = p2 + 1;
808 }
809
810 res.push_back(s.sliced(i, s.size() - i));
811
812 return res;
813}
814
815//! Remove backslashes in block.
//! \return Modified copy of the given data; the argument itself is not changed.
// NOTE(review): listing lines 818 (function name and parameter list) and
// 823 (the loop body) are missing from this extracted listing. Presumably
// the loop body removes backslashes from each line - TODO confirm.
816template<class Trait>
817inline typename MdBlock<Trait>::Data
819{
820 auto tmp = d;
821
822 for (auto &line : tmp) {
824 }
825
826 return tmp;
827}
828
829//! Type of the paragraph's optimization.
// NOTE(review): the declaration line (listing line 830) and all the
// enumerator lines (listing lines 832, 835, 837 and 839) are missing from
// this extracted listing; only the documentation of the enumerators is
// visible.
831 //! Full optimization.
833 //! Semi optimization, optimization won't concatenate text
834 //! items if style delimiters will be in the middle.
836 //! Full optimization, but raw text data won't be concatenated (will be untouched).
838 //! Semi optimization, but raw text data won't be concatenated (will be untouched).
840};
841
842//
843// TextPlugin
844//
845
846//! ID of text plugin.
//! Unscoped enumeration with int as underlying type.
// NOTE(review): the enumerator lines (listing lines 849, 851 and 853) are
// missing from this extracted listing; only their documentation is visible.
847enum TextPlugin : int {
848 //! Unknown plugin.
850 //! GitHub's autolinks plugin.
852 //! First user defined plugin ID.
854}; // enum TextPlugin
855
856//
857// Style
858//
859
860//! Emphasis type.
//! Each enumerator is documented with the Markdown delimiter it stands for.
// NOTE(review): the enumerator lines (listing lines 863, 865, 867, 869, 871
// and 873) are missing from this extracted listing. Other code in this file
// uses Style::Italic1, Style::Italic2, Style::Bold1 and Style::Bold2.
861enum class Style {
862 //! "*"
864 //! "_"
866 //! "**"
868 //! "__"
870 //! "~"
872 //! Unknown.
874};
875
876//! \return Text option from style.
//! Both italic styles map to ItalicText and both bold styles to BoldText;
//! any style without its own case maps to TextWithoutFormat.
// NOTE(review): listing line 878 (function name and parameter list) and
// listing line 889 (the case label for StrikethroughText) are missing from
// this extracted listing; the body switches on a value named `s`.
877inline TextOption
879{
880 switch (s) {
881 case Style::Italic1:
882 case Style::Italic2:
883 return ItalicText;
884
885 case Style::Bold1:
886 case Style::Bold2:
887 return BoldText;
888
890 return StrikethroughText;
891
892 default:
893 return TextWithoutFormat;
894 }
895}
896
897//
898// TextPluginFunc
899//
900
// Forward declaration: TextParsingOpts is defined later in this file.
901template<class Trait>
902struct TextParsingOpts;
903
904//! Functor type for text plugin.
//! The functor returns nothing; its first argument is the paragraph and its
//! last argument is a list of strings.
// NOTE(review): listing line 907 (a parameter between the paragraph and the
// string list) is missing from this extracted listing.
905template<class Trait>
906using TextPluginFunc = std::function<void(std::shared_ptr<Paragraph<Trait>>,
908 const typename Trait::StringList &)>;
909
910//
911// TextPluginsMap
912//
913
914//! Type of the map of text plugins.
//! Key is an int (presumably an ID from TextPlugin - TODO confirm); value is
//! a tuple of the plugin's functor, a bool flag and a list of strings.
// NOTE(review): the meaning of the bool flag and of the string list is not
// visible in this part of the file.
915template<class Trait>
916using TextPluginsMap = std::map<int, std::tuple<TextPluginFunc<Trait>,
917 bool,
918 typename Trait::StringList>>;
919
920//
921// TextParsingOpts
922//
923
924//! Internal structure for auxiliary options for parser.
// NOTE(review): many lines of this struct are missing from this extracted
// listing, among them the struct header and the declarations of members
// that other code in this file uses (m_fr, m_collectRefLinks, m_detected),
// the name of the member function at listing line 970 and the members of
// StyleInfo at listing lines 994 and 996.
925template<class Trait>
928 std::shared_ptr<Block<Trait>> m_parent;
929 std::shared_ptr<RawHtml<Trait>> m_tmpHtml;
930 std::shared_ptr<Document<Trait>> m_doc;
// Reference to a list of links; presumably links that still have to be
// parsed - TODO confirm.
931 typename Trait::StringList &m_linksToParse;
932 typename Trait::String m_workingPath;
933 typename Trait::String m_fileName;
938 std::shared_ptr<Text<Trait>> m_lastText = {};
939 bool m_wasRefLink = false;
941 // This flag is set only in second step!
943 bool m_headingAllowed = false;
944
// Piece of raw text with its position and line.
945 struct TextData {
946 typename Trait::String m_str;
947 long long int m_pos = -1;
948 long long int m_line = -1;
949 };
950
951 std::vector<TextData> m_rawTextData = {};
952
// Concatenate strings of raw text data in the range ( start, end ) into the
// item at index \p start and erase the appended items. Nothing is done if
// the range holds fewer than two items.
953 inline void
954 concatenateAuxText(long long int start, long long int end)
955 {
956 if (start < end && (end - start > 1)) {
957 for (auto i = start + 1; i < end; ++i) {
958 m_rawTextData[start].m_str += m_rawTextData[i].m_str;
959 }
960
961 m_rawTextData.erase(m_rawTextData.cbegin() + start + 1, m_rawTextData.cbegin() + end);
962 }
963 }
964
// Kind of a construct that was detected.
965 enum class Detected { Nothing, Table, HTML, Code, List, Blockquote }; // enum class Detected
966
968
// \return true if m_detected is one of the values listed in the switch below.
// NOTE(review): listing line 976 (one more case label) is missing.
969 inline bool
971 {
972 switch (m_detected) {
973 case Detected::Table:
974 case Detected::Code:
975 case Detected::List:
977 case Detected::HTML:
978 return true;
979
980 default:
981 return false;
982 }
983 }
984
985 long long int m_line = 0;
986 long long int m_pos = 0;
987 long long int m_startTableLine = -1;
988 long long int m_lastTextLine = -1;
989 long long int m_lastTextPos = -1;
992
993 struct StyleInfo {
995 long long int m_length;
997 };
998
999 std::vector<StyleInfo> m_styles = {};
1001 std::shared_ptr<ItemWithOpts<Trait>> m_lastItemWithStyle = nullptr;
1002}; // struct TextParsingOpts
1003
1004//! Reset pre-stored HTML.
//! Clears the HTML item and the parent pointers of `html`, sets its block
//! type to -1 and drops the m_continueHtml and m_onLine flags.
// NOTE(review): listing line 1006 (function name and parameter list) and
// listing line 1015 (the body of the `if` below) are missing from this
// extracted listing. The body uses an object named `html` and a pointer
// named `po`, which may be null; what is done when HTML was detected is not
// visible here.
1005template<class Trait>
1007{
1008 html.m_html.reset();
1009 html.m_parent.reset();
1010 html.m_htmlBlockType = -1;
1011 html.m_continueHtml = false;
1012 html.m_onLine = false;
1013
1014 if (po && po->m_detected == TextParsingOpts<Trait>::Detected::HTML) {
1016 }
1017}
1018
1019//
1020// virginSubstr
1021//
1022
1023//! \return Substring from fragment with given virgin positions.
1024template<class Trait>
1025inline typename Trait::String
1026virginSubstr(const MdBlock<Trait> &fr, const WithPosition &virginPos)
1027{
1028 if (fr.m_data.empty()) {
1029 return {};
1030 }
1031
1032 long long int startLine = virginPos.startLine() < fr.m_data.at(0).second.m_lineNumber ?
1033 (virginPos.endLine() < fr.m_data.at(0).second.m_lineNumber ? -1 : 0) :
1034 virginPos.startLine() - fr.m_data.at(0).second.m_lineNumber;
1035
1036 if (startLine >= static_cast<long long int>(fr.m_data.size()) || startLine < 0) {
1037 return {};
1038 }
1039
1040 auto spos = virginPos.startColumn() - fr.m_data.at(startLine).first.virginPos(0);
1041
1042 if (spos < 0) {
1043 spos = 0;
1044 }
1045
1046 long long int epos = 0;
1047 long long int linesCount = virginPos.endLine() - virginPos.startLine() -
1048 (virginPos.startLine() < fr.m_data.at(0).second.m_lineNumber ?
1049 fr.m_data.at(0).second.m_lineNumber - virginPos.startLine() : 0);
1050
1051 if (startLine + linesCount > static_cast<long long int>(fr.m_data.size())) {
1052 linesCount = fr.m_data.size() - startLine - 1;
1053 epos = fr.m_data.back().first.length();
1054 } else {
1055 epos = virginPos.endColumn() - fr.m_data.at(linesCount + startLine).first.virginPos(0) + 1;
1056 }
1057
1058 if (epos < 0) {
1059 epos = 0;
1060 }
1061
1062 if (epos > fr.m_data.at(linesCount + startLine).first.length()) {
1063 epos = fr.m_data.at(linesCount + startLine).first.length();
1064 }
1065
1066 typename Trait::String str =
1067 (linesCount ? fr.m_data.at(startLine).first.sliced(spos).asString() :
1068 fr.m_data.at(startLine).first.sliced(spos, epos - spos).asString());
1069
1070 long long int i = startLine + 1;
1071
1072 for (; i < startLine + linesCount; ++i) {
1073 str.push_back(Trait::latin1ToString("\n"));
1074 str.push_back(fr.m_data.at(i).first.asString());
1075 }
1076
1077 if (linesCount) {
1078 str.push_back(Trait::latin1ToString("\n"));
1079 str.push_back(fr.m_data.at(i).first.sliced(0, epos).asString());
1080 }
1081
1082 return str;
1083}
1084
1085//
1086// localPosFromVirgin
1087//
1088
1089//! \return Local position ( { column, line } ) in fragment for given virgin position if exists.
1090//! \return { -1, -1 } if there is no given position.
1091template<class Trait>
1092inline std::pair<long long int, long long int>
1093localPosFromVirgin(const MdBlock<Trait> &fr, long long int virginColumn, long long int virginLine)
1094{
1095 if (fr.m_data.empty()) {
1096 return {-1, -1};
1097 }
1098
1099 if (fr.m_data.front().second.m_lineNumber > virginLine ||
1100 fr.m_data.back().second.m_lineNumber < virginLine) {
1101 return {-1, -1};
1102 }
1103
1104 auto line = virginLine - fr.m_data.front().second.m_lineNumber;
1105
1106 if (fr.m_data.at(line).first.isEmpty()) {
1107 return {-1, -1};
1108 }
1109
1110 const auto vzpos = fr.m_data.at(line).first.virginPos(0);
1111
1112 if (vzpos > virginColumn || virginColumn > vzpos + fr.m_data.at(line).first.length() - 1) {
1113 return {-1, -1};
1114 }
1115
1116 return {virginColumn - vzpos, line};
1117}
1118
1119//
1120// GitHubAutolinkPlugin
1121//
1122
/*
    "^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?"
    "(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
*/
//! \return Is the given string a valid email?
//!
//! An optional "mailto:" prefix is skipped. The part before '@' must be
//! not empty and consist of ASCII letters, digits and the punctuation
//! allowed by the regular expression above. The part after '@' must have
//! at least one '.', and each dot-separated label must be 1 to 63
//! characters long, consist of ASCII letters, digits and '-', and neither
//! start nor end with '-'.
template<class Trait>
inline bool
isEmail(const typename Trait::String &url)
{
    using Char = typename Trait::Char;

    // ASCII digit or letter.
    const auto isAlphaNumeric = [](const Char &ch) -> bool {
        const auto code = ch.unicode();

        const bool digit = (code >= 48 && code <= 57);
        const bool upper = (code >= 65 && code <= 90);
        const bool lower = (code >= 97 && code <= 122);

        return (digit || lower || upper);
    };

    // Punctuation allowed before '@': !#$%&'*+-./=?^_`{|}~
    const auto isLocalPartPunctuation = [](const Char &ch) -> bool {
        const auto code = ch.unicode();

        return (code == 33 || (code >= 35 && code <= 39) ||
                code == 42 || code == 43 || (code >= 45 && code <= 47) ||
                code == 61 || code == 63 || (code >= 94 && code <= 96) ||
                (code >= 123 && code <= 126));
    };

    const auto hyphen = Trait::latin1ToChar('-');
    const auto at = Trait::latin1ToChar('@');
    const auto dot = Trait::latin1ToChar('.');

    long long int pos = (url.startsWith(Trait::latin1ToString("mailto:")) ? 7 : 0);
    const auto atPos = url.indexOf(at, pos);

    // '@' is required and the part before it can't be empty.
    if (atPos == -1 || atPos == pos) {
        return false;
    }

    for (; pos < atPos; ++pos) {
        if (!isAlphaNumeric(url[pos]) && !isLocalPartPunctuation(url[pos])) {
            return false;
        }
    }

    // Check one label of the domain in the range [ from, to ).
    const auto isValidLabel = [&](long long int from, long long int to) -> bool {
        const long long int maxLabelLength = 63;

        if (to - from > maxLabelLength || from + 1 > to || from >= url.length() || to > url.length()) {
            return false;
        }

        if (url[from] == hyphen || url[to - 1] == hyphen) {
            return false;
        }

        for (; from < to; ++from) {
            if (!isAlphaNumeric(url[from]) && url[from] != hyphen) {
                return false;
            }
        }

        return true;
    };

    long long int labelStart = atPos + 1;
    long long int dotPos = url.indexOf(dot, labelStart);

    // At least one dot is required in the domain.
    if (dotPos == -1) {
        return false;
    }

    do {
        if (!isValidLabel(labelStart, dotPos)) {
            return false;
        }

        labelStart = dotPos + 1;
        dotPos = url.indexOf(dot, labelStart);
    } while (dotPos != -1);

    // The last label goes till the end of the string.
    return isValidLabel(labelStart, url.length());
}
1215
1216//! \return Is the given string a valid URL?
//! Only the declaration is given here; implementations are provided by
//! explicit specializations under the backend-specific #ifdef blocks below.
1217template<class Trait>
1218inline bool
1219isValidUrl(const typename Trait::String &url);
1220
1221//! \return Is the given string a GitHub autolink?
//! Only the declaration is given here; implementations are provided by
//! explicit specializations under the backend-specific #ifdef blocks below.
1222template<class Trait>
1223inline bool
1224isGitHubAutolink(const typename Trait::String &url);
1225
1226#ifdef MD4QT_QT_SUPPORT
1227
// Qt specializations of the two URL checks declared above.
// NOTE(review): the declarator lines (listing lines 1230 and 1239) are
// missing from this extracted listing, so it is not visible which
// specialization is which. Presumably the first one is isValidUrl() and the
// second one is isGitHubAutolink() - TODO confirm.

// Accepts a string that QUrl in strict mode considers a valid and not
// relative URL.
1228template<>
1229inline bool
1231{
1232 const QUrl u(url, QUrl::StrictMode);
1233
1234 return (u.isValid() && !u.isRelative());
1235}
1236
// Accepts a string that QUrl in strict mode considers valid and that either
// has both scheme and host, or starts with "www.", is at least 7 characters
// long and has one more '.' at index 4 or later.
1237template<>
1238inline bool
1240{
1241 const QUrl u(url, QUrl::StrictMode);
1242
1243 return (u.isValid()
1244 && ((!u.scheme().isEmpty() && !u.host().isEmpty())
1245 || (url.startsWith(QStringLiteral("www.")) && url.length() >= 7 &&
1246 url.indexOf(QLatin1Char('.'), 4) != -1)));
1247}
1248
1249#endif
1250
1251#ifdef MD4QT_ICU_STL_SUPPORT
1252
// ICU/STL specializations of the two URL checks declared above; they use
// UrlUri in place of QUrl.
// NOTE(review): the declarator lines (listing lines 1255 and 1264) are
// missing from this extracted listing, so it is not visible which
// specialization is which. Presumably the first one is isValidUrl() and the
// second one is isGitHubAutolink() - TODO confirm.

// Accepts a string that UrlUri considers a valid and not relative URL.
1253template<>
1254inline bool
1256{
1257 const UrlUri u(url);
1258
1259 return (u.isValid() && !u.isRelative());
1260}
1261
// Accepts a string that UrlUri considers valid and that either has both
// scheme and host, or starts with "www.", is at least 7 characters long and
// has one more '.' at index 4 or later.
1262template<>
1263inline bool
1265{
1266 const UrlUri u(url);
1267
1268 return (u.isValid()
1269 && ((!u.scheme().isEmpty() && !u.host().isEmpty())
1270 || (url.startsWith(UnicodeString("www.")) && url.length() >= 7 &&
1271 url.indexOf(UnicodeChar('.'), 4) != -1)));
1272}
1273
1274#endif
1275
1276//! Process GitHub autolinks for the text with index \p idx.
//!
//! The raw text data at index \p idx is scanned for words separated by
//! spaces. Each word is checked with isGitHubAutolink() and isEmail(); on a
//! match a Link item is inserted into the paragraph, the text before the
//! link stays in (or is removed from) the original text item, and the text
//! after the link, if any, becomes a new Text item and a new raw text data
//! entry.
//!
//! \return Index in the raw text data adjusted for the removed and
//! inserted entries; \p idx itself if it is out of range.
// NOTE(review): listing lines 1279-1280 (function name and the first
// parameters), 1340 and 1382 are missing from this extracted listing. The
// body uses a paragraph pointer named `p` and parsing options named `po`.
1277template<class Trait>
1278inline long long int
1281 long long int idx)
1282{
1283 if (idx < 0 || idx >= (long long int)po.m_rawTextData.size()) {
1284 return idx;
1285 }
1286
// Characters that are skipped in front of a candidate word.
1287 static const auto s_delims = Trait::latin1ToString("*_~()<>");
// Working copy of the raw text data, it is shortened after every found link.
1288 auto s = po.m_rawTextData[idx];
// True while the start of a candidate word is searched.
1289 bool first = true;
// Start position of the candidate word.
1290 long long int j = 0;
// Character that ends the candidate word: ')' if '(' was seen before the
// word, otherwise a null character.
1291 auto end = typename Trait::Char(0x00);
1292 bool skipSpace = true;
1293 long long int ret = idx;
1294
1295 while (s.m_str.length()) {
1296 long long int i = 0;
1297 end = typename Trait::Char(0x00);
1298
1299 for (; i < s.m_str.length(); ++i) {
1300 if (first) {
1301 if (s.m_str[i] == Trait::latin1ToChar('(')) {
1302 end = Trait::latin1ToChar(')');
1303 }
1304
1305 if (s_delims.indexOf(s.m_str[i]) == -1 && !s.m_str[i].isSpace()) {
1306 first = false;
1307 j = i;
1308 }
1309 } else {
// A word ends on a space, on the ending character or on the last
// character of the string.
1310 if (s.m_str[i].isSpace() || i == s.m_str.length() - 1 || s.m_str[i] == end) {
// The last character of the string belongs to the word unless it is
// a space or the ending character.
1311 auto tmp = s.m_str.sliced(j, i - j +
1312 (i == s.m_str.length() - 1 && s.m_str[i] != end && !s.m_str[i].isSpace() ?
1313 1 : 0));
1314 skipSpace = s.m_str[i].isSpace();
1315
1316 const auto email = isEmail<Trait>(tmp);
1317
1318 if (isGitHubAutolink<Trait>(tmp) || email) {
1319 auto ti = textAtIdx(p, idx);
1320
1321 if (ti >= 0 && ti < static_cast<long long int>(p->items().size())) {
1322 typename ItemWithOpts<Trait>::Styles openStyles, closeStyles;
1323 const auto opts = std::static_pointer_cast<Text<Trait>>(p->items().at(ti))->opts();
1324
// Nothing precedes the link: the text item is replaced by the link.
1325 if (j == 0 || s.m_str.sliced(0, j).isEmpty()) {
1326 openStyles = std::static_pointer_cast<ItemWithOpts<Trait>>(p->items().at(ti))->openStyles();
1327 closeStyles = std::static_pointer_cast<ItemWithOpts<Trait>>(p->items().at(ti))->closeStyles();
1328 p->removeItemAt(ti);
1329 po.m_rawTextData.erase(po.m_rawTextData.cbegin() + idx);
1330 --ret;
1331 } else {
// Text before the link is kept in the existing text item.
1332 const auto tmp = s.m_str.sliced(0, j);
1333
1334 auto t = std::static_pointer_cast<Text<Trait>>(p->items().at(ti));
1335 t->setEndColumn(po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + j - 1));
1336 closeStyles = t->closeStyles();
1337 t->closeStyles() = {};
1338 po.m_rawTextData[idx].m_str = tmp;
1339 ++idx;
1341 ++ti;
1342 }
1343
1344 std::shared_ptr<Link<Trait>> lnk(new Link<Trait>);
1345 lnk->setStartColumn(po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + j));
1346 lnk->setStartLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1347 lnk->setEndColumn(
1348 po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + i -
1349 (i == s.m_str.length() - 1 && s.m_str[i] != end && !s.m_str[i].isSpace() ?
1350 0 : 1)));
1351 lnk->setEndLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1352 lnk->openStyles() = openStyles;
1353 lnk->setTextPos({lnk->startColumn(), lnk->startLine(), lnk->endColumn(), lnk->endLine()});
1354 lnk->setUrlPos(lnk->textPos());
1355
// Emails get the "mailto:" scheme if they don't have it yet.
1356 if (email && !tmp.toLower().startsWith(Trait::latin1ToString("mailto:"))) {
1357 tmp = Trait::latin1ToString("mailto:") + tmp;
1358 }
1359
// Links starting with "www." get the "http://" scheme.
1360 if (!email && tmp.toLower().startsWith(Trait::latin1ToString("www."))) {
1361 tmp = Trait::latin1ToString("http://") + tmp;
1362 }
1363
1364 lnk->setUrl(tmp);
1365 lnk->setOpts(opts);
1366 p->insertItem(ti, lnk);
1367
// Cut the processed part from the working copy.
1368 s.m_pos += i + (s.m_str[i] == end || s.m_str[i].isSpace() ? 0 : 1);
1369 s.m_str.remove(0, i + (s.m_str[i] == end || s.m_str[i].isSpace() ? 0 : 1));
1370 j = 0;
1371 i = 0;
1372
// Text after the link becomes a new text item, otherwise the link takes
// the closing styles.
1373 if (!s.m_str.isEmpty()) {
1374 po.m_rawTextData.insert(po.m_rawTextData.cbegin() + idx, s);
1375 ++ret;
1376
1377 auto t = std::make_shared<Text<Trait>>();
1378 t->setStartColumn(po.m_fr.m_data[s.m_line].first.virginPos(s.m_pos));
1379 t->setStartLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1380 t->setEndLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1381 t->setEndColumn(po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + s.m_str.length() - 1));
1383 t->closeStyles() = closeStyles;
1384 p->insertItem(ti + 1, t);
1385 } else {
1386 lnk->closeStyles() = closeStyles;
1387 }
1388
1389 break;
1390 }
1391 }
1392
1393 j = i + (skipSpace ? 1 : 0);
1394 }
1395 }
1396 }
1397
1398 first = true;
1399
// The whole string was scanned without finding one more link.
1400 if (i == s.m_str.length()) {
1401 break;
1402 }
1403 }
1404
1405 return ret;
1406}
1407
1408//! GitHub autolinks plugin.
//! Runs processGitHubAutolinkExtension() for every entry of the raw text
//! data, continuing after the index that each call returns. Nothing is done
//! when po.m_collectRefLinks is set. The string list argument is not used.
// NOTE(review): the declarator lines (function name and the first
// parameters, listing lines 1411-1412) are missing from this extracted
// listing; the body uses a paragraph named `p` and parsing options named `po`.
1409template<class Trait>
1410inline void
1413 const typename Trait::StringList &)
1414{
1415 if (!po.m_collectRefLinks) {
1416 long long int i = 0;
1417
1418 while (i >= 0 && i < (long long int)po.m_rawTextData.size()) {
1419 i = processGitHubAutolinkExtension(p, po, i);
1420
1421 ++i;
1422 }
1423 }
1424}
1425
1426//
1427// Parser
1428//
1429
1430//! Markdown parser.
//! Public interface: two parse() overloads (from a file name or from a text stream)
//! and registration/removal of text plugins kept in m_textPlugins. Everything else
//! is private parsing machinery declared here and defined out of class.
1431template<class Trait>
1432class Parser final
1433{
1434public:
1439
1440 ~Parser() = default;
1441
1442 //! \return Parsed Markdown document.
1443 std::shared_ptr<Document<Trait>>
1444 parse(
1445 //! File name of the Markdown document.
1446 const typename Trait::String &fileName,
1447 //! Should parsing be recursive? If recursive all links to existing Markdown
1448 //! files will be parsed and presented in the returned document.
1449 bool recursive = true,
1450 //! Allowed extensions for Markdown document files. If a Markdown file doesn't
1451 //! have one of the given extensions it will be ignored.
1452 const typename Trait::StringList &ext = {Trait::latin1ToString("md"), Trait::latin1ToString("markdown")},
1453 //! Make full optimization, or just semi one. In full optimization
1454 //! text items with one style but with some closing delimiters
1455 //! in the middle will be concatenated in one, like in **text* text*,
1456 //! here in full optimization will be "text text" with 2 open/close
1457 //! style delimiters, but one closing delimiter is in the middle.
1458 bool fullyOptimizeParagraphs = true);
1459
1460 //! \return Parsed Markdown document.
1461 std::shared_ptr<Document<Trait>>
1462 parse(
1463 //! Stream to parse.
1464 typename Trait::TextStream &stream,
1465 //! Absolute path to the root folder for the document.
1466 //! This path will be used to resolve local links.
1467 const typename Trait::String &path,
1468 //! This argument needed only for anchor.
1469 const typename Trait::String &fileName,
1470 //! Make full optimization, or just semi one. In full optimization
1471 //! text items with one style but with some closing delimiters
1472 //! in the middle will be concatenated in one, like in **text* text*,
1473 //! here in full optimization will be "text text" with 2 open/close
1474 //! style delimiters, but one closing delimiter is in the middle.
1475 bool fullyOptimizeParagraphs = true);
1476
1477 //! Add text plugin.
     //! The plugin function, the processInLinks flag and the user data are stored
     //! in m_textPlugins under \p id.
1478 void
1480 //! ID of a plugin. Use TextPlugin::UserDefinedPluginID value for start ID.
1481 int id,
1482 //! Function of a plugin, that will be invoked to process raw text.
1483 TextPluginFunc<Trait> plugin,
1484 //! Should this plugin be used in parsing of internals of links?
1485 bool processInLinks,
1486 //! User data that will be passed to plugin function.
1487 const typename Trait::StringList &userData)
1488 {
1489 m_textPlugins.insert({id, {plugin, processInLinks, userData}});
1490 }
1491
1492 //! Remove text plugin.
     //! Erases the entry stored under \p id from m_textPlugins.
1493 void
1495 //! ID of plugin that should be removed.
1496 int id)
1497 {
1498 m_textPlugins.erase(id);
1499 }
1500
1501private:
     //! File-based worker invoked by the public parse(fileName, ...) overload.
1502 void
1503 parseFile(const typename Trait::String &fileName,
1504 bool recursive,
1505 std::shared_ptr<Document<Trait>> doc,
1506 const typename Trait::StringList &ext,
1507 typename Trait::StringList *parentLinks = nullptr);
1508
     //! Stream-based worker invoked by the public parse(stream, ...) overload.
1509 void
1510 parseStream(typename Trait::TextStream &stream,
1511 const typename Trait::String &workingPath,
1512 const typename Trait::String &fileName,
1513 bool recursive,
1514 std::shared_ptr<Document<Trait>> doc,
1515 const typename Trait::StringList &ext,
1516 typename Trait::StringList *parentLinks = nullptr);
1517
     //! Called by both public parse() overloads right before they return.
1518 void
1519 clearCache();
1520
     //! Kinds of blocks; this is the result type of whatIsTheLine() and the type
     //! of the m_type / m_lineType / m_prevLineType fields of ParserContext.
1521 enum class BlockType {
1522 Unknown,
1523 EmptyLine,
1524 Text,
1525 List,
1526 ListWithFirstEmptyLine,
1527 CodeIndentedBySpaces,
1528 Code,
1529 Blockquote,
1530 Heading,
1531 SomethingInList,
1532 FensedCodeInList,
1533 Footnote
1534 }; // enum BlockType
1535
     //! Level and indent pair; both members default to -1.
1536 struct ListIndent {
1537 long long int m_level = -1;
1538 long long int m_indent = -1;
1539 }; // struct ListIndent
1540
     //! Classify the line \p str as one of the BlockType values.
1541 BlockType
1542 whatIsTheLine(typename Trait::InternalString &str,
1543 bool inList = false,
1544 bool inListWithFirstEmptyLine = false,
1545 bool fensedCodeInList = false,
1546 typename Trait::String *startOfCode = nullptr,
1547 ListIndent *indent = nullptr,
1548 bool emptyLinePreceded = false,
1549 bool calcIndent = false,
1550 const std::vector<long long int> *indents = nullptr);
1551
1552 long long int
1553 parseFragment(MdBlock<Trait> &fr,
1554 std::shared_ptr<Block<Trait>> parent,
1555 std::shared_ptr<Document<Trait>> doc,
1556 typename Trait::StringList &linksToParse,
1557 const typename Trait::String &workingPath,
1558 const typename Trait::String &fileName,
1559 bool collectRefLinks,
1560 RawHtmlBlock<Trait> &html);
1561
1562 long long int
1563 parseText(MdBlock<Trait> &fr,
1564 std::shared_ptr<Block<Trait>> parent,
1565 std::shared_ptr<Document<Trait>> doc,
1566 typename Trait::StringList &linksToParse,
1567 const typename Trait::String &workingPath,
1568 const typename Trait::String &fileName,
1569 bool collectRefLinks,
1570 RawHtmlBlock<Trait> &html);
1571
1572 long long int
1573 parseBlockquote(MdBlock<Trait> &fr,
1574 std::shared_ptr<Block<Trait>> parent,
1575 std::shared_ptr<Document<Trait>> doc,
1576 typename Trait::StringList &linksToParse,
1577 const typename Trait::String &workingPath,
1578 const typename Trait::String &fileName,
1579 bool collectRefLinks,
1580 RawHtmlBlock<Trait> &html);
1581
1582 long long int
1583 parseList(MdBlock<Trait> &fr,
1584 std::shared_ptr<Block<Trait>> parent,
1585 std::shared_ptr<Document<Trait>> doc,
1586 typename Trait::StringList &linksToParse,
1587 const typename Trait::String &workingPath,
1588 const typename Trait::String &fileName,
1589 bool collectRefLinks,
1590 RawHtmlBlock<Trait> &html);
1591
1592 long long int
1593 parseCode(MdBlock<Trait> &fr,
1594 std::shared_ptr<Block<Trait>> parent,
1595 bool collectRefLinks);
1596
1597 long long int
1598 parseCodeIndentedBySpaces(MdBlock<Trait> &fr,
1599 std::shared_ptr<Block<Trait>> parent,
1600 bool collectRefLinks,
1601 int indent,
1602 const typename Trait::String &syntax,
1603 long long int emptyColumn,
1604 long long int startLine,
1605 bool fensedCode,
1606 const WithPosition &startDelim = {},
1607 const WithPosition &endDelim = {},
1608 const WithPosition &syntaxPos = {});
1609
1610 long long int
1611 parseListItem(MdBlock<Trait> &fr,
1612 std::shared_ptr<Block<Trait>> parent,
1613 std::shared_ptr<Document<Trait>> doc,
1614 typename Trait::StringList &linksToParse,
1615 const typename Trait::String &workingPath,
1616 const typename Trait::String &fileName,
1617 bool collectRefLinks,
1618 RawHtmlBlock<Trait> &html,
1619 std::shared_ptr<ListItem<Trait>> *resItem = nullptr);
1620
1621 void
1622 parseHeading(MdBlock<Trait> &fr,
1623 std::shared_ptr<Block<Trait>> parent,
1624 std::shared_ptr<Document<Trait>> doc,
1625 typename Trait::StringList &linksToParse,
1626 const typename Trait::String &workingPath,
1627 const typename Trait::String &fileName,
1628 bool collectRefLinks);
1629
1630 void
1631 parseFootnote(MdBlock<Trait> &fr,
1632 std::shared_ptr<Block<Trait>> parent,
1633 std::shared_ptr<Document<Trait>> doc,
1634 typename Trait::StringList &linksToParse,
1635 const typename Trait::String &workingPath,
1636 const typename Trait::String &fileName,
1637 bool collectRefLinks);
1638
1639 void
1640 parseTable(MdBlock<Trait> &fr,
1641 std::shared_ptr<Block<Trait>> parent,
1642 std::shared_ptr<Document<Trait>> doc,
1643 typename Trait::StringList &linksToParse,
1644 const typename Trait::String &workingPath,
1645 const typename Trait::String &fileName,
1646 bool collectRefLinks,
1647 int columnsCount);
1648
1649 long long int
1650 parseParagraph(MdBlock<Trait> &fr,
1651 std::shared_ptr<Block<Trait>> parent,
1652 std::shared_ptr<Document<Trait>> doc,
1653 typename Trait::StringList &linksToParse,
1654 const typename Trait::String &workingPath,
1655 const typename Trait::String &fileName,
1656 bool collectRefLinks,
1657 RawHtmlBlock<Trait> &html);
1658
1659 long long int
1660 parseFormattedTextLinksImages(MdBlock<Trait> &fr,
1661 std::shared_ptr<Block<Trait>> parent,
1662 std::shared_ptr<Document<Trait>> doc,
1663 typename Trait::StringList &linksToParse,
1664 const typename Trait::String &workingPath,
1665 const typename Trait::String &fileName,
1666 bool collectRefLinks,
1667 bool ignoreLineBreak,
1668 RawHtmlBlock<Trait> &html,
1669 bool inLink);
1670
     //! Mutable state shared by the ParserContext-taking helpers declared below:
     //! the fragment being accumulated (m_fragment), the finished fragments
     //! (m_splitted), pending raw HTML (m_html), and counters, indents and
     //! block/line types.
1671 struct ParserContext {
1672 typename Trait::template Vector<MdBlock<Trait>> m_splitted;
1673 typename MdBlock<Trait>::Data m_fragment;
1674 bool m_emptyLineInList = false;
1675 bool m_fensedCodeInList = false;
1676 long long int m_emptyLinesCount = 0;
1677 long long int m_lineCounter = 0;
1678 std::vector<long long int> m_indents;
1679 ListIndent m_indent;
1680 RawHtmlBlock<Trait> m_html;
1681 long long int m_emptyLinesBefore = 0;
1682 MdLineData::CommentDataMap m_htmlCommentData;
1683 typename Trait::String m_startOfCode;
1684 typename Trait::String m_startOfCodeInList;
1685 BlockType m_type = BlockType::EmptyLine;
1686 BlockType m_lineType = BlockType::Unknown;
1687 BlockType m_prevLineType = BlockType::Unknown;
1688 }; // struct ParserContext
1689
1690 std::pair<long long int, bool>
1691 parseFirstStep(ParserContext &ctx,
1692 StringListStream<Trait> &stream,
1693 std::shared_ptr<Block<Trait>> parent,
1694 std::shared_ptr<Document<Trait>> doc,
1695 typename Trait::StringList &linksToParse,
1696 const typename Trait::String &workingPath,
1697 const typename Trait::String &fileName,
1698 bool collectRefLinks);
1699
1700 void
1701 parseSecondStep(ParserContext &ctx,
1702 std::shared_ptr<Block<Trait>> parent,
1703 std::shared_ptr<Document<Trait>> doc,
1704 typename Trait::StringList &linksToParse,
1705 const typename Trait::String &workingPath,
1706 const typename Trait::String &fileName,
1707 bool collectRefLinks,
1708 bool top,
1709 bool dontProcessLastFreeHtml);
1710
1711 std::pair<RawHtmlBlock<Trait>, long long int>
1712 parse(StringListStream<Trait> &stream,
1713 std::shared_ptr<Block<Trait>> parent,
1714 std::shared_ptr<Document<Trait>> doc,
1715 typename Trait::StringList &linksToParse,
1716 const typename Trait::String &workingPath,
1717 const typename Trait::String &fileName,
1718 bool collectRefLinks,
1719 bool top = false,
1720 bool dontProcessLastFreeHtml = false,
1721 bool stopOnMayBreakList = false);
1722
1723 std::pair<long long int, bool>
1724 parseFragment(ParserContext &ctx,
1725 std::shared_ptr<Block<Trait>> parent,
1726 std::shared_ptr<Document<Trait>> doc,
1727 typename Trait::StringList &linksToParse,
1728 const typename Trait::String &workingPath,
1729 const typename Trait::String &fileName,
1730 bool collectRefLinks);
1731
1732 void
1733 eatFootnote(ParserContext &ctx,
1734 StringListStream<Trait> &stream,
1735 std::shared_ptr<Block<Trait>> parent,
1736 std::shared_ptr<Document<Trait>> doc,
1737 typename Trait::StringList &linksToParse,
1738 const typename Trait::String &workingPath,
1739 const typename Trait::String &fileName,
1740 bool collectRefLinks);
1741
1742 void
1743 finishHtml(ParserContext &ctx,
1744 std::shared_ptr<Block<Trait>> parent,
1745 std::shared_ptr<Document<Trait>> doc,
1746 bool collectRefLinks,
1747 bool top,
1748 bool dontProcessLastFreeHtml);
1749
1750 void
1751 makeLineMain(ParserContext &ctx,
1752 const typename Trait::InternalString &line,
1753 long long int emptyLinesCount,
1754 const ListIndent &currentIndent,
1755 long long int ns,
1756 long long int currentLineNumber);
1757
1758 std::pair<long long int, bool>
1759 parseFragmentAndMakeNextLineMain(ParserContext &ctx,
1760 std::shared_ptr<Block<Trait>> parent,
1761 std::shared_ptr<Document<Trait>> doc,
1762 typename Trait::StringList &linksToParse,
1763 const typename Trait::String &workingPath,
1764 const typename Trait::String &fileName,
1765 bool collectRefLinks,
1766 const typename Trait::InternalString &line,
1767 const ListIndent &currentIndent,
1768 long long int ns,
1769 long long int currentLineNumber);
1770
1771 bool
1772 isListType(BlockType t);
1773
1774 std::pair<typename Trait::InternalString, bool>
1775 readLine(ParserContext &ctx, StringListStream<Trait> &stream);
1776
1777 std::shared_ptr<Image<Trait>>
1778 makeImage(const typename Trait::String &url,
1779 const typename MdBlock<Trait>::Data &text,
1780 TextParsingOpts<Trait> &po,
1781 bool doNotCreateTextOnFail,
1782 long long int startLine,
1783 long long int startPos,
1784 long long int lastLine,
1785 long long int lastPos,
1786 const WithPosition &textPos,
1787 const WithPosition &urlPos);
1788
1789 std::shared_ptr<Link<Trait>>
1790 makeLink(const typename Trait::String &url,
1791 const typename MdBlock<Trait>::Data &text,
1792 TextParsingOpts<Trait> &po,
1793 bool doNotCreateTextOnFail,
1794 long long int startLine,
1795 long long int startPos,
1796 long long int lastLine,
1797 long long int lastPos,
1798 const WithPosition &textPos,
1799 const WithPosition &urlPos);
1800
     //! One delimiter: its type, where it is (m_line, m_pos), how long it is
     //! (m_len) and a set of boolean flags. The per-enumerator comments show the
     //! characters of the delimiter.
1801 struct Delimiter {
1802 enum DelimiterType {
1803 // (
1804 ParenthesesOpen,
1805 // )
1806 ParenthesesClose,
1807 // [
1808 SquareBracketsOpen,
1809 // ]
1810 SquareBracketsClose,
1811 // ![
1812 ImageOpen,
1813 // ~~
1814 Strikethrough,
1815 // *
1816 Emphasis1,
1817 // _
1818 Emphasis2,
1819 // `
1820 InlineCode,
1821 // <
1822 Less,
1823 // >
1824 Greater,
1825 // $
1826 Math,
1827 HorizontalLine,
1828 H1,
1829 H2,
1830 Unknown
1831 }; // enum DelimiterType
1832
1833 DelimiterType m_type = Unknown;
1834 long long int m_line = -1;
1835 long long int m_pos = -1;
1836 long long int m_len = 0;
1837 bool m_isWordBefore = false;
1838 bool m_backslashed = false;
1839 bool m_leftFlanking = false;
1840 bool m_rightFlanking = false;
1841 bool m_skip = false;
1842 }; // struct Delimiter
1843
     //! Sequence of delimiters; most helpers below walk it through iterators.
1844 using Delims = typename Trait::template Vector<Delimiter>;
1845
1846 bool
1847 createShortcutImage(const typename MdBlock<Trait>::Data &text,
1848 TextParsingOpts<Trait> &po,
1849 long long int startLine,
1850 long long int startPos,
1851 long long int lastLineForText,
1852 long long int lastPosForText,
1853 typename Delims::iterator lastIt,
1854 const typename MdBlock<Trait>::Data &linkText,
1855 bool doNotCreateTextOnFail,
1856 const WithPosition &textPos,
1857 const WithPosition &linkTextPos);
1858
1859 typename Delims::iterator
1860 checkForImage(typename Delims::iterator it,
1861 typename Delims::iterator last,
1862 TextParsingOpts<Trait> &po);
1863
1864 bool
1865 createShortcutLink(const typename MdBlock<Trait>::Data &text,
1866 TextParsingOpts<Trait> &po,
1867 long long int startLine,
1868 long long int startPos,
1869 long long int lastLineForText,
1870 long long int lastPosForText,
1871 typename Delims::iterator lastIt,
1872 const typename MdBlock<Trait>::Data &linkText,
1873 bool doNotCreateTextOnFail,
1874 const WithPosition &textPos,
1875 const WithPosition &linkTextPos);
1876
1877 typename Delims::iterator
1878 checkForLink(typename Delims::iterator it,
1879 typename Delims::iterator last,
1880 TextParsingOpts<Trait> &po);
1881
1882 Delims
1883 collectDelimiters(const typename MdBlock<Trait>::Data &fr);
1884
1885 std::pair<typename Trait::String, bool>
1886 readHtmlTag(typename Delims::iterator it, TextParsingOpts<Trait> &po);
1887
1888 typename Delims::iterator
1889 findIt(typename Delims::iterator it,
1890 typename Delims::iterator last,
1891 TextParsingOpts<Trait> &po);
1892
1893 typename Delims::iterator
1894 eatRawHtmlTillEmptyLine(typename Delims::iterator it,
1895 typename Delims::iterator last,
1896 long long int line,
1897 long long int pos,
1898 TextParsingOpts<Trait> &po,
1899 int htmlRule,
1900 bool onLine,
1901 bool continueEating = false);
1902
1903 void
1904 finishRule1HtmlTag(typename Delims::iterator it,
1905 typename Delims::iterator last,
1906 TextParsingOpts<Trait> &po,
1907 bool skipFirst);
1908
1909 void
1910 finishRule2HtmlTag(typename Delims::iterator it,
1911 typename Delims::iterator last,
1912 TextParsingOpts<Trait> &po);
1913
1914 void
1915 finishRule3HtmlTag(typename Delims::iterator it,
1916 typename Delims::iterator last,
1917 TextParsingOpts<Trait> &po);
1918
1919 void
1920 finishRule4HtmlTag(typename Delims::iterator it,
1921 typename Delims::iterator last,
1922 TextParsingOpts<Trait> &po);
1923
1924 void
1925 finishRule5HtmlTag(typename Delims::iterator it,
1926 typename Delims::iterator last,
1927 TextParsingOpts<Trait> &po);
1928
1929 void
1930 finishRule6HtmlTag(typename Delims::iterator it,
1931 typename Delims::iterator last,
1932 TextParsingOpts<Trait> &po);
1933
1934 void
1935 finishRule7HtmlTag(typename Delims::iterator it,
1936 typename Delims::iterator last,
1937 TextParsingOpts<Trait> &po);
1938
1939 typename Delims::iterator
1940 finishRawHtmlTag(typename Delims::iterator it,
1941 typename Delims::iterator last,
1942 TextParsingOpts<Trait> &po,
1943 bool skipFirst);
1944
1945 int
1946 htmlTagRule(typename Delims::iterator it,
1947 typename Delims::iterator last,
1948 TextParsingOpts<Trait> &po);
1949
1950 typename Delims::iterator
1951 checkForRawHtml(typename Delims::iterator it,
1952 typename Delims::iterator last,
1953 TextParsingOpts<Trait> &po);
1954
1955 typename Delims::iterator
1956 checkForMath(typename Delims::iterator it,
1957 typename Delims::iterator last,
1958 TextParsingOpts<Trait> &po);
1959
1960 typename Delims::iterator
1961 checkForAutolinkHtml(typename Delims::iterator it,
1962 typename Delims::iterator last,
1963 TextParsingOpts<Trait> &po,
1964 bool updatePos);
1965
1966 typename Delims::iterator
1967 checkForInlineCode(typename Delims::iterator it,
1968 typename Delims::iterator last,
1969 TextParsingOpts<Trait> &po);
1970
1971 std::pair<typename MdBlock<Trait>::Data, typename Delims::iterator>
1972 readTextBetweenSquareBrackets(typename Delims::iterator start,
1973 typename Delims::iterator it,
1974 typename Delims::iterator last,
1975 TextParsingOpts<Trait> &po,
1976 bool doNotCreateTextOnFail,
1977 WithPosition *pos = nullptr);
1978
1979 std::pair<typename MdBlock<Trait>::Data, typename Delims::iterator>
1980 checkForLinkText(typename Delims::iterator it,
1981 typename Delims::iterator last,
1982 TextParsingOpts<Trait> &po,
1983 WithPosition *pos = nullptr);
1984
1985 std::pair<typename MdBlock<Trait>::Data, typename Delims::iterator>
1986 checkForLinkLabel(typename Delims::iterator it,
1987 typename Delims::iterator last,
1988 TextParsingOpts<Trait> &po,
1989 WithPosition *pos = nullptr);
1990
1991 std::tuple<typename Trait::String, typename Trait::String, typename Delims::iterator, bool>
1992 checkForInlineLink(typename Delims::iterator it,
1993 typename Delims::iterator last,
1994 TextParsingOpts<Trait> &po,
1995 WithPosition *urlPos = nullptr);
1996
1997 inline std::tuple<typename Trait::String, typename Trait::String, typename Delims::iterator, bool>
1998 checkForRefLink(typename Delims::iterator it,
1999 typename Delims::iterator last,
2000 TextParsingOpts<Trait> &po,
2001 WithPosition *urlPos = nullptr);
2002
2003 typename Trait::String
2004 toSingleLine(const typename MdBlock<Trait>::Data &d);
2005
     //! Try to create a shortcut item from the label starting at \p it.
     //! The label is read with checkForLinkLabel(). If something was consumed and
     //! its single-line, simplified text is not empty, the member function
     //! \p functor is invoked through this->* with the label text and positions.
     //! \return The iterator produced by checkForLinkLabel() when \p functor
     //! returns true, otherwise the iterator the function was called with.
2006 template<class Func>
2007 typename Delims::iterator
2008 checkShortcut(typename Delims::iterator it,
2009 typename Delims::iterator last,
2010 TextParsingOpts<Trait> &po,
2011 Func functor)
2012 {
2013 const auto start = it;
2014
2015 typename MdBlock<Trait>::Data text;
2016
2017 WithPosition labelPos;
2018 std::tie(text, it) = checkForLinkLabel(start, last, po, &labelPos);
2019
2020 if (it != start && !toSingleLine(text).simplified().isEmpty()) {
2021 if ((this->*functor)(text, po, start->m_line, start->m_pos, start->m_line,
2022 start->m_pos + start->m_len, it, {}, false, labelPos, {})) {
2023 return it;
2024 }
2025 }
2026
2027 return start;
2028 }
2029
2030 bool
2031 isSequence(typename Delims::iterator it,
2032 long long int itLine,
2033 long long int itPos,
2034 typename Delimiter::DelimiterType t);
2035
2036 std::pair<typename Delims::iterator, typename Delims::iterator>
2037 readSequence(typename Delims::iterator first,
2038 typename Delims::iterator it,
2039 typename Delims::iterator last,
2040 long long int &pos,
2041 long long int &length,
2042 long long int &itCount,
2043 long long int &lengthFromIt,
2044 long long int &itCountFromIt);
2045
2046 typename Delims::iterator
2047 readSequence(typename Delims::iterator it,
2048 typename Delims::iterator last,
2049 long long int &line,
2050 long long int &pos,
2051 long long int &len,
2052 long long int &itCount);
2053
2054 int
2055 emphasisToInt(typename Delimiter::DelimiterType t);
2056
2057 void
2058 createStyles(std::vector<std::pair<Style, long long int>> & styles,
2059 typename Delimiter::DelimiterType t,
2060 long long int style);
2061
2062 std::vector<std::pair<Style, long long int>>
2063 createStyles(typename Delimiter::DelimiterType t,
2064 const std::vector<long long int> &styles,
2065 long long int lastStyle);
2066
2067 std::tuple<bool, std::vector<std::pair<Style, long long int>>, long long int, long long int>
2068 isStyleClosed(typename Delims::iterator first,
2069 typename Delims::iterator it,
2070 typename Delims::iterator last,
2071 typename Delims::iterator &stackBottom,
2072 TextParsingOpts<Trait> &po);
2073
2074 typename Delims::iterator
2075 incrementIterator(typename Delims::iterator it,
2076 typename Delims::iterator last,
2077 long long int count);
2078
2079 typename Delims::iterator
2080 checkForStyle(typename Delims::iterator first,
2081 typename Delims::iterator it,
2082 typename Delims::iterator last,
2083 typename Delims::iterator &stackBottom,
2084 TextParsingOpts<Trait> &po);
2085
2086 bool
2087 isNewBlockIn(MdBlock<Trait> &fr,
2088 long long int startLine,
2089 long long int endLine);
2090
2091 void
2092 makeInlineCode(long long int startLine,
2093 long long int startPos,
2094 long long int lastLine,
2095 long long int lastPos,
2096 TextParsingOpts<Trait> &po,
2097 typename Delims::iterator startDelimIt,
2098 typename Delims::iterator endDelimIt);
2099
     //! Selects an OptimizeParagraphType value from m_fullyOptimizeParagraphs;
     //! OptimizeParagraphType::Full is used when the flag is true.
2101 defaultParagraphOptimization() const
2102 {
2103 return (m_fullyOptimizeParagraphs ? OptimizeParagraphType::Full :
2105 }
2106
2107private:
2108 //! Used in tests.
2109 friend struct PrivateAccess;
2110
2111private:
     //! List of file names kept by the parser.
2112 typename Trait::StringList m_parsedFiles;
     //! Registered text plugins keyed by plugin ID.
2113 TextPluginsMap<Trait> m_textPlugins;
     //! Value of the fullyOptimizeParagraphs argument of the last public parse() call.
2114 bool m_fullyOptimizeParagraphs = true;
2115
2117}; // class Parser
2118
2119//
2120// Parser
2121//
2122
2123template<class Trait>
2124inline std::shared_ptr<Document<Trait>>
2125Parser<Trait>::parse(const typename Trait::String &fileName,
2126 bool recursive,
2127 const typename Trait::StringList &ext,
2128 bool fullyOptimizeParagraphs)
2129{
2130 m_fullyOptimizeParagraphs = fullyOptimizeParagraphs;
2131
2132 std::shared_ptr<Document<Trait>> doc(new Document<Trait>);
2133
2134 parseFile(fileName, recursive, doc, ext);
2135
2136 clearCache();
2137
2138 return doc;
2139}
2140
2141template<class Trait>
2142inline std::shared_ptr<Document<Trait>>
2143Parser<Trait>::parse(typename Trait::TextStream &stream,
2144 const typename Trait::String &path,
2145 const typename Trait::String &fileName,
2146 bool fullyOptimizeParagraphs)
2147{
2148 m_fullyOptimizeParagraphs = fullyOptimizeParagraphs;
2149
2150 std::shared_ptr<Document<Trait>> doc(new Document<Trait>);
2151
2152 parseStream(stream, path, fileName, false, doc, typename Trait::StringList());
2153
2154 clearCache();
2155
2156 return doc;
2157}
2158
2159template<class Trait>
2161
2162#ifdef MD4QT_QT_SUPPORT
2163
2164//! Wrapper for QTextStream.
//! Pulls text from the wrapped stream in chunks of up to 512 characters and
//! hands it out line by line.
2165template<>
2167{
2168public:
2170 : m_stream(stream)
2171 , m_lastBuf(false)
2172 , m_pos(0)
2173 {
2174 }
2175
     //! \return true once the last chunk has been fetched and fully consumed.
2176 bool
2177 atEnd() const
2178 {
2179 return (m_lastBuf && m_pos == m_buf.size());
2180 }
2181
     //! Read characters up to the next line ending and return them without it.
     //! "\n", "\r\n" and a lone "\r" all terminate a line. Null characters
     //! are dropped.
2182 QString
2184 {
2185 QString line;
2186 bool rFound = false;
2187
2188 while (!atEnd()) {
2189 const auto c = getChar();
2190
     // A '\r' not followed by '\n' ends the line; un-read the extra character.
     // NOTE(review): getChar() can return a null QChar without advancing m_pos
     // (empty refill at end of stream); in that case this decrement would make
     // m_pos negative - confirm this cannot happen in practice.
2191 if (rFound && c != QLatin1Char('\n')) {
2192 --m_pos;
2193
2194 return line;
2195 }
2196
2197 if (c == QLatin1Char('\r')) {
2198 rFound = true;
2199
2200 continue;
2201 } else if (c == QLatin1Char('\n')) {
2202 return line;
2203 }
2204
2205 if (!c.isNull()) {
2206 line.push_back(c);
2207 }
2208 }
2209
2210 return line;
2211 }
2212
2213private:
     //! Replace the buffer with the next chunk (at most 512 characters), remember
     //! whether the underlying stream is exhausted, and rewind the read position.
2214 void
2215 fillBuf()
2216 {
2217 m_buf = m_stream.read(512);
2218
2219 if (m_stream.atEnd()) {
2220 m_lastBuf = true;
2221 }
2222
2223 m_pos = 0;
2224 }
2225
     //! \return Next buffered character, refilling the buffer when it is used up;
     //! a null QChar when nothing is left.
2226 QChar
2227 getChar()
2228 {
2229 if (m_pos < m_buf.size()) {
2230 return m_buf.at(m_pos++);
2231 } else if (!atEnd()) {
2232 fillBuf();
2233
2234 return getChar();
2235 } else {
2236 return QChar();
2237 }
2238 }
2239
2240private:
     // Wrapped stream (held by reference), current chunk, "last chunk fetched"
     // flag and read position inside the chunk.
2241 QTextStream &m_stream;
2242 QString m_buf;
2243 bool m_lastBuf;
2244 long long int m_pos;
2245}; // class TextStream
2246
2247#endif
2248
2249#ifdef MD4QT_ICU_STL_SUPPORT
2250
2251//! Wrapper for std::istream.
//! The whole stream is read in the constructor and converted to a
//! UnicodeString; lines are then served from that string.
2252template<>
2254{
2255public:
     //! Read all of \p stream, replace embedded zero bytes with the UTF-8 encoding
     //! of U+FFFD, and decode the result as UTF-8 into m_str.
2256 TextStream(std::istream &stream)
2257 : m_pos(0)
2258 {
2259 std::vector<unsigned char> content;
2260
     // Size the buffer from the stream length, plus one byte for a terminating zero.
     // NOTE(review): the tellg() result is used without checking for failure
     // (it reports -1 on error) - confirm callers only pass seekable, good streams.
2261 stream.seekg(0, std::ios::end);
2262 const auto ssize = stream.tellg();
2263 content.resize((size_t)ssize + 1);
2264 stream.seekg(0, std::ios::beg);
2265 stream.read((char *)&content[0], ssize);
2266 content[(size_t)ssize] = 0;
2267
     // z counts the terminator too, so z > 1 means the data itself holds zero bytes.
2268 const auto z = std::count(content.cbegin(), content.cend(), 0);
2269
2270 if (z > 1) {
     // Each embedded zero grows from one byte to three, i.e. two extra bytes.
2271 std::vector<unsigned char> tmp;
2272 tmp.resize(content.size() + (z - 1) * 2);
2273
2274 for (size_t i = 0, j = 0; i < content.size() - 1; ++i, ++j) {
2275 if (content[i] == 0) {
2276 // U+FFFD (replacement character), encoded in UTF-8 as EF BF BD.
2277 tmp[j++] = 0xEF;
2278 tmp[j++] = 0xBF;
2279 tmp[j] = 0xBD;
2280 } else {
2281 tmp[j] = content[i];
2282 }
2283 }
2284
2285 tmp[tmp.size() - 1] = 0;
2286
2287 std::swap(content, tmp);
2288 }
2289
2290 m_str = UnicodeString::fromUTF8((char *)&content[0]);
2291 }
2292
     //! \return true when every character of m_str has been consumed.
2293 bool
2294 atEnd() const
2295 {
2296 return m_pos == m_str.size();
2297 }
2298
     // Reads characters up to the next line ending and returns them without it.
     // "\n", "\r\n" and a lone "\r" all terminate a line; null characters are dropped.
2301 {
2302 UnicodeString line;
2303
2304 bool rFound = false;
2305
2306 while (!atEnd()) {
2307 const auto c = getChar();
2308
     // A '\r' not followed by '\n' ends the line; un-read the extra character.
2309 if (rFound && c != UnicodeChar('\n')) {
2310 --m_pos;
2311
2312 return line;
2313 }
2314
2315 if (c == UnicodeChar('\r')) {
2316 rFound = true;
2317
2318 continue;
2319 } else if (c == UnicodeChar('\n')) {
2320 return line;
2321 }
2322
2323 if (!c.isNull()) {
2324 line.push_back(c);
2325 }
2326 }
2327
2328 return line;
2329 }
2330
2331private:
     // Returns the character at m_pos and advances, or a default-constructed
     // UnicodeChar when the end has been reached.
2333 getChar()
2334 {
2335 if (!atEnd()) {
2336 return m_str[m_pos++];
2337 } else {
2338 return UnicodeChar();
2339 }
2340 }
2341
2342private:
     // Decoded content of the stream and the current read position in it.
2343 UnicodeString m_str;
2344 long long int m_pos;
2345};
2346
2347#endif
2348
2349//! \return Is HTML comment closed?
2350template<class Trait>
2351inline bool
2352checkForEndHtmlComments(const typename Trait::String &line,
2353 long long int pos)
2354{
2355 const long long int e = line.indexOf(Trait::latin1ToString("-->"), pos);
2356
2357 if (e != -1) {
2358 return isHtmlComment<Trait>(line.sliced(0, e + 3));
2359 }
2360
2361 return false;
2362}
2363
2364//! Collect information about HTML comments.
//! For every occurrence of s_startComment in \p line an entry keyed by the
//! virgin position of that occurrence is inserted into res:
//!  - {0, true} when the text there starts with "<!-->",
//!  - {1, true} when it starts with "<!--->",
//!  - {2, true} or {-1, false} otherwise. The conditions choosing between these
//!    two are not visible in this listing; presumably they test whether the
//!    comment gets closed - TODO confirm.
2365template<class Trait>
2366inline void
2367checkForHtmlComments(const typename Trait::InternalString &line,
2370{
     // p - search position inside the line, l - index of the next stream line
     // that may be appended while looking further.
2371 long long int p = 0, l = stream.currentStreamPos();
2372
2373 const auto &str = line.asString();
2374
2375 while ((p = str.indexOf(Trait::latin1ToString(s_startComment), p)) != -1) {
2376 bool addNegative = false;
2377
     // Text from the comment start to the end of the line.
2378 auto c = str.sliced(p);
2379
2380 if (c.startsWith(Trait::latin1ToString("<!-->"))) {
2381 res.insert({line.virginPos(p), {0, true}});
2382
2383 p += 5;
2384
2385 continue;
2386 } else if (c.startsWith(Trait::latin1ToString("<!--->"))) {
2387 res.insert({line.virginPos(p), {1, true}});
2388
2389 p += 6;
2390
2391 continue;
2392 }
2393
2395 res.insert({line.virginPos(p), {2, true}});
2396 } else {
2397 addNegative = true;
2398
     // Extend the candidate text with the following stream lines, each one
     // joined with a single space.
2399 for (; l < stream.size(); ++l) {
2400 c.push_back(Trait::latin1ToChar(' '));
2401 c.push_back(stream.lineAt(l).asString());
2402
2404 res.insert({line.virginPos(p), {2, true}});
2405
2406 addNegative = false;
2407
2408 break;
2409 }
2410 }
2411 }
2412
     // Nothing positive was recorded for this occurrence: store the negative entry.
2413 if (addNegative) {
2414 res.insert({line.virginPos(p), {-1, false}});
2415 }
2416
2417 ++p;
2418 }
2419}
2420
2421template<class Trait>
// Parses the fragment accumulated in ctx.m_fragment and stores it in ctx.m_splitted.
// If the block-level parseFragment() reports a positive line number, only the
// lines before that line are stored (trailing blank lines are dropped and counted)
// and the pair {line number, may-break-list flag} of the cut line is returned.
// Otherwise the whole fragment is stored and {-1, false} is returned.
// In both cases the per-fragment fields of ctx are reset.
2422inline std::pair<long long int, bool>
2424 std::shared_ptr<Block<Trait>> parent,
2425 std::shared_ptr<Document<Trait>> doc,
2426 typename Trait::StringList &linksToParse,
2427 const typename Trait::String &workingPath,
2428 const typename Trait::String &fileName,
2429 bool collectRefLinks)
2430{
     // Resets the per-fragment state; m_splitted, m_html, m_emptyLinesBefore,
     // m_htmlCommentData and the line-type fields are left as they are.
2431 auto clearCtx = [&ctx] () {
2432 ctx.m_fragment.clear();
2433 ctx.m_type = BlockType::EmptyLine;
2434 ctx.m_emptyLineInList = false;
2435 ctx.m_fensedCodeInList = false;
2436 ctx.m_emptyLinesCount = 0;
2437 ctx.m_lineCounter = 0;
2438 ctx.m_indents.clear();
2439 ctx.m_indent = {-1, -1};
2440 ctx.m_startOfCode.clear();
2441 ctx.m_startOfCodeInList.clear();
2442 };
2443
2444 if (!ctx.m_fragment.empty()) {
2445 MdBlock<Trait> block = {ctx.m_fragment, ctx.m_emptyLinesBefore, ctx.m_emptyLinesCount > 0};
2446
2447 const auto line = parseFragment(block, parent, doc, linksToParse, workingPath,
2448 fileName, collectRefLinks, ctx.m_html);
2449
     // The reported line must never be the first line of the fragment.
2450 assert(line != ctx.m_fragment.front().second.m_lineNumber);
2451
2452 if (line > 0) {
     // Flush pending raw HTML into its parent (skipped while only collecting
     // reference links) and reset it.
2453 if (ctx.m_html.m_html) {
2454 if (!collectRefLinks) {
2455 ctx.m_html.m_parent->appendItem(ctx.m_html.m_html);
2456 }
2457
2458 resetHtmlTag<Trait>(ctx.m_html);
2459 }
2460
     // Iterator to the fragment entry whose line number equals the reported line.
2461 const auto it = ctx.m_fragment.cbegin() + (line - ctx.m_fragment.cbegin()->second.m_lineNumber);
2462
2463 MdBlock<Trait> tmp = {{}, ctx.m_emptyLinesBefore, false};
2464 std::copy(ctx.m_fragment.cbegin(), it, std::back_inserter(tmp.m_data));
2465
2466 long long int emptyLines = 0;
2467
     // Strip trailing blank lines, remembering how many there were.
2468 while (!tmp.m_data.empty() && tmp.m_data.back().first.asString().simplified().isEmpty()) {
2469 tmp.m_data.pop_back();
2470 tmp.m_emptyLineAfter = true;
2471 ++emptyLines;
2472 }
2473
2474 if (!tmp.m_data.empty()) {
2475 ctx.m_splitted.push_back(tmp);
2476 }
2477
     // Read through the iterator before clearCtx() empties m_fragment.
2478 const auto retLine = it->second.m_lineNumber;
2479 const auto retMayBreakList = it->second.m_mayBreakList;
2480
2481 clearCtx();
2482
2483 ctx.m_emptyLinesBefore = emptyLines;
2484
2485 return {retLine, retMayBreakList};
2486 }
2487
2488 ctx.m_splitted.push_back({ctx.m_fragment, ctx.m_emptyLinesBefore, ctx.m_emptyLinesCount > 0});
2489 }
2490
2491 clearCtx();
2492
2493 return {-1, false};
2494}
2495
//! Replace tabs with spaces (just for internal simpler use).
//!
//! Each tab is expanded in place to as many spaces as are needed to reach the
//! next multiple-of-four column, so a tab yields between one and four spaces.
template<class Trait>
inline void
replaceTabs(typename Trait::InternalString &s)
{
    const auto tab = Trait::latin1ToChar('\t');
    const auto space = Trait::latin1ToChar(' ');

    // Number of columns left until the next tab stop.
    unsigned char untilStop = 4;
    long long int total = s.length();
    long long int idx = 0;

    while (idx < total) {
        if (s[idx] == tab) {
            s.replaceOne(idx, 1, typename Trait::String(untilStop, space));

            // The string grew; skip over the spaces that were just inserted.
            total += untilStop - 1;
            idx += untilStop - 1;

            // Becomes 4 again after the decrement below.
            untilStop = 5;
        } else if (untilStop == 1) {
            // An ordinary character filled the last column before a tab stop.
            untilStop = 5;
        }

        ++idx;
        --untilStop;
    }
}
2518
2519template<class Trait>
// Reads lines from the stream into ctx.m_fragment until the stream ends or a
// line that starts a different block is found; a new footnote line flushes the
// current fragment and starts another one.
2520inline void
2522 StringListStream<Trait> &stream,
2523 std::shared_ptr<Block<Trait>> parent,
2524 std::shared_ptr<Document<Trait>> doc,
2525 typename Trait::StringList &linksToParse,
2526 const typename Trait::String &workingPath,
2527 const typename Trait::String &fileName,
2528 bool collectRefLinks)
2529{
     // emptyLinesCount - blank lines seen in a row, wasEmptyLine - a blank line
     // has been seen since the current fragment started.
2530 long long int emptyLinesCount = 0;
2531 bool wasEmptyLine = false;
2532
2533 while (!stream.atEnd()) {
2534 const auto currentLineNumber = stream.currentLineNumber();
2535
2536 typename Trait::InternalString line;
2537 bool mayBreak;
2538
2539 std::tie(line, mayBreak) = readLine(ctx, stream);
2540
2541 replaceTabs<Trait>(line);
2542
     // Index of the first non-space character; equals the length for a blank line.
2543 const auto ns = skipSpaces<Trait>(0, line.asString());
2544
     // Blank lines and lines starting with the space prefix tested below are
     // appended to the current fragment.
2545 if (ns == line.length() || line.asString().startsWith(Trait::latin1ToString(" "))) {
2546 if (ns == line.length()) {
2547 ++emptyLinesCount;
2548 wasEmptyLine = true;
2549 } else {
2550 emptyLinesCount = 0;
2551 }
2552
2553 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2554 } else if (!wasEmptyLine) {
     // No blank line yet: a footnote line flushes the current fragment and
     // becomes the main line of the next one; any other line is appended.
2555 if (isFootnote<Trait>(line.sliced(ns).asString())) {
2556 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2557
2558 ctx.m_lineType = BlockType::Footnote;
2559
2560 makeLineMain(ctx, line, emptyLinesCount, ctx.m_indent, ns, currentLineNumber);
2561
2562 continue;
2563 } else {
2564 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2565 }
2566 } else {
     // Such a line after a blank line: flush the fragment, classify the line and
     // make it the main line. Keep looping only if it starts another footnote.
2567 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2568
2569 ctx.m_lineType =
2570 whatIsTheLine(line, false, false, false, &ctx.m_startOfCodeInList, &ctx.m_indent,
2571 ctx.m_lineType == BlockType::EmptyLine, true, &ctx.m_indents);
2572
2573 makeLineMain(ctx, line, emptyLinesCount, ctx.m_indent, ns, currentLineNumber);
2574
2575 if (ctx.m_type == BlockType::Footnote) {
2576 wasEmptyLine = false;
2577
2578 continue;
2579 } else {
2580 break;
2581 }
2582 }
2583 }
2584
     // Stream exhausted with lines still pending: flush them.
2585 if (stream.atEnd() && !ctx.m_fragment.empty()) {
2586 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2587 }
2588}
2589
2590template<class Trait>
2591inline void
2592Parser<Trait>::finishHtml(ParserContext &ctx,
2593 std::shared_ptr<Block<Trait>> parent,
2594 std::shared_ptr<Document<Trait>> doc,
2595 bool collectRefLinks,
2596 bool top,
2597 bool dontProcessLastFreeHtml)
2598{
2599 if (!collectRefLinks || top) {
2600 if (ctx.m_html.m_html->isFreeTag()) {
2601 if (!dontProcessLastFreeHtml) {
2602 if (ctx.m_html.m_parent) {
2603 ctx.m_html.m_parent->appendItem(ctx.m_html.m_html);
2604
2605 updateLastPosInList(ctx.m_html);
2606 } else {
2607 parent->appendItem(ctx.m_html.m_html);
2608 }
2609 }
2610 } else {
2611 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
2612 p->appendItem(ctx.m_html.m_html);
2613 p->setStartColumn(ctx.m_html.m_html->startColumn());
2614 p->setStartLine(ctx.m_html.m_html->startLine());
2615 p->setEndColumn(ctx.m_html.m_html->endColumn());
2616 p->setEndLine(ctx.m_html.m_html->endLine());
2617 doc->appendItem(p);
2618 }
2619 }
2620
2621 if (!dontProcessLastFreeHtml) {
2622 resetHtmlTag(ctx.m_html);
2623 }
2624
2625 ctx.m_html.m_toAdjustLastPos.clear();
2626}
2627
2628template<class Trait>
2629inline void
2630Parser<Trait>::makeLineMain(ParserContext &ctx,
2631 const typename Trait::InternalString &line,
2632 long long int emptyLinesCount,
2633 const ListIndent &currentIndent,
2634 long long int ns,
2635 long long int currentLineNumber)
2636{
2637 if (ctx.m_html.m_htmlBlockType >= 6) {
2638 ctx.m_html.m_continueHtml = (emptyLinesCount <= 0);
2639 }
2640
2641 ctx.m_type = ctx.m_lineType;
2642
2643 switch (ctx.m_type) {
2644 case BlockType::List:
2645 case BlockType::ListWithFirstEmptyLine: {
2646 if (ctx.m_indents.empty())
2647 ctx.m_indents.push_back(currentIndent.m_indent);
2648
2649 ctx.m_indent = currentIndent;
2650 } break;
2651
2652 case BlockType::Code:
2653 ctx.m_startOfCode = startSequence<Trait>(line.asString());
2654 break;
2655
2656 default:
2657 break;
2658 }
2659
2660 if (!line.isEmpty() && ns < line.length()) {
2661 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2662 }
2663
2664 ctx.m_lineCounter = 1;
2665 ctx.m_emptyLinesCount = 0;
2666 ctx.m_emptyLinesBefore = emptyLinesCount;
2667}
2668
2669template<class Trait>
2670inline std::pair<long long int, bool>
2671Parser<Trait>::parseFragmentAndMakeNextLineMain(ParserContext &ctx,
2672 std::shared_ptr<Block<Trait>> parent,
2673 std::shared_ptr<Document<Trait>> doc,
2674 typename Trait::StringList &linksToParse,
2675 const typename Trait::String &workingPath,
2676 const typename Trait::String &fileName,
2677 bool collectRefLinks,
2678 const typename Trait::InternalString &line,
2679 const ListIndent &currentIndent,
2680 long long int ns,
2681 long long int currentLineNumber)
2682{
2683 const auto empty = ctx.m_emptyLinesCount;
2684
2685 const auto ret = parseFragment(ctx, parent, doc, linksToParse, workingPath,
2686 fileName, collectRefLinks);
2687
2688 makeLineMain(ctx, line, empty, currentIndent, ns, currentLineNumber);
2689
2690 return ret;
2691}
2692
2693template<class Trait>
2694inline bool
2695Parser<Trait>::isListType(BlockType t)
2696{
2697 switch (t) {
2698 case BlockType::List:
2699 case BlockType::ListWithFirstEmptyLine:
2700 return true;
2701
2702 default:
2703 return false;
2704 }
2705}
2706
//! Read the next line from the stream, replace every NUL character in it with
//! U+FFFD and run checkForHtmlComments() on it.
//!
//! ctx.m_htmlCommentData is cleared first and then passed to checkForHtmlComments().
//! \return The pair returned by stream.readLine(), with the replacement applied
//! to its first element.
//!
//! NOTE(review): the line declaring the second parameter is missing from this
//! listing (the numbering jumps from 2709 to 2711); judging by the callers it is
//! presumably `StringListStream<Trait> &stream)` - confirm against the repository.
2707template<class Trait>
2708std::pair<typename Trait::InternalString, bool>
2709Parser<Trait>::readLine(typename Parser<Trait>::ParserContext &ctx,
2711{
2712 ctx.m_htmlCommentData.clear();
2713
2714 auto line = stream.readLine();
2715
 // Zero-terminated UTF-16 string holding the single character U+FFFD.
2716 static const char16_t c_zeroReplaceWith[2] = {0xFFFD, 0};
2717
2718 line.first.replace(typename Trait::Char(0), Trait::utf16ToString(&c_zeroReplaceWith[0]));
2719
2720 checkForHtmlComments(line.first, stream, ctx.m_htmlCommentData);
2721
2722 return line;
2723}
2724
//! First parsing step: read lines from the stream one by one, classify each line
//! with whatIsTheLine() and either append it to ctx.m_fragment or flush the
//! fragment with parseFragment() and start a new one from the current line.
//!
//! \return The first result of parseFragment() / parseFragmentAndMakeNextLineMain()
//! whose first element is not -1 (parse() rewinds the stream to that line number
//! and calls this method again), or {-1, false} when the stream was consumed.
//!
//! NOTE(review): the line declaring the stream parameter is missing from this
//! listing (the numbering jumps from 2727 to 2729); judging by the call in parse()
//! it is presumably `StringListStream<Trait> &stream,` - confirm against the repository.
2725template<class Trait>
2726inline std::pair<long long int, bool>
2727Parser<Trait>::parseFirstStep(ParserContext &ctx,
2729 std::shared_ptr<Block<Trait>> parent,
2730 std::shared_ptr<Document<Trait>> doc,
2731 typename Trait::StringList &linksToParse,
2732 const typename Trait::String &workingPath,
2733 const typename Trait::String &fileName,
2734 bool collectRefLinks)
2735{
2736 while (!stream.atEnd()) {
2737 const auto currentLineNumber = stream.currentLineNumber();
2738
2739 typename Trait::InternalString line;
2740 bool mayBreak;
2741
2742 std::tie(line, mayBreak) = readLine(ctx, stream);
2743
 // Remember the type of the previous line, BlockType::Unknown is never remembered.
2744 if (ctx.m_lineType != BlockType::Unknown) {
2745 ctx.m_prevLineType = ctx.m_lineType;
2746 }
2747
2748 ctx.m_lineType = whatIsTheLine(line,
2749 (ctx.m_emptyLineInList || isListType(ctx.m_type)),
2750 ctx.m_prevLineType == BlockType::ListWithFirstEmptyLine,
2751 ctx.m_fensedCodeInList,
2752 &ctx.m_startOfCodeInList,
2753 &ctx.m_indent,
2754 ctx.m_lineType == BlockType::EmptyLine,
2755 true,
2756 &ctx.m_indents);
2757
 // Every fence line inside of a list toggles the "inside fenced code" flag.
2758 if (isListType(ctx.m_type) && ctx.m_lineType == BlockType::FensedCodeInList) {
2759 ctx.m_fensedCodeInList = !ctx.m_fensedCodeInList;
2760 }
2761
2762 const auto currentIndent = ctx.m_indent;
2763
2764 const auto ns = skipSpaces<Trait>(0, line.asString());
2765
 // Calculated before ctx.m_indents is updated for the current line below.
2766 const auto indentInListValue = indentInList(&ctx.m_indents, ns, true);
2767
 // A list item at level N drops the stored indents from index N on and
 // pushes its own indent.
2768 if (isListType(ctx.m_lineType) && !ctx.m_fensedCodeInList && ctx.m_indent.m_level > -1) {
2769 if (ctx.m_indent.m_level < (long long int)ctx.m_indents.size()) {
2770 ctx.m_indents.erase(ctx.m_indents.cbegin() + ctx.m_indent.m_level, ctx.m_indents.cend());
2771 }
2772
2773 ctx.m_indents.push_back(ctx.m_indent.m_indent);
2774 }
2775
 // Inside of an indented code block any line with more than 3 leading spaces
 // is treated as a part of that code, whatever whatIsTheLine() said.
2776 if (ctx.m_type == BlockType::CodeIndentedBySpaces && ns > 3) {
2777 ctx.m_lineType = BlockType::CodeIndentedBySpaces;
2778 }
2779
 // Second line of a list which first line was empty, and this line is not a list item.
2780 if (ctx.m_type == BlockType::ListWithFirstEmptyLine && ctx.m_lineCounter == 2 &&
2781 !isListType(ctx.m_lineType)) {
2782 if (ctx.m_emptyLinesCount > 0) {
2783 const auto l = parseFragmentAndMakeNextLineMain(ctx,
2784 parent,
2785 doc,
2786 linksToParse,
2787 workingPath,
2788 fileName,
2789 collectRefLinks,
2790 line,
2791 currentIndent,
2792 ns,
2793 currentLineNumber);
2794
2795 if (l.first != -1) {
2796 return l;
2797 }
2798
2799 continue;
2800 } else {
2801 ctx.m_emptyLineInList = false;
2802 ctx.m_emptyLinesCount = 0;
2803 }
2804 }
2805
2806 if (ctx.m_type == BlockType::ListWithFirstEmptyLine && ctx.m_lineCounter == 2) {
2807 ctx.m_type = BlockType::List;
2808 }
2809
2810 // Footnote.
2811 if (ctx.m_lineType == BlockType::Footnote) {
2812 const auto l = parseFragmentAndMakeNextLineMain(ctx,
2813 parent,
2814 doc,
2815 linksToParse,
2816 workingPath,
2817 fileName,
2818 collectRefLinks,
2819 line,
2820 currentIndent,
2821 ns,
2822 currentLineNumber);
2823
2824 if (l.first != -1) {
2825 return l;
2826 }
2827
2828 eatFootnote(ctx, stream, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2829
2830 continue;
2831 }
2832
2833 // First line of the fragment.
2834 if (ns != line.length() && ctx.m_type == BlockType::EmptyLine) {
2835 makeLineMain(ctx, line, ctx.m_emptyLinesCount, currentIndent, ns, currentLineNumber);
2836
2837 continue;
2838 } else if (ns == line.length() && ctx.m_type == BlockType::EmptyLine) {
2839 ++ctx.m_emptyLinesCount;
2840 continue;
2841 }
2842
2843 ++ctx.m_lineCounter;
2844
2845 // Got new empty line.
2846 if (ns == line.length()) {
2847 ++ctx.m_emptyLinesCount;
2848
2849 switch (ctx.m_type) {
 // An empty line parses the collected blockquote fragment right away.
2850 case BlockType::Blockquote: {
2851 const auto l = parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName,
2852 collectRefLinks);
2853
2854 if (l.first != -1) {
2855 return l;
2856 }
2857
2858 continue;
2859 }
2860
 // Only counted here, the decision is made when the next non-empty line comes.
2861 case BlockType::Text:
2862 case BlockType::CodeIndentedBySpaces:
2863 continue;
2864 break;
2865
 // Inside of a fenced code block the empty line is stored as a regular line.
2866 case BlockType::Code: {
2867 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2868 ctx.m_emptyLinesCount = 0;
2869
2870 continue;
2871 }
2872
2873 case BlockType::List:
2874 case BlockType::ListWithFirstEmptyLine: {
2875 ctx.m_emptyLineInList = true;
2876
2877 continue;
2878 }
2879
2880 default:
2881 break;
2882 }
2883 }
2884 // Empty new line in list.
2885 else if (ctx.m_emptyLineInList) {
 // The line still belongs to the list: put the skipped empty lines back
 // into the fragment (as empty strings) and append the line.
2886 if (indentInListValue || isListType(ctx.m_lineType) || ctx.m_lineType == BlockType::SomethingInList) {
2887 for (long long int i = 0; i < ctx.m_emptyLinesCount; ++i) {
2888 ctx.m_fragment.push_back({typename Trait::String(),
2889 {currentLineNumber - ctx.m_emptyLinesCount + i, {}, false}});
2890 }
2891
2892 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2893
2894 ctx.m_emptyLineInList = false;
2895 ctx.m_emptyLinesCount = 0;
2896
2897 continue;
 // The list is finished: parse it, classify the line again as if it was
 // not in a list and start a new fragment with it.
2898 } else {
2899 const auto empty = ctx.m_emptyLinesCount;
2900
2901 const auto l = parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName,
2902 collectRefLinks);
2903
2904 if (l.first != -1) {
2905 return l;
2906 }
2907
2908 ctx.m_lineType = whatIsTheLine(line, false, false, false, nullptr, nullptr,
2909 true, false, &ctx.m_indents);
2910
2911 makeLineMain(ctx, line, empty, currentIndent, ns, currentLineNumber);
2912
2913 continue;
2914 }
 // Non-empty line after empty line(s), not in a list.
2915 } else if (ctx.m_emptyLinesCount > 0) {
 // Indented code goes on after the empty lines: the empty lines are stored
 // as lines of spaces, as wide as the indent of the first line of the fragment.
2916 if (ctx.m_type == BlockType::CodeIndentedBySpaces &&
2917 ctx.m_lineType == BlockType::CodeIndentedBySpaces) {
2918 const auto indent = skipSpaces<Trait>(0, ctx.m_fragment.front().first.asString());
2919
2920 for (long long int i = 0; i < ctx.m_emptyLinesCount; ++i) {
2921 ctx.m_fragment.push_back({typename Trait::String(indent, Trait::latin1ToChar(' ')),
2922 {currentLineNumber - ctx.m_emptyLinesCount + i, {}, false}});
2923 }
2924
2925 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2926 ctx.m_emptyLinesCount = 0;
2927 } else {
2928 const auto l = parseFragmentAndMakeNextLineMain(ctx,
2929 parent,
2930 doc,
2931 linksToParse,
2932 workingPath,
2933 fileName,
2934 collectRefLinks,
2935 line,
2936 currentIndent,
2937 ns,
2938 currentLineNumber);
2939
2940 if (l.first != -1) {
2941 return l;
2942 }
2943 }
2944
2945 continue;
2946 }
2947
2948 // Something new and first block is not a code block or a list, blockquote.
2949 if (ctx.m_type != ctx.m_lineType && ctx.m_type != BlockType::Code &&
2950 !isListType(ctx.m_type) && ctx.m_type != BlockType::Blockquote) {
 // An indented line right after text is kept in the text fragment.
2951 if (ctx.m_type == BlockType::Text && ctx.m_lineType == BlockType::CodeIndentedBySpaces) {
2952 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2953 }
2954 else {
 // A list item right after text starts a new fragment only when it is not
 // an item with empty first line and, if ordered, its number is 1.
 // Otherwise the line is kept in the text fragment.
2955 if (ctx.m_type == BlockType::Text && isListType(ctx.m_lineType)) {
2956 if (ctx.m_lineType != BlockType::ListWithFirstEmptyLine) {
2957 int num = 0;
2958
2959 if (isOrderedList<Trait>(line.asString(), &num)) {
2960 if (num != 1) {
2961 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2962
2963 continue;
2964 }
2965 }
2966 } else {
2967 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2968
2969 continue;
2970 }
2971 }
2972
2973 const auto l = parseFragmentAndMakeNextLineMain(ctx,
2974 parent,
2975 doc,
2976 linksToParse,
2977 workingPath,
2978 fileName,
2979 collectRefLinks,
2980 line,
2981 currentIndent,
2982 ns,
2983 currentLineNumber);
2984
2985 if (l.first != -1) {
2986 return l;
2987 }
2988 }
2989 }
2990 // End of code block.
2991 else if (ctx.m_type == BlockType::Code && ctx.m_type == ctx.m_lineType &&
2992 !ctx.m_startOfCode.isEmpty() &&
2993 startSequence<Trait>(line.asString()).contains(ctx.m_startOfCode) &&
2994 isCodeFences<Trait>(line.asString(), true)) {
2995 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2996
 // Look at the next line to know whether the code block is followed by an
 // empty line (the end of the stream is counted as an empty line too), then
 // step the stream one line back. Note that this "line" shadows the outer one.
2997 if (!stream.atEnd()) {
2998 typename Trait::InternalString line;
2999
3000 std::tie(line, std::ignore) = readLine(ctx, stream);
3001
3002 if (line.asString().simplified().isEmpty()) {
3003 ++ctx.m_emptyLinesCount;
3004 }
3005
3006 stream.setLineNumber(stream.currentLineNumber() - 1);
3007 } else {
3008 ++ctx.m_emptyLinesCount;
3009 }
3010
3011 const auto l = parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName,
3012 collectRefLinks);
3013
3014 if (l.first != -1) {
3015 return l;
3016 }
3017 }
3018 // Not a continue of list.
3019 else if (ctx.m_type != ctx.m_lineType && isListType(ctx.m_type) &&
3020 ctx.m_lineType != BlockType::SomethingInList &&
3021 ctx.m_lineType != BlockType::FensedCodeInList && !isListType(ctx.m_lineType)) {
3022 const auto l = parseFragmentAndMakeNextLineMain(ctx,
3023 parent,
3024 doc,
3025 linksToParse,
3026 workingPath,
3027 fileName,
3028 collectRefLinks,
3029 line,
3030 currentIndent,
3031 ns,
3032 currentLineNumber);
3033
3034 if (l.first != -1) {
3035 return l;
3036 }
 // A heading fragment is parsed as soon as the next line arrives.
3037 } else if (ctx.m_type == BlockType::Heading) {
3038 const auto l = parseFragmentAndMakeNextLineMain(ctx,
3039 parent,
3040 doc,
3041 linksToParse,
3042 workingPath,
3043 fileName,
3044 collectRefLinks,
3045 line,
3046 currentIndent,
3047 ns,
3048 currentLineNumber);
3049
3050 if (l.first != -1) {
3051 return l;
3052 }
 // In all other cases the line continues the current fragment.
3053 } else {
3054 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
3055 }
3056
3057 ctx.m_emptyLinesCount = 0;
3058 }
3059
 // End of the stream: parse what is left in the fragment.
3060 if (!ctx.m_fragment.empty()) {
 // A code fragment that is still open here gets its start sequence appended
 // as an additional last line (with -1 as line number).
3061 if (ctx.m_type == BlockType::Code && !ctx.m_html.m_html && !ctx.m_html.m_continueHtml) {
3062 ctx.m_fragment.push_back({ctx.m_startOfCode, {-1, {}, false}});
3063 }
3064
3065 const auto l = parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName,
3066 collectRefLinks);
3067
3068 if (l.first != -1) {
3069 return l;
3070 }
3071 }
3072
3073 return {-1, false};
3074}
3075
3076template<class Trait>
3077inline void
3078Parser<Trait>::parseSecondStep(ParserContext &ctx,
3079 std::shared_ptr<Block<Trait>> parent,
3080 std::shared_ptr<Document<Trait>> doc,
3081 typename Trait::StringList &linksToParse,
3082 const typename Trait::String &workingPath,
3083 const typename Trait::String &fileName,
3084 bool collectRefLinks,
3085 bool top,
3086 bool dontProcessLastFreeHtml)
3087{
3088 if (top) {
3089 resetHtmlTag(ctx.m_html);
3090
3091 for (long long int i = 0; i < (long long int)ctx.m_splitted.size(); ++i) {
3092 parseFragment(ctx.m_splitted[i], parent, doc, linksToParse, workingPath, fileName, false,
3093 ctx.m_html);
3094
3095 if (ctx.m_html.m_htmlBlockType >= 6) {
3096 ctx.m_html.m_continueHtml = (!ctx.m_splitted[i].m_emptyLineAfter);
3097 }
3098
3099 if (ctx.m_html.m_html && !ctx.m_html.m_continueHtml) {
3100 finishHtml(ctx, parent, doc, collectRefLinks, top, dontProcessLastFreeHtml);
3101 } else if (!ctx.m_html.m_html) {
3102 ctx.m_html.m_toAdjustLastPos.clear();
3103 }
3104 }
3105 }
3106
3107 if (ctx.m_html.m_html) {
3108 finishHtml(ctx, parent, doc, collectRefLinks, top, dontProcessLastFreeHtml);
3109 }
3110}
3111
//! Parse the stream: run parseFirstStep() as long as it returns a line number
//! other than -1 (the stream is set to that line before each new run), then run
//! parseSecondStep().
//!
//! With \p stopOnMayBreakList set, the re-running stops as soon as the second
//! element of the pair returned by parseFirstStep() is true.
//! \return The HTML state of the parsing context and the last line number
//! returned by parseFirstStep() (-1 when the loop ended because of it).
//!
//! NOTE(review): the declarator line that names this method is missing from this
//! listing (the numbering jumps from 3113 to 3115); judging by the call in
//! parseStream() it is presumably `Parser<Trait>::parse(StringListStream<Trait> &stream,`
//! - confirm against the repository.
3112template<class Trait>
3113inline std::pair<RawHtmlBlock<Trait>, long long int>
3115 std::shared_ptr<Block<Trait>> parent,
3116 std::shared_ptr<Document<Trait>> doc,
3117 typename Trait::StringList &linksToParse,
3118 const typename Trait::String &workingPath,
3119 const typename Trait::String &fileName,
3120 bool collectRefLinks,
3121 bool top,
3122 bool dontProcessLastFreeHtml,
3123 bool stopOnMayBreakList)
3124{
3125 ParserContext ctx;
3126
 // Resets only the per-fragment part of the context, everything else in ctx
 // is kept between the runs of parseFirstStep().
3127 auto clearCtx = [&]()
3128 {
3129 ctx.m_fragment.clear();
3130 ctx.m_type = BlockType::EmptyLine;
3131 ctx.m_lineCounter = 0;
3132 };
3133
3134 auto line = parseFirstStep(ctx, stream, parent, doc, linksToParse, workingPath, fileName,
3135 collectRefLinks);
3136
3137 clearCtx();
3138
3139 while (line.first != -1 && !(stopOnMayBreakList && line.second)) {
3140 stream.setLineNumber(line.first);
3141
3142 line = parseFirstStep(ctx, stream, parent, doc, linksToParse, workingPath, fileName,
3143 collectRefLinks);
3144
3145 clearCtx();
3146 }
3147
3148 parseSecondStep(ctx, parent, doc, linksToParse, workingPath, fileName,
3149 collectRefLinks, top, dontProcessLastFreeHtml);
3150
3151 return {ctx.m_html, line.first};
3152}
3153
3154#ifdef MD4QT_QT_SUPPORT
3155
3156template<>
3157inline void
3158Parser<QStringTrait>::parseFile(const QString &fileName,
3159 bool recursive,
3160 std::shared_ptr<Document<QStringTrait>> doc,
3161 const QStringList &ext,
3162 QStringList *parentLinks)
3163{
3164 QFileInfo fi(fileName);
3165
3166 if (fi.exists() && ext.contains(fi.suffix().toLower())) {
3167 QFile f(fileName);
3168
3169 if (f.open(QIODevice::ReadOnly)) {
3170 QTextStream s(f.readAll());
3171 f.close();
3172
3173 parseStream(s, fi.absolutePath(), fi.fileName(), recursive, doc, ext, parentLinks);
3174 }
3175 }
3176}
3177
3178#endif
3179
3180#ifdef MD4QT_ICU_STL_SUPPORT
3181
3182template<>
3183inline void
3184Parser<UnicodeStringTrait>::parseFile(const UnicodeString &fileName,
3185 bool recursive,
3186 std::shared_ptr<Document<UnicodeStringTrait>> doc,
3187 const std::vector<UnicodeString> &ext,
3188 std::vector<UnicodeString> *parentLinks)
3189{
3190 if (UnicodeStringTrait::fileExists(fileName)) {
3191 std::string fn;
3192 fileName.toUTF8String(fn);
3193
3194 try {
3195 auto e = UnicodeString::fromUTF8(std::filesystem::u8path(fn).extension().u8string());
3196
3197 if (!e.isEmpty()) {
3198 e.remove(0, 1);
3199 }
3200
3201 if (std::find(ext.cbegin(), ext.cend(), e.toLower()) != ext.cend()) {
3202 auto path = std::filesystem::canonical(std::filesystem::u8path(fn));
3203 std::ifstream file(path.c_str(), std::ios::in | std::ios::binary);
3204
3205 if (file.good()) {
3206 const auto fileNameS = path.filename().u8string();
3207 auto workingDirectory = path.remove_filename().u8string();
3208
3209 if (!workingDirectory.empty()) {
3210 workingDirectory.erase(workingDirectory.size() - 1, 1);
3211 }
3212
3213 std::replace(workingDirectory.begin(), workingDirectory.end(), '\\', '/');
3214
3215 parseStream(file, UnicodeString::fromUTF8(workingDirectory),
3216 UnicodeString::fromUTF8(fileNameS), recursive, doc, ext, parentLinks);
3217
3218 file.close();
3219 }
3220 }
3221 } catch (const std::exception &) {
3222 }
3223 }
3224}
3225
3226#endif
3227
3228//! Resolve links in the document.
3229template<class Trait>
3230void
3231resolveLinks(typename Trait::StringList &linksToParse,
3232 std::shared_ptr<Document<Trait>> doc)
3233{
3234 for (auto it = linksToParse.begin(), last = linksToParse.end(); it != last; ++it) {
3235 auto nextFileName = *it;
3236
3237 if (nextFileName.startsWith(Trait::latin1ToString("#"))) {
3238 const auto lit = doc->labeledLinks().find(nextFileName);
3239
3240 if (lit != doc->labeledLinks().cend()) {
3241 nextFileName = lit->second->url();
3242 } else {
3243 continue;
3244 }
3245 }
3246
3247 if (Trait::fileExists(nextFileName)) {
3248 *it = Trait::absoluteFilePath(nextFileName);
3249 }
3250 }
3251}
3252
3253template<class Trait>
3254inline void
3255Parser<Trait>::parseStream(typename Trait::TextStream &s,
3256 const typename Trait::String &workingPath,
3257 const typename Trait::String &fileName,
3258 bool recursive,
3259 std::shared_ptr<Document<Trait>> doc,
3260 const typename Trait::StringList &ext,
3261 typename Trait::StringList *parentLinks)
3262{
3263 typename Trait::StringList linksToParse;
3264
3265 const auto path = workingPath.isEmpty() ? typename Trait::String(fileName) :
3266 typename Trait::String(workingPath + Trait::latin1ToString("/") + fileName);
3267
3268 doc->appendItem(std::shared_ptr<Anchor<Trait>>(new Anchor<Trait>(path)));
3269
3270 typename MdBlock<Trait>::Data data;
3271
3272 {
3273 TextStream<Trait> stream(s);
3274
3275 long long int i = 0;
3276
3277 while (!stream.atEnd()) {
3278 data.push_back(std::pair<typename Trait::InternalString, MdLineData>(stream.readLine(), {i}));
3279 ++i;
3280 }
3281 }
3282
3283 StringListStream<Trait> stream(data);
3284
3285 parse(stream, doc, doc, linksToParse, workingPath, fileName, true, true);
3286
3287 m_parsedFiles.push_back(path);
3288
3289 resolveLinks<Trait>(linksToParse, doc);
3290
3291 // Parse all links if parsing is recursive.
3292 if (recursive && !linksToParse.empty()) {
3293 const auto tmpLinks = linksToParse;
3294
3295 while (!linksToParse.empty()) {
3296 auto nextFileName = linksToParse.front();
3297 linksToParse.erase(linksToParse.cbegin());
3298
3299 if (parentLinks) {
3300 const auto pit = std::find(parentLinks->cbegin(), parentLinks->cend(), nextFileName);
3301
3302 if (pit != parentLinks->cend()) {
3303 continue;
3304 }
3305 }
3306
3307 if (nextFileName.startsWith(Trait::latin1ToString("#"))) {
3308 continue;
3309 }
3310
3311 const auto pit = std::find(m_parsedFiles.cbegin(), m_parsedFiles.cend(), nextFileName);
3312
3313 if (pit == m_parsedFiles.cend()) {
3314 if (!doc->isEmpty() && doc->items().back()->type() != ItemType::PageBreak) {
3315 doc->appendItem(std::shared_ptr<PageBreak<Trait>>(new PageBreak<Trait>));
3316 }
3317
3318 parseFile(nextFileName, recursive, doc, ext, &linksToParse);
3319 }
3320 }
3321
3322 if (parentLinks) {
3323 std::copy(tmpLinks.cbegin(), tmpLinks.cend(), std::back_inserter(*parentLinks));
3324 }
3325 }
3326}
3327
//! \return Position of first character in list item.
//! \p s is the whole line of the list item, \p ordered tells whether digits
//! stand in front of the character that is stepped over as the marker.
template<class Trait>
inline long long int
posOfListItem(const typename Trait::String &s,
              bool ordered)
{
    long long int pos = 0;

    // Leading spaces.
    while (pos < s.size() && s[pos].isSpace()) {
        ++pos;
    }

    // Number of the ordered list item.
    if (ordered) {
        while (pos < s.size() && s[pos].isDigit()) {
            ++pos;
        }
    }

    // Step over one more character (presumably the bullet, or the delimiter
    // that follows the number).
    ++pos;

    const long long int afterMarker = pos;

    while (pos < s.size() && s[pos].isSpace()) {
        ++pos;
    }

    const long long int spaces = pos - afterMarker;

    if (pos == s.length() || spaces > 4) {
        // Nothing but spaces after the marker, or more than four of them:
        // the content starts one column after the marker.
        pos = pos - spaces + 1;
    } else if (spaces == 0) {
        ++pos;
    }

    return pos;
}
3370
//! \return Level in indents for the given position.
//! The level is the number of entries of \p indents that are left when all
//! trailing entries greater than \p pos are dropped.
inline long long int
listLevel(const std::vector<long long int> &indents,
          long long int pos)
{
    // Search from the deepest indent for the first one that pos reaches.
    const auto reached = std::find_if(indents.crbegin(), indents.crend(),
                                      [pos](long long int indent) {
                                          return pos >= indent;
                                      });

    return static_cast<long long int>(indents.crend() - reached);
}
3388
//! Detect the type of block that the line \p str belongs to.
//!
//! Note that tabs in \p str are replaced with spaces in place, i.e. \p str is modified.
//!
//! \param inList Is the line checked as a line inside of a list?
//! \param inListWithFirstEmptyLine Prevents the fallback to SomethingInList (see the end of the list branch).
//! \param fensedCodeInList Is a fenced code block opened inside of the list?
//! \param startOfCode Start sequence of the fenced code in list: compared with the
//! line when \p fensedCodeInList is set, overwritten when a new fence is found in the list. May be null.
//! \param indent Receives indent and level of a detected list item when \p calcIndent
//! is set; also used for the check of a heading's indentation. May be null.
//! \param emptyLinePreceded Was there an empty line before this one?
//! \param calcIndent Should \p indent be updated?
//! \param indents Indents of the list items known so far. May be null.
//! \return Detected type, BlockType::Text if nothing special was recognized and
//! BlockType::EmptyLine for a line without non-space characters.
3389template<class Trait>
3390inline typename Parser<Trait>::BlockType
3391Parser<Trait>::whatIsTheLine(typename Trait::InternalString &str,
3392 bool inList,
3393 bool inListWithFirstEmptyLine,
3394 bool fensedCodeInList,
3395 typename Trait::String *startOfCode,
3396 ListIndent *indent,
3397 bool emptyLinePreceded,
3398 bool calcIndent,
3399 const std::vector<long long int> *indents)
3400{
3401 replaceTabs<Trait>(str);
3402
3403 const auto first = skipSpaces<Trait>(0, str.asString());
3404
3405 if (first < str.length()) {
 // The line without leading spaces.
3406 auto s = str.sliced(first);
3407
3408 const bool isBlockquote = s.asString().startsWith(Trait::latin1ToString(">"));
3409 const bool indentIn = indentInList(indents, first, false);
3410 bool isHeading = false;
3411
 // Footnote wins over everything else.
3412 if (first < 4 && isFootnote<Trait>(s.asString())) {
3413 return BlockType::Footnote;
3414 }
3415
 // ATX heading: 1..6 "#" followed by a space or the end of the line, indented
 // by less than 4 spaces (relative to indent->m_indent when indent is given).
3416 if (s.asString().startsWith(Trait::latin1ToString("#")) &&
3417 (indent ? first - indent->m_indent < 4 : first < 4)) {
3418 long long int c = 0;
3419
3420 while (c < s.length() && s[c] == Trait::latin1ToChar('#')) {
3421 ++c;
3422 }
3423
3424 if (c <= 6 && ((c < s.length() && s[c].isSpace()) || c == s.length())) {
3425 isHeading = true;
3426 }
3427 }
3428
3429 if (inList) {
3430 bool isFirstLineEmpty = false;
3431 const auto orderedList = isOrderedList<Trait>(str.asString(), nullptr, nullptr, nullptr,
3432 &isFirstLineEmpty);
3433 const bool fensedCode = isCodeFences<Trait>(s.asString());
3434 const auto codeIndentedBySpaces = emptyLinePreceded && first >= 4 &&
3435 !indentInList(indents, first, true);
3436
 // Inside of a fenced code in the list: a sufficiently indented line either
 // closes the code (a fence containing the start sequence) or is its content.
3437 if (fensedCodeInList) {
3438 if (indentInList(indents, first, true)) {
3439 if (fensedCode) {
3440 if (startOfCode && startSequence<Trait>(s.asString()).contains(*startOfCode)) {
3441 return BlockType::FensedCodeInList;
3442 }
3443 }
3444
3445 return BlockType::SomethingInList;
3446 }
3447 }
3448
 // A fence that opens code inside of the list, its start sequence is remembered.
3449 if (fensedCode && indentIn) {
3450 if (startOfCode) {
3451 *startOfCode = startSequence<Trait>(s.asString());
3452 }
3453
3454 return BlockType::FensedCodeInList;
 // List item: "-", "+" or "*" followed by a space or the end of the line, or
 // an ordered list marker; indented by less than 4 spaces or within the list.
3455 } else if ((((s.asString().startsWith(Trait::latin1ToString("-")) ||
3456 s.asString().startsWith(Trait::latin1ToString("+")) ||
3457 s.asString().startsWith(Trait::latin1ToString("*"))) &&
3458 ((s.length() > 1 && s[1] == Trait::latin1ToChar(' ')) || s.length() == 1)) ||
3459 orderedList) && (first < 4 || indentIn)) {
3460 if (codeIndentedBySpaces) {
3461 return BlockType::CodeIndentedBySpaces;
3462 }
3463
3464 if (indent && calcIndent) {
3465 indent->m_indent = posOfListItem<Trait>(str.asString(), orderedList);
3466 indent->m_level = (indents ? listLevel(*indents, first) : -1);
3467 }
3468
3469 if (s.simplified().length() == 1 || isFirstLineEmpty) {
3470 return BlockType::ListWithFirstEmptyLine;
3471 } else {
3472 return BlockType::List;
3473 }
3474 } else if (indentInList(indents, first, true)) {
3475 return BlockType::SomethingInList;
3476 }
3477 else {
 // Not indented enough for the list: the line is still taken as a part of
 // the list unless one of the conditions below tells otherwise. If it does,
 // the generic checks at the end of this method decide.
3478 if (!isHeading && !isBlockquote &&
3479 !(fensedCode && first < 4) && !emptyLinePreceded && !inListWithFirstEmptyLine) {
3480 return BlockType::SomethingInList;
3481 }
3482 }
3483 } else {
3484 bool isFirstLineEmpty = false;
3485
3486 const auto orderedList = isOrderedList<Trait>(str.asString(), nullptr, nullptr, nullptr,
3487 &isFirstLineEmpty);
3488 const bool isHLine = first < 4 && isHorizontalLine<Trait>(s.asString());
3489
 // Outside of a list a horizontal line is never taken as a list item.
3490 if (!isHLine &&
3491 (((s.asString().startsWith(Trait::latin1ToString("-")) || s.asString().startsWith(Trait::latin1ToString("+")) ||
3492 s.asString().startsWith(Trait::latin1ToString("*"))) &&
3493 ((s.length() > 1 && s[1] == Trait::latin1ToChar(' ')) || s.length() == 1)) ||
3494 orderedList) && first < 4) {
3495 if (indent && calcIndent) {
3496 indent->m_indent = posOfListItem<Trait>(str.asString(), orderedList);
3497 indent->m_level = (indents ? listLevel(*indents, first) : -1);
3498 }
3499
3500 if (s.simplified().length() == 1 || isFirstLineEmpty) {
3501 return BlockType::ListWithFirstEmptyLine;
3502 } else {
3503 return BlockType::List;
3504 }
3505 }
3506 }
3507
 // Generic checks, the order matters: indented code, fenced code, blockquote, heading.
3508 if (str.asString().startsWith(typename Trait::String(4, Trait::latin1ToChar(' ')))) {
3509 return BlockType::CodeIndentedBySpaces;
3510 } else if (isCodeFences<Trait>(str.asString())) {
3511 return BlockType::Code;
3512 } else if (isBlockquote) {
3513 return BlockType::Blockquote;
3514 } else if (isHeading) {
3515 return BlockType::Heading;
3516 }
3517 } else {
3518 return BlockType::EmptyLine;
3519 }
3520
3521 return BlockType::Text;
3522}
3523
3524template<class Trait>
3525inline long long int
3526Parser<Trait>::parseFragment(MdBlock<Trait> &fr,
3527 std::shared_ptr<Block<Trait>> parent,
3528 std::shared_ptr<Document<Trait>> doc,
3529 typename Trait::StringList &linksToParse,
3530 const typename Trait::String &workingPath,
3531 const typename Trait::String &fileName,
3532 bool collectRefLinks,
3533 RawHtmlBlock<Trait> &html)
3534{
3535 if (html.m_continueHtml) {
3536 return parseText(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3537 } else {
3538 if (html.m_html) {
3539 if (!collectRefLinks) {
3540 parent->appendItem(html.m_html);
3541 }
3542
3543 resetHtmlTag(html);
3544 }
3545
3546 switch (whatIsTheLine(fr.m_data.front().first)) {
3547 case BlockType::Footnote:
3548 parseFootnote(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
3549 break;
3550
3551 case BlockType::Text:
3552 return parseText(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3553 break;
3554
3555 case BlockType::Blockquote:
3556 return parseBlockquote(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3557 break;
3558
3559 case BlockType::Code:
3560 return parseCode(fr, parent, collectRefLinks);
3561 break;
3562
3563 case BlockType::CodeIndentedBySpaces: {
3564 int indent = 1;
3565
3566 if (fr.m_data.front().first.asString().startsWith(Trait::latin1ToString(" "))) {
3567 indent = 4;
3568 }
3569
3570 return parseCodeIndentedBySpaces(fr, parent, collectRefLinks, indent, {}, -1, -1, false);
3571 } break;
3572
3573 case BlockType::Heading:
3574 parseHeading(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
3575 break;
3576
3577 case BlockType::List:
3578 case BlockType::ListWithFirstEmptyLine:
3579 return parseList(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3580
3581 default:
3582 break;
3583 }
3584 }
3585
3586 return -1;
3587}
3588
3589template<class Trait>
3590inline void
3591Parser<Trait>::clearCache()
3592{
3593 m_parsedFiles.clear();
3594}
3595
//! \return Number of columns in table, if the given string is a table header.
//! 0 is returned for a string without "|". One leading and one trailing "|" are
//! not counted as separators, and a "|" escaped with a backslash is not one either.
template<class Trait>
inline int
isTableHeader(const typename Trait::String &s)
{
    if (!s.contains(Trait::latin1ToChar('|'))) {
        return 0;
    }

    const auto row = s.simplified();
    const auto from = row.startsWith(Trait::latin1ToString("|")) ? 1 : 0;
    const auto count = row.size() - from - (row.endsWith(Trait::latin1ToString("|")) && row.size() > 1 ? 1 : 0);
    const auto cells = row.sliced(from, count);

    int separators = 0;
    bool escaped = false;

    for (long long int i = 0; i < cells.size(); ++i) {
        // The character right after an unescaped backslash is consumed as is.
        if (escaped) {
            escaped = false;

            continue;
        }

        if (cells[i] == Trait::latin1ToChar('\\')) {
            escaped = true;
        } else if (cells[i] == Trait::latin1ToChar('|')) {
            ++separators;
        }
    }

    // N separators split the row into N + 1 columns.
    return separators + 1;
}
3633
3634template<class Trait>
3635inline long long int
3636Parser<Trait>::parseText(MdBlock<Trait> &fr,
3637 std::shared_ptr<Block<Trait>> parent,
3638 std::shared_ptr<Document<Trait>> doc,
3639 typename Trait::StringList &linksToParse,
3640 const typename Trait::String &workingPath,
3641 const typename Trait::String &fileName,
3642 bool collectRefLinks,
3643 RawHtmlBlock<Trait> &html)
3644{
3645 const auto h = isTableHeader<Trait>(fr.m_data.front().first.asString());
3646 const auto c = fr.m_data.size() > 1 ? isTableAlignment<Trait>(fr.m_data[1].first.asString()) : 0;
3647
3648 if (c && h && c == h && !html.m_continueHtml) {
3649 parseTable(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, c);
3650
3651 if (!fr.m_data.empty()) {
3652 return fr.m_data.front().second.m_lineNumber;
3653 } else {
3654 return -1;
3655 }
3656 } else {
3657 return parseParagraph(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3658 }
3659}
3660
3661//! Find and remove heading label.
3662template<class Trait>
3663inline std::pair<typename Trait::String, WithPosition>
3664findAndRemoveHeaderLabel(typename Trait::InternalString &s)
3665{
3666 const auto start = s.asString().indexOf(Trait::latin1ToString("{#"));
3667
3668 if (start >= 0) {
3669 long long int p = start + 2;
3670
3671 for (; p < s.length(); ++p) {
3672 if (s[p] == Trait::latin1ToChar('}')) {
3673 break;
3674 }
3675 }
3676
3677 if (p < s.length() && s[p] == Trait::latin1ToChar('}')) {
3678 WithPosition pos;
3679 pos.setStartColumn(s.virginPos(start));
3680 pos.setEndColumn(s.virginPos(p));
3681
3682 const auto label = s.sliced(start, p - start + 1).asString();
3683 s.remove(start, p - start + 1);
3684 return {label, pos};
3685 }
3686 }
3687
3688 return {};
3689}
3690
//! Convert string to label.
//! Letters, digits, '-' and '_' are copied as is, every space character becomes "-",
//! anything else is dropped.
template<class Trait>
inline typename Trait::String
stringToLabel(const typename Trait::String &s)
{
    const auto dash = Trait::latin1ToChar('-');
    const auto underscore = Trait::latin1ToChar('_');

    typename Trait::String label;

    for (long long int idx = 0; idx < s.length(); ++idx) {
        const auto ch = s[idx];
        const bool keepAsIs = ch.isLetter() || ch.isDigit() || ch == dash || ch == underscore;

        if (keepAsIs) {
            label.push_back(ch);
        } else if (ch.isSpace()) {
            label.push_back(Trait::latin1ToString("-"));
        }
    }

    return label;
}
3711
3712//! Convert Paragraph to label.
//! Walks the paragraph items and concatenates the labels produced from Text, Image,
//! Link and Code items; every other item type contributes nothing.
// NOTE(review): listing line 3715 (function name and parameter list) is absent from this
// extract. The recursive calls below suggest the name is paragraphToLabel and that the
// argument is a raw paragraph pointer named p - confirm against the real header.
3713template<class Trait>
3714inline typename Trait::String
3716{
3717 typename Trait::String l;
3718
// A null paragraph yields an empty label.
3719 if (!p) {
3720 return l;
3721 }
3722
3723 for (auto it = p->items().cbegin(), last = p->items().cend(); it != last; ++it) {
3724 switch ((*it)->type()) {
3725 case ItemType::Text: {
3726 auto t = static_cast<Text<Trait> *>(it->get());
3727 const auto text = t->text();
3728 l.push_back(stringToLabel<Trait>(text));
3729 } break;
3730
// For images the nested paragraph is preferred; the plain text is the fallback.
3731 case ItemType::Image: {
3732 auto i = static_cast<Image<Trait> *>(it->get());
3733
3734 if (!i->p()->isEmpty()) {
3735 l.push_back(paragraphToLabel(i->p().get()));
3736 } else if (!i->text().isEmpty()) {
3737 l.push_back(stringToLabel<Trait>(i->text()));
3738 }
3739 } break;
3740
// Links are handled the same way as images: nested paragraph first, then text.
3741 case ItemType::Link: {
3742 auto link = static_cast<Link<Trait> *>(it->get());
3743
3744 if (!link->p()->isEmpty()) {
3745 l.push_back(paragraphToLabel(link->p().get()));
3746 } else if (!link->text().isEmpty()) {
3747 l.push_back(stringToLabel<Trait>(link->text()));
3748 }
3749 } break;
3750
3751 case ItemType::Code: {
3752 auto c = static_cast<Code<Trait> *>(it->get());
3753
3754 if (!c->text().isEmpty()) {
3755 l.push_back(stringToLabel<Trait>(c->text()));
3756 }
3757 } break;
3758
// Any other item type does not take part in the label.
3759 default:
3760 break;
3761 }
3762 }
3763
3764 return l;
3765}
3766
3767//! Find and remove closing sequence of "#" in heading.
3768template<class Trait>
3769inline WithPosition
3770findAndRemoveClosingSequence(typename Trait::InternalString &s)
3771{
3772 long long int end = -1;
3773 long long int start = -1;
3774
3775 for (long long int i = s.length() - 1; i >= 0; --i) {
3776 if (!s[i].isSpace() && s[i] != Trait::latin1ToChar('#') && end == -1) {
3777 return {};
3778 }
3779
3780 if (s[i] == Trait::latin1ToChar('#')) {
3781 if (end == -1) {
3782 end = i;
3783 }
3784
3785 if (i - 1 >= 0) {
3786 if (s[i - 1].isSpace()) {
3787 start = i;
3788 break;
3789 } else if (s[i - 1] != Trait::latin1ToChar('#')) {
3790 return {};
3791 }
3792 } else {
3793 start = 0;
3794 }
3795 }
3796 }
3797
3798 WithPosition ret;
3799
3800 if (start != -1 && end != -1) {
3801 ret.setStartColumn(s.virginPos(start));
3802 ret.setEndColumn(s.virginPos(end));
3803
3804 s.remove(start, end - start + 1);
3805 }
3806
3807 return ret;
3808}
3809
//! Parse an ATX-style ("#"-prefixed) heading from the first line of \p fr and append it
//! to \p parent. Does nothing when the block is empty or when \p collectRefLinks is set.
//! The processed line is erased from \p fr and the heading is registered in \p doc
//! under its label.
// NOTE(review): listing lines 3884 and 3888 are absent from this extract; the variable
// html passed to parseFormattedTextLinksImages() is presumably declared there - confirm.
3810template<class Trait>
3811inline void
3812Parser<Trait>::parseHeading(MdBlock<Trait> &fr,
3813 std::shared_ptr<Block<Trait>> parent,
3814 std::shared_ptr<Document<Trait>> doc,
3815 typename Trait::StringList &linksToParse,
3816 const typename Trait::String &workingPath,
3817 const typename Trait::String &fileName,
3818 bool collectRefLinks)
3819{
3820 if (!fr.m_data.empty() && !collectRefLinks) {
3821 auto line = fr.m_data.front().first;
3822
// The heading spans the single first line of the block, from its first non-space
// character to its last character.
3823 std::shared_ptr<Heading<Trait>> h(new Heading<Trait>);
3824 h->setStartColumn(line.virginPos(skipSpaces<Trait>(0, line.asString())));
3825 h->setStartLine(fr.m_data.front().second.m_lineNumber);
3826 h->setEndColumn(line.virginPos(line.length() - 1));
3827 h->setEndLine(h->startLine());
3828
3829 long long int pos = 0;
3830 pos = skipSpaces<Trait>(pos, line.asString());
3831
3832 if (pos > 0) {
3833 line = line.sliced(pos);
3834 }
3835
3836 pos = 0;
3837 int lvl = 0;
3838
// The heading level is the number of leading '#' characters.
3839 while (pos < line.length() && line[pos] == Trait::latin1ToChar('#')) {
3840 ++lvl;
3841 ++pos;
3842 }
3843
3844 WithPosition startDelim = {h->startColumn(), h->startLine(),
3845 line.virginPos(pos - 1), h->startLine()};
3846
3847 pos = skipSpaces<Trait>(pos, line.asString());
3848
// Keep only the heading text (after the '#' run and following spaces) in the block.
3849 if (pos > 0) {
3850 fr.m_data.front().first = line.sliced(pos);
3851 }
3852
// The explicit "{#label}" is cut out first, then the optional closing '#' run.
3853 auto label = findAndRemoveHeaderLabel<Trait>(fr.m_data.front().first);
3854
3855 typename Heading<Trait>::Delims delims = {startDelim};
3856
3857 auto endDelim = findAndRemoveClosingSequence<Trait>(fr.m_data.front().first);
3858
3859 if (endDelim.startColumn() != -1) {
3860 endDelim.setStartLine(fr.m_data.front().second.m_lineNumber);
3861 endDelim.setEndLine(endDelim.startLine());
3862
3863 delims.push_back(endDelim);
3864 }
3865
3866 h->setDelims(delims);
3867
3868 h->setLevel(lvl);
3869
// Explicit label: the text between the braces (so it starts with '#'), followed by
// "/", the working path (if any) and the file name.
3870 if (!label.first.isEmpty()) {
3871 h->setLabel(label.first.sliced(1, label.first.length() - 2) + Trait::latin1ToString("/") +
3872 (!workingPath.isEmpty() ? workingPath + Trait::latin1ToString("/") :
3873 Trait::latin1ToString("")) + fileName);
3874
3875 label.second.setStartLine(fr.m_data.front().second.m_lineNumber);
3876 label.second.setEndLine(label.second.startLine());
3877
3878 h->setLabelPos(label.second);
3879 }
3880
3881 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
3882
// The heading text is parsed as a one-line block of its own.
3883 typename MdBlock<Trait>::Data tmp;
3885 tmp.push_back(fr.m_data.front());
3886 MdBlock<Trait> block = {tmp, 0};
3887
3889
3890 parseFormattedTextLinksImages(block, p, doc, linksToParse, workingPath, fileName,
3891 false, false, html, false);
3892
// The heading line is consumed.
3893 fr.m_data.erase(fr.m_data.cbegin());
3894
// If parsing produced a nested paragraph as the first item, use it as the heading text.
3895 if (p->items().size() && p->items().at(0)->type() == ItemType::Paragraph) {
3896 h->setText(std::static_pointer_cast<Paragraph<Trait>>(p->items().at(0)));
3897 } else {
3898 h->setText(p);
3899 }
3900
3901 if (h->isLabeled()) {
3902 doc->insertLabeledHeading(h->label(), h);
3903 h->labelVariants().push_back(h->label());
3904 } else {
// No explicit label: generate one from the heading text.
3905 typename Trait::String label = Trait::latin1ToString("#") +
3906 paragraphToLabel(h->text().get());
3907
3908 const auto path = Trait::latin1ToString("/") +
3909 (!workingPath.isEmpty() ? workingPath + Trait::latin1ToString("/") :
3910 Trait::latin1ToString("")) + fileName;
3911
3912 h->setLabel(label + path);
3913 h->labelVariants().push_back(h->label());
3914
3915 doc->insertLabeledHeading(label + path, h);
3916
// Additionally register the lower-cased variant when it differs.
3917 if (label != label.toLower()) {
3918 doc->insertLabeledHeading(label.toLower() + path, h);
3919 h->labelVariants().push_back(label.toLower() + path);
3920 }
3921 }
3922
3923 parent->appendItem(h);
3924 }
3925}
3926
//! Prepare data in table cell for parsing.
//! \return Copy of \p s with every escaped pipe ("\|") replaced by a plain "|".
template<class Trait>
inline typename Trait::InternalString
prepareTableData(typename Trait::InternalString s)
{
    const auto escapedPipe = Trait::latin1ToString("\\|");
    const auto plainPipe = Trait::latin1ToString("|");

    s.replace(escapedPipe, plainPipe);

    return s;
}
3936
3937//! Split table's row on cells.
3938template<class Trait>
3939inline std::pair<typename Trait::InternalStringList, std::vector<long long int>>
3940splitTableRow(const typename Trait::InternalString &s)
3941{
3942 typename Trait::InternalStringList res;
3943 std::vector<long long int> columns;
3944
3945 bool backslash = false;
3946 long long int start = 0;
3947
3948 for (long long int i = 0; i < s.length(); ++i) {
3949 bool now = false;
3950
3951 if (s[i] == Trait::latin1ToChar('\\') && !backslash) {
3952 backslash = true;
3953 now = true;
3954 } else if (s[i] == Trait::latin1ToChar('|') && !backslash) {
3955 res.push_back(prepareTableData<Trait>(s.sliced(start, i - start)));
3956 columns.push_back(s.virginPos(i));
3957 start = i + 1;
3958 }
3959
3960 if (!now) {
3961 backslash = false;
3962 }
3963 }
3964
3965 res.push_back(prepareTableData<Trait>(s.sliced(start, s.length() - start)));
3966
3967 return {res, columns};
3968}
3969
//! Parse a table from \p fr: the first line is the header, the second one the alignment
//! row, the following lines are rows until one is rejected. Consumed lines are erased
//! from \p fr; the table is appended to \p parent unless it is empty or
//! \p collectRefLinks is set. At most \p columnsCount cells are kept per row.
// NOTE(review): listing lines 4054, 4101, 4105 and 4107 are absent from this extract.
// Presumably they declare the html variable used in the cell lambda and declare/assign
// the alignment value a - confirm against the real header.
3970template<class Trait>
3971inline void
3972Parser<Trait>::parseTable(MdBlock<Trait> &fr,
3973 std::shared_ptr<Block<Trait>> parent,
3974 std::shared_ptr<Document<Trait>> doc,
3975 typename Trait::StringList &linksToParse,
3976 const typename Trait::String &workingPath,
3977 const typename Trait::String &fileName,
3978 bool collectRefLinks,
3979 int columnsCount)
3980{
3981 static const char sep = '|';
3982
3983 if (fr.m_data.size() >= 2) {
3984 std::shared_ptr<Table<Trait>> table(new Table<Trait>);
3985 table->setStartColumn(fr.m_data.front().first.virginPos(0));
3986 table->setStartLine(fr.m_data.front().second.m_lineNumber);
3987 table->setEndColumn(fr.m_data.back().first.virginPos(fr.m_data.back().first.length() - 1));
3988 table->setEndLine(fr.m_data.back().second.m_lineNumber);
3989
// Parses one row and appends it to the table. Returns false when the line can not be
// a table row, which stops row processing in the loop further below.
3990 auto parseTableRow = [&](const typename MdBlock<Trait>::Line &lineData) -> bool {
3991 const auto &row = lineData.first;
3992
// NOTE(review): whitespace in this listing looks collapsed; the literal below is
// presumably several spaces (an indented line) in the real source - confirm.
3993 if (row.asString().startsWith(Trait::latin1ToString(" "))) {
3994 return false;
3995 }
3996
3997 auto line = row;
3998 auto p = skipSpaces<Trait>(0, line.asString());
3999
// A line made of spaces only is not a row.
4000 if (p == line.length()) {
4001 return false;
4002 }
4003
// Drop the optional leading separator together with the spaces before it.
4004 if (line[p] == Trait::latin1ToChar(sep)) {
4005 line.remove(0, p + 1);
4006 }
4007
4008 for (p = line.length() - 1; p >= 0; --p) {
4009 if (!line[p].isSpace()) {
4010 break;
4011 }
4012 }
4013
4014 if (p < 0) {
4015 return false;
4016 }
4017
// Drop the optional trailing separator together with the spaces after it.
4018 if (line[p] == Trait::latin1ToChar(sep)) {
4019 line.remove(p, line.length() - p);
4020 }
4021
// columns.second holds cell boundaries: row start, every separator, row end.
4022 auto columns = splitTableRow<Trait>(line);
4023 columns.second.insert(columns.second.begin(), row.virginPos(0));
4024 columns.second.push_back(row.virginPos(row.length() - 1));
4025
4026 std::shared_ptr<TableRow<Trait>> tr(new TableRow<Trait>);
4027 tr->setStartColumn(row.virginPos(0));
4028 tr->setStartLine(lineData.second.m_lineNumber);
4029 tr->setEndColumn(row.virginPos(row.length() - 1));
4030 tr->setEndLine(lineData.second.m_lineNumber);
4031
4032 int col = 0;
4033
4034 for (auto it = columns.first.begin(), last = columns.first.end(); it != last; ++it, ++col) {
// Cells beyond the declared number of columns are ignored.
4035 if (col == columnsCount) {
4036 break;
4037 }
4038
4039 std::shared_ptr<TableCell<Trait>> c(new TableCell<Trait>);
4040 c->setStartColumn(columns.second.at(col));
4041 c->setStartLine(lineData.second.m_lineNumber);
4042 c->setEndColumn(columns.second.at(col + 1));
4043 c->setEndLine(lineData.second.m_lineNumber);
4044
4045 if (!it->isEmpty()) {
// The "&#124;" entity stands for a literal pipe inside a cell.
4046 it->replace(Trait::latin1ToString("&#124;"), Trait::latin1ToChar(sep));
4047
// The cell content is parsed as a one-line block of its own.
4048 typename MdBlock<Trait>::Data fragment;
4049 fragment.push_back({*it, lineData.second});
4050 MdBlock<Trait> block = {fragment, 0};
4051
4052 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
4053
4055
4056 parseFormattedTextLinksImages(block, p, doc, linksToParse, workingPath, fileName,
4057 collectRefLinks, false, html, false);
4058
// Nested paragraphs are flattened: their items go straight into the cell.
// Note that the loop variables below shadow the outer it/last.
4059 if (!p->isEmpty()) {
4060 for (auto it = p->items().cbegin(), last = p->items().cend(); it != last; ++it ) {
4061 switch ((*it)->type()) {
4062 case ItemType::Paragraph: {
4063 const auto pp = std::static_pointer_cast<Paragraph<Trait>>(*it);
4064
4065 for (auto it = pp->items().cbegin(), last = pp->items().cend(); it != last; ++it) {
4066 c->appendItem((*it));
4067 }
4068 }
4069 break;
4070
4071 default:
4072 c->appendItem((*it));
4073 break;
4074 }
4075 }
4076 }
4077
4078 if (html.m_html.get()) {
4079 c->appendItem(html.m_html);
4080 }
4081 }
4082
4083 tr->appendCell(c);
4084 }
4085
4086 if (!tr->isEmpty())
4087 table->appendRow(tr);
4088
4089 return true;
4090 };
4091
// Column alignments are read from the second line of the block.
4092 {
4093 auto fmt = fr.m_data.at(1).first;
4094
4095 auto columns = fmt.split(typename Trait::InternalString(Trait::latin1ToChar(sep)));
4096
4097 for (auto it = columns.begin(), last = columns.end(); it != last; ++it) {
4098 *it = it->simplified();
4099
4100 if (!it->isEmpty()) {
4102
// ":" on both sides versus ":" only at the end select different alignments; the
// assignments themselves are on the missing listing lines.
4103 if (it->asString().endsWith(Trait::latin1ToString(":")) &&
4104 it->asString().startsWith(Trait::latin1ToString(":"))) {
4106 } else if (it->asString().endsWith(Trait::latin1ToString(":"))) {
4108 }
4109
4110 table->setColumnAlignment(table->columnsCount(), a);
4111 }
4112 }
4113 }
4114
// The alignment row is not a data row, remove it before parsing rows.
4115 fr.m_data.erase(fr.m_data.cbegin() + 1);
4116
4117 long long int r = 0;
4118
4119 for (const auto &line : std::as_const(fr.m_data)) {
4120 if (!parseTableRow(line)) {
4121 break;
4122 }
4123
4124 ++r;
4125 }
4126
// Only the successfully parsed rows are consumed.
4127 fr.m_data.erase(fr.m_data.cbegin(), fr.m_data.cbegin() + r);
4128
4129 if (!table->isEmpty() && !collectRefLinks) {
4130 parent->appendItem(table);
4131 }
4132 }
4133}
4134
4135//! \return Is the given string a heading's service sequence?
4136template<class Trait>
4137inline bool
4138isH(const typename Trait::String &s,
4139 const typename Trait::Char &c)
4140{
4141 long long int p = skipSpaces<Trait>(0, s);
4142
4143 if (p > 3) {
4144 return false;
4145 }
4146
4147 const auto start = p;
4148
4149 for (; p < s.size(); ++p) {
4150 if (s[p] != c) {
4151 break;
4152 }
4153 }
4154
4155 if (p - start < 1) {
4156 return false;
4157 }
4158
4159 for (; p < s.size(); ++p) {
4160 if (!s[p].isSpace()) {
4161 return false;
4162 }
4163 }
4164
4165 return true;
4166}
4167
4168//! \return Is the given string a heading's service sequence of level 1?
4169template<class Trait>
4170inline bool
4171isH1(const typename Trait::String &s)
4172{
4173 return isH<Trait>(s, Trait::latin1ToChar('='));
4174}
4175
4176//! \return Is the given string a heading's service sequence of level 2?
4177template<class Trait>
4178inline bool
4179isH2(const typename Trait::String &s)
4180{
4181 return isH<Trait>(s, Trait::latin1ToChar('-'));
4182}
4183
4184//! \return Previous position in the block.
//! The result is a {position, line} pair. Within a line the position is simply
//! decremented; at position 0 the result is the last position of the preceding line of
//! the block; when there is no preceding line the input is returned unchanged.
// NOTE(review): listing line 4187 (function name and first parameter, used below as fr)
// is absent from this extract.
4185template<class Trait>
4186inline std::pair<long long int, long long int>
4188 long long int pos,
4189 long long int line)
4190{
4191 if (pos > 0) {
4192 return {pos - 1, line};
4193 }
4194
// pos is 0: find the line by its line number and step to the end of the previous one.
4195 for (long long int i = 0; i < static_cast<long long int>(fr.m_data.size()); ++i) {
4196 if (fr.m_data.at(i).second.m_lineNumber == line) {
4197 if (i > 0) {
// NOTE(review): the returned line is line - 1 rather than the stored m_lineNumber of
// the previous entry - this assumes consecutive line numbers, confirm.
4198 return {fr.m_data.at(i - 1).first.virginPos(fr.m_data.at(i - 1).first.length() - 1),
4199 line - 1};
4200 }
4201 }
4202 }
4203
4204 return {pos, line};
4205}
4206
4207//! \return Next position in the block.
//! The result is a {position, line} pair. Within a line the position is incremented;
//! past the last position of a line the result is the first position of the following
//! line of the block; at the very end, or when the line is not found, the input is
//! returned unchanged.
// NOTE(review): listing line 4210 (function name and first parameter, used below as fr)
// is absent from this extract.
4208template<class Trait>
4209inline std::pair<long long int, long long int>
4211 long long int pos,
4212 long long int line)
4213{
4214 for (long long int i = 0; i < static_cast<long long int>(fr.m_data.size()); ++i) {
4215 if (fr.m_data.at(i).second.m_lineNumber == line) {
// Still inside the line: the virgin position of its last character is not exceeded.
4216 if (fr.m_data.at(i).first.virginPos(fr.m_data.at(i).first.length() - 1) >= pos + 1) {
4217 return {pos + 1, line};
4218 } else if (i + 1 < static_cast<long long int>(fr.m_data.size())) {
4219 return {fr.m_data.at(i + 1).first.virginPos(0), fr.m_data.at(i + 1).second.m_lineNumber};
4220 } else {
4221 return {pos, line};
4222 }
4223 }
4224 }
4225
4226 return {pos, line};
4227}
4228
4229template<class Trait>
4230inline long long int
4231Parser<Trait>::parseParagraph(MdBlock<Trait> &fr,
4232 std::shared_ptr<Block<Trait>> parent,
4233 std::shared_ptr<Document<Trait>> doc,
4234 typename Trait::StringList &linksToParse,
4235 const typename Trait::String &workingPath,
4236 const typename Trait::String &fileName,
4237 bool collectRefLinks,
4238 RawHtmlBlock<Trait> &html)
4239{
4240 return parseFormattedTextLinksImages(fr, parent, doc, linksToParse, workingPath, fileName,
4241 collectRefLinks, false, html, false);
4242}
4243
//! Static helpers that read and change the "free tag" flag of a RawHtml item.
// NOTE(review): listing line 4245 (the struct/class header) is absent from this extract;
// presumably this type exists to reach non-public RawHtml members - confirm.
4244template<class Trait>
//! \return Value of html->isFreeTag().
4246 static bool
4247 isFreeTag(std::shared_ptr<RawHtml<Trait>> html)
4248 {
4249 return html->isFreeTag();
4250 }
4251
//! Forward \p on to html->setFreeTag().
4252 static void
4253 setFreeTag(std::shared_ptr<RawHtml<Trait>> html, bool on)
4254 {
4255 html->setFreeTag(on);
4256 }
4257};
4258
//! Scan all lines of the block and collect the delimiters found in them.
//! A whole line may be a single delimiter (horizontal line, H1 or H2 underline),
//! otherwise the line is scanned character by character for emphasis, strikethrough,
//! brackets, parentheses, angle brackets, inline code and math markers.
//! \return Delimiters in the order they were found.
// NOTE(review): listing line 4261 (function name and parameter, used below as fr) is
// absent from this extract.
4259template<class Trait>
4260inline typename Parser<Trait>::Delims
4262{
4263 Delims d;
4264
4265 for (long long int line = 0; line < (long long int)fr.size(); ++line) {
4266 const typename Trait::String &str = fr.at(line).first.asString();
4267 const auto p = skipSpaces<Trait>(0, str);
4268 const auto withoutSpaces = str.sliced(p);
4269
// Whole-line delimiters are accepted only with fewer than 4 leading spaces.
4270 if (isHorizontalLine<Trait>(withoutSpaces) && p < 4) {
4271 d.push_back({Delimiter::HorizontalLine, line, 0, str.length(), false, false, false});
4272 } else if (isH1<Trait>(withoutSpaces) && p < 4) {
4273 d.push_back({Delimiter::H1, line, 0, str.length(), false, false, false});
4274 } else if (isH2<Trait>(withoutSpaces) && p < 4) {
4275 d.push_back({Delimiter::H2, line, 0, str.length(), false, false, false});
4276 } else {
// backslash: the previous character was an unescaped backslash.
// word: ordinary text was seen since the last collected delimiter.
4277 bool backslash = false;
4278 bool word = false;
4279
4280 for (long long int i = p; i < str.size(); ++i) {
// now: the backslash flag was raised in this very iteration and must survive it.
4281 bool now = false;
4282
4283 if (str[i] == Trait::latin1ToChar('\\') && !backslash) {
4284 backslash = true;
4285 now = true;
4286 }
4287 // * or _
4288 else if ((str[i] == Trait::latin1ToChar('_') || str[i] == Trait::latin1ToChar('*')) && !backslash) {
4289 typename Trait::String style;
4290
// Classify the character before the run; start of line counts as whitespace
// and punctuation.
4291 const bool punctBefore = (i > 0 ? str[i - 1].isPunct() || str[i - 1].isSymbol() : true);
4292 const bool uWhitespaceBefore = (i > 0 ? Trait::isUnicodeWhitespace(str[i - 1]) : true);
4293 const bool uWhitespaceOrPunctBefore = uWhitespaceBefore || punctBefore;
4294 const bool alNumBefore = (i > 0 ? str[i - 1].isLetterOrNumber() : false);
4295
4296 const auto ch = str[i];
4297
// Consume the whole run of the same character.
4298 while (i < str.length() && str[i] == ch) {
4299 style.push_back(str[i]);
4300 ++i;
4301 }
4302
4303 typename Delimiter::DelimiterType dt = Delimiter::Unknown;
4304
4305 if (ch == Trait::latin1ToChar('*')) {
4306 dt = Delimiter::Emphasis1;
4307 } else {
4308 dt = Delimiter::Emphasis2;
4309 }
4310
// Classify the character after the run; end of line counts as whitespace
// and punctuation. A '_' run between two alphanumerics is neither left- nor
// right-flanking.
4311 const bool punctAfter = (i < str.length() ? str[i].isPunct() || str[i].isSymbol() : true);
4312 const bool uWhitespaceAfter = (i < str.length() ? Trait::isUnicodeWhitespace(str[i]) : true);
4313 const bool alNumAfter = (i < str.length() ? str[i].isLetterOrNumber() : false);
4314 const bool leftFlanking = !uWhitespaceAfter && (!punctAfter || (punctAfter && uWhitespaceOrPunctBefore))
4315 && !(ch == Trait::latin1ToChar('_') && alNumBefore && alNumAfter);
4316 const bool rightFlanking = !uWhitespaceBefore && (!punctBefore || (punctBefore && (uWhitespaceAfter || punctAfter)))
4317 && !(ch == Trait::latin1ToChar('_') && alNumBefore && alNumAfter);
4318
// Every character of the run becomes a separate delimiter of length 1.
4319 if (leftFlanking || rightFlanking) {
4320 for (auto j = 0; j < style.length(); ++j) {
4321 d.push_back({dt, line, i - style.length() + j, 1,
4322 word, false, leftFlanking, rightFlanking});
4323 }
4324
4325 word = false;
4326 } else {
4327 word = true;
4328 }
4329
// i points past the run; step back because the for loop increments it again.
4330 --i;
4331 }
4332 // ~
4333 else if (str[i] == Trait::latin1ToChar('~') && !backslash) {
4334 typename Trait::String style;
4335
4336 const bool punctBefore = (i > 0 ? str[i - 1].isPunct() || str[i - 1].isSymbol() : true);
4337 const bool uWhitespaceBefore = (i > 0 ? Trait::isUnicodeWhitespace(str[i - 1]) : true);
4338 const bool uWhitespaceOrPunctBefore = uWhitespaceBefore || punctBefore;
4339
4340 while (i < str.length() && str[i] == Trait::latin1ToChar('~')) {
4341 style.push_back(str[i]);
4342 ++i;
4343 }
4344
// Only runs of one or two '~' form a strikethrough delimiter; the run is
// stored as one delimiter with the run length.
4345 if (style.length() <= 2) {
4346 const bool punctAfter = (i < str.length() ? str[i].isPunct() || str[i].isSymbol() : true);
4347 const bool uWhitespaceAfter = (i < str.length() ? Trait::isUnicodeWhitespace(str[i]) : true);
4348 const bool leftFlanking = !uWhitespaceAfter && (!punctAfter || (punctAfter && uWhitespaceOrPunctBefore));
4349 const bool rightFlanking = !uWhitespaceBefore && (!punctBefore || (punctBefore && (uWhitespaceAfter || punctAfter)));
4350
4351 if (leftFlanking || rightFlanking) {
4352 d.push_back({Delimiter::Strikethrough,
4353 line,
4354 i - style.length(),
4355 style.length(),
4356 word,
4357 false,
4358 leftFlanking,
4359 rightFlanking});
4360
4361 word = false;
4362 } else {
4363 word = true;
4364 }
4365 } else {
4366 word = true;
4367 }
4368
// Same step back as for the emphasis run above.
4369 --i;
4370 }
4371 // [
4372 else if (str[i] == Trait::latin1ToChar('[') && !backslash) {
4373 d.push_back({Delimiter::SquareBracketsOpen, line, i, 1, word, false});
4374
4375 word = false;
4376 }
4377 // !
// Only "![" is a delimiter (length 2); a lone '!' is ordinary text.
4378 else if (str[i] == Trait::latin1ToChar('!') && !backslash) {
4379 if (i + 1 < str.length()) {
4380 if (str[i + 1] == Trait::latin1ToChar('[')) {
4381 d.push_back({Delimiter::ImageOpen, line, i, 2, word, false});
4382
4383 ++i;
4384
4385 word = false;
4386 } else {
4387 word = true;
4388 }
4389 } else {
4390 word = true;
4391 }
4392 }
4393 // (
4394 else if (str[i] == Trait::latin1ToChar('(') && !backslash) {
4395 d.push_back({Delimiter::ParenthesesOpen, line, i, 1, word, false});
4396
4397 word = false;
4398 }
4399 // ]
4400 else if (str[i] == Trait::latin1ToChar(']') && !backslash) {
4401 d.push_back({Delimiter::SquareBracketsClose, line, i, 1, word, false});
4402
4403 word = false;
4404 }
4405 // )
4406 else if (str[i] == Trait::latin1ToChar(')') && !backslash) {
4407 d.push_back({Delimiter::ParenthesesClose, line, i, 1, word, false});
4408
4409 word = false;
4410 }
4411 // <
4412 else if (str[i] == Trait::latin1ToChar('<') && !backslash) {
4413 d.push_back({Delimiter::Less, line, i, 1, word, false});
4414
4415 word = false;
4416 }
4417 // >
4418 else if (str[i] == Trait::latin1ToChar('>') && !backslash) {
4419 d.push_back({Delimiter::Greater, line, i, 1, word, false});
4420
4421 word = false;
4422 }
4423 // `
// Unlike the branches above, a backtick run is collected even after a
// backslash: the delimiter then also covers the backslash (position - 1,
// length + 1) and carries the backslash flag.
4424 else if (str[i] == Trait::latin1ToChar('`')) {
4425 typename Trait::String code;
4426
4427 while (i < str.length() && str[i] == Trait::latin1ToChar('`')) {
4428 code.push_back(str[i]);
4429 ++i;
4430 }
4431
4432 d.push_back({Delimiter::InlineCode,
4433 line,
4434 i - code.length() - (backslash ? 1 : 0),
4435 code.length() + (backslash ? 1 : 0),
4436 word,
4437 backslash});
4438
4439 word = false;
4440
4441 --i;
4442 }
4443 // $
// A '$' run is always consumed, but becomes a Math delimiter only when it
// is at most two characters long and not escaped.
4444 else if (str[i] == Trait::latin1ToChar('$')) {
4445 typename Trait::String m;
4446
4447 while (i < str.length() && str[i] == Trait::latin1ToChar('$')) {
4448 m.push_back(str[i]);
4449 ++i;
4450 }
4451
4452 if (m.length() <= 2 && !backslash) {
4453 d.push_back({Delimiter::Math, line, i - m.length(), m.length(),
4454 false, false, false, false});
4455 }
4456
4457 word = false;
4458
4459 --i;
4460 } else {
4461 word = true;
4462 }
4463
// An escape affects only the character right after the backslash.
4464 if (!now) {
4465 backslash = false;
4466 }
4467 }
4468 }
4469 }
4470
4471 return d;
4472}
4473
//! \return Is the given string a line break.
//! A line is a hard line break when it ends with two spaces or with an odd number of
//! backslashes (an even number means that every backslash is itself escaped).
template<class Trait>
inline bool
isLineBreak(const typename Trait::String &s)
{
    // Two trailing spaces are required: a single trailing space is ordinary text, and
    // lineBreakLength() reports a length of 2 for this kind of break.
    if (s.endsWith(Trait::latin1ToString("  "))) {
        return true;
    }

    // Count the backslashes that end the line.
    long long int backslashes = 0;

    for (long long int i = s.length() - 1; i >= 0 && s[i] == Trait::latin1ToChar('\\'); --i) {
        ++backslashes;
    }

    return (backslashes % 2 != 0);
}
4489
//! \return Length of line break.
//! 2 when the line ends with two spaces, otherwise 1 (a trailing backslash).
template<class Trait>
inline long long int
lineBreakLength(const typename Trait::String &s)
{
    // The matched suffix has to be as long as the reported length, i.e. two spaces.
    const bool endsWithTwoSpaces = s.endsWith(Trait::latin1ToString("  "));

    return (endsWithTwoSpaces ? 2 : 1);
}
4497
//! Remove line break from the end of string.
//! Only a trailing backslash is cut off; any other string is returned as is.
template<class Trait>
inline typename Trait::String
removeLineBreak(const typename Trait::String &s)
{
    if (!s.endsWith(Trait::latin1ToString("\\"))) {
        return s;
    }

    return s.sliced(0, s.size() - 1);
}
4509
4510//! Initialize item with style information and set it as last item.
//! The open styles accumulated in the parsing options are handed over to \p item, the
//! accumulator is emptied, and \p item is remembered as the last item with style.
// NOTE(review): listing line 4513 (function name and first parameter, used below as po)
// is absent from this extract.
4511template<class Trait>
4512inline void
4514 std::shared_ptr<ItemWithOpts<Trait>> item)
4515{
4516 item->openStyles() = po.m_openStyles;
4517 po.m_openStyles.clear();
4518 po.m_lastItemWithStyle = item;
4519}
4520
4520//! Make text item.
//! Builds a Text item from \p text, positioned from (\p startPos, \p startLine) to
//! (\p endPos, \p endLine) in block coordinates, and appends it to the current parent
//! unless reference links are only being collected. Leading spaces are stripped when
//! the text starts at column 0. If nothing is left, no item is created and the
//! current position is reset to \p startPos.
// NOTE(review): listing lines 4525, 4541, 4544 and 4571 are absent from this extract.
// Presumably they hold the po parameter, the declaration of the working copy s, the
// trailing-space removal and a call made on the new item - confirm.
4521template<class Trait>
4522inline void
4523makeTextObject(const typename Trait::String &text,
4526 long long int startPos,
4527 long long int startLine,
4528 long long int endPos,
4529 long long int endLine,
4530 bool doRemoveSpacesAtEnd = false)
4531{
// A negative end position means "end of the previous line".
4532 if (endPos < 0 && endLine - 1 >= 0) {
4533 endPos = po.m_fr.m_data.at(endLine - 1).first.length() - 1;
4534 --endLine;
4535 }
4536
// Text reaching the end of its line always gets trailing-space removal.
4537 if (endPos == po.m_fr.m_data.at(endLine).first.length() - 1) {
4538 doRemoveSpacesAtEnd = true;
4539 }
4540
4542
4543 if (doRemoveSpacesAtEnd) {
4545 }
4546
4547 if (startPos == 0) {
4548 if (s.length()) {
4549 const auto p = skipSpaces<Trait>(0, s);
4550
4551 if (p > 0) {
4552 s.remove(0, p);
4553 }
4554 }
4555 }
4556
4557 if (!s.isEmpty()) {
4558 std::shared_ptr<Text<Trait>> t;
4559
4560 if (!po.m_collectRefLinks) {
// The raw (untrimmed) text is recorded alongside the item.
4561 po.m_rawTextData.push_back({text, startPos, startLine});
4562
4563 t.reset(new Text<Trait>);
4564 t->setText(s);
4565 t->setOpts(po.m_opts);
4566 t->setStartColumn(po.m_fr.m_data.at(startLine).first.virginPos(startPos));
4567 t->setStartLine(po.m_fr.m_data.at(startLine).second.m_lineNumber);
4568 t->setEndColumn(po.m_fr.m_data.at(endLine).first.virginPos(endPos, true));
4569 t->setEndLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
4570
4572
// The parent always ends where its last item ends.
4573 po.m_parent->setEndColumn(t->endColumn());
4574 po.m_parent->setEndLine(t->endLine());
4575 }
4576
// State flags are updated even when only reference links are collected.
4577 po.m_wasRefLink = false;
4578 po.m_firstInParagraph = false;
4579 po.m_headingAllowed = true;
4580
4581 if (!po.m_collectRefLinks) {
4582 po.m_parent->appendItem(t);
4583
4584 po.m_lastText = t;
4585 }
4586 } else {
4587 po.m_pos = startPos;
4588 }
4589}
4590
4590//! Make text item with line break.
//! First creates the text item via makeTextObject() with trailing-space removal forced
//! on, then appends a LineBreak item that covers the rest of line \p endLine after
//! \p endPos. No items are created while reference links are only being collected.
// NOTE(review): listing line 4595 (presumably the po parameter) is absent from this
// extract.
4591template<class Trait>
4592inline void
4593makeTextObjectWithLineBreak(const typename Trait::String &text,
4596 long long int startPos,
4597 long long int startLine,
4598 long long int endPos,
4599 long long int endLine)
4600{
4601 makeTextObject(text, po, startPos, startLine, endPos, endLine, true);
4602
4603 std::shared_ptr<LineBreak<Trait>> hr;
4604
4605 if (!po.m_collectRefLinks) {
4606 hr.reset(new LineBreak<Trait>);
// The line break text is everything after endPos on that line.
4607 hr->setText(po.m_fr.m_data.at(endLine).first.asString().sliced(endPos + 1));
4608 hr->setStartColumn(po.m_fr.m_data.at(endLine).first.virginPos(endPos + 1));
4609 hr->setStartLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
4610 hr->setEndColumn(po.m_fr.m_data.at(endLine).first.virginPos(po.m_fr.m_data.at(endLine).first.length() - 1));
4611 hr->setEndLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
4612 po.m_parent->setEndColumn(hr->endColumn());
4613 po.m_parent->setEndLine(hr->endLine());
4614 }
4615
4616 po.m_wasRefLink = false;
4617 po.m_firstInParagraph = false;
4618
4619 if (!po.m_collectRefLinks) {
4620 po.m_parent->appendItem(hr);
4621 }
4622}
4623
4623//! Check for table in paragraph.
//! When no text options are active, looks from the current line (or the next one if
//! the current position is not at a line start) up to \p lastLine for a table header
//! followed by an alignment row with the same column count. On success the table start
//! and column count are stored and the text is limited to the line before the table;
//! otherwise the text is limited to the end of the block.
// NOTE(review): listing lines 4627 (function name and first parameter, used below as
// po) and 4639 are absent from this extract.
4624template<class Trait>
4625inline void
4628 long long int lastLine)
4629{
4630 if (!po.m_opts) {
4631 long long int i = po.m_pos > 0 ? po.m_line + 1 : po.m_line;
4632
4633 for (; i <= lastLine; ++i) {
4634 const auto h = isTableHeader<Trait>(po.m_fr.m_data[i].first.asString());
4635 const auto c = i + 1 < static_cast<long long int>(po.m_fr.m_data.size()) ?
4636 isTableAlignment<Trait>(po.m_fr.m_data[i + 1].first.asString()) : 0;
4637
4638 if (h && c && c == h) {
4640 po.m_startTableLine = i;
4641 po.m_columnsCount = c;
// NOTE(review): if i is 0 the index i - 1 is negative and is used to index m_data on
// the next line - presumably callers never reach this with a table on line 0, confirm.
4642 po.m_lastTextLine = i - 1;
4643 po.m_lastTextPos = po.m_fr.m_data[po.m_lastTextLine].first.length();
4644
4645 return;
4646 }
4647 }
4648 }
4649
// No table found: text may run to the end of the last line of the block.
4650 po.m_lastTextLine = po.m_fr.m_data.size() - 1;
4651 po.m_lastTextPos = po.m_fr.m_data.back().first.length();
4652}
4653
4653//! Make text item.
//! Emits text items (and line breaks where a line ends with one) for everything between
//! the current position stored in the parsing options and (\p lastLine, \p lastPos),
//! then moves the current position there. Does nothing when the current position is
//! already at or past the target.
// NOTE(review): listing lines 4657 (function name), 4665 (presumably the po parameter)
// and 4759 are absent from this extract.
4654template<class Trait>
4655inline void
4658 // Inclusive. Don't pass lastLine > actual line position with 0 lastPos. Pass as is,
4659 // i.e. if line length is 18 and you need whole line then pass lastLine = index of line,
4660 // and lastPos = 18, or you may crash here if you will pass lastLine = index of line + 1
4661 // and lastPos = 0...
4662 long long int lastLine,
4663 // Not inclusive
4664 long long int lastPos,
4666{
4667 if (po.m_line > lastLine) {
4668 return;
4669 } else if (po.m_line == lastLine && po.m_pos >= lastPos) {
4670 return;
4671 }
4672
4673 typename Trait::String text;
4674
// If the current position is already past the end of its line, the text starts at
// the beginning of the next line.
4675 const auto isLastChar = po.m_pos >= po.m_fr.m_data.at(po.m_line).first.length();
4676 long long int startPos = (isLastChar ? 0 : po.m_pos);
4677 long long int startLine = (isLastChar ? po.m_line + 1 : po.m_line);
4678
// A line break is considered only when line breaks are not ignored, the line is not
// the last one of the block and, for the target line, the text runs to its very end.
4679 bool lineBreak =
4680 (!po.m_ignoreLineBreak && po.m_line != (long long int)(po.m_fr.m_data.size() - 1) &&
4681 (po.m_line == lastLine ? (lastPos == po.m_fr.m_data.at(po.m_line).first.length() &&
4682 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString())) :
4683 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString())));
4684
// Emits the accumulated text followed by a line break for the current line, then
// resets the accumulator to start at the next line.
4685 // makeTOWLB
4686 auto makeTOWLB = [&]() {
4687 if (po.m_line != (long long int)(po.m_fr.m_data.size() - 1)) {
4688 const auto &line = po.m_fr.m_data.at(po.m_line).first.asString();
4689
4690 makeTextObjectWithLineBreak(text, po, startPos, startLine,
4691 line.length() - lineBreakLength<Trait>(line) - 1, po.m_line);
4692
4693 startPos = 0;
4694 startLine = po.m_line + 1;
4695
4696 text.clear();
4697 }
4698 }; // makeTOWLB
4699
// First (possibly partial) line.
4700 if (lineBreak) {
4701 text.push_back(removeLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString(po.m_pos)));
4702
4703 makeTOWLB();
4704 } else {
4705 const auto length = (po.m_line == lastLine ?
4706 lastPos - po.m_pos : po.m_fr.m_data.at(po.m_line).first.length() - po.m_pos);
4707 const auto s = po.m_fr.m_data.at(po.m_line).first.virginSubString(po.m_pos, length);
4708 text.push_back(s);
4709
4710 po.m_pos = (po.m_line == lastLine ? lastPos : po.m_fr.m_data.at(po.m_line).first.length());
4711
4712 makeTextObject(text,
4713 po,
4714 startPos,
4715 startLine,
4716 po.m_line == lastLine ? lastPos - 1 : po.m_fr.m_data.at(po.m_line).first.length() - 1,
4717 po.m_line);
4718
4719 text.clear();
4720 }
4721
4722 if (po.m_line != lastLine) {
4723 ++po.m_line;
4724
// Whole lines strictly between the first and the target line.
4725 for (; po.m_line < lastLine; ++po.m_line) {
4726 startPos = 0;
4727 startLine = po.m_line;
4728
4729 lineBreak = (!po.m_ignoreLineBreak && po.m_line != (long long int)(po.m_fr.m_data.size() - 1) &&
4730 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.asString()));
4731
4732 const auto s = (lineBreak ? removeLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString()) :
4733 po.m_fr.m_data.at(po.m_line).first.virginSubString());
4734 text.push_back(s);
4735
4736 if (lineBreak) {
4737 makeTOWLB();
4738 } else {
4739 makeTextObject(text, po, 0, po.m_line,
4740 po.m_fr.m_data.at(po.m_line).first.length() - 1, po.m_line);
4741 }
4742
4743 text.clear();
4744 }
4745
// Target line, up to (not including) lastPos.
4746 lineBreak = (!po.m_ignoreLineBreak && po.m_line != (long long int)(po.m_fr.m_data.size() - 1) &&
4747 lastPos == po.m_fr.m_data.at(po.m_line).first.length() &&
4748 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.asString()));
4749
4750 auto s = po.m_fr.m_data.at(po.m_line).first.virginSubString(0, lastPos);
4751
4752 po.m_pos = lastPos;
4753
4754 if (!lineBreak) {
4755 text.push_back(s);
4756
4757 makeTextObject(text, po, 0, lastLine, lastPos - 1, lastLine);
4758 } else {
4760 text.push_back(s);
4761
4762 makeTOWLB();
4763 }
4764 }
4765}
4766
4767//! Skip spaces.
4768template<class Trait>
4769inline void
4770skipSpacesInHtml(long long int &l,
4771 long long int &p,
4772 const typename MdBlock<Trait>::Data &fr)
4773{
4774 while (l < (long long int)fr.size()) {
4775 p = skipSpaces<Trait>(p, fr[l].first.asString());
4776
4777 if (p < fr[l].first.length()) {
4778 return;
4779 }
4780
4781 p = 0;
4782 ++l;
4783 }
4784}
4785
4786//! Read HTML attribute value.
//! Reads an unquoted value on the current line, starting at \p p. Reading stops at a
//! space, at '>' or at the end of the line; any of the characters " ` = < ' makes the
//! value invalid.
//! \return {false, false} on a forbidden character, otherwise both members tell
//! whether at least one character was read.
// NOTE(review): listing line 4789 (function name and first parameter, used below as l)
// is absent from this extract; the name is taken from the call in readHtmlAttrValue().
4787template<class Trait>
4788inline std::pair<bool, bool>
4790 long long int &p,
4791 const typename MdBlock<Trait>::Data &fr)
4792{
4793 static const typename Trait::String notAllowed = Trait::latin1ToString("\"`=<'");
4794
4795 const auto start = p;
4796
4797 for (; p < fr[l].first.length(); ++p) {
4798 if (fr[l].first[p].isSpace()) {
4799 break;
4800 } else if (notAllowed.contains(fr[l].first[p])) {
4801 return {false, false};
4802 } else if (fr[l].first[p] == Trait::latin1ToChar('>')) {
4803 return {p - start > 0, p - start > 0};
4804 }
4805 }
4806
4807 return {p - start > 0, p - start > 0};
4808}
4809
4810//! Read HTML attribute value.
4811template<class Trait>
4812inline std::pair<bool, bool>
4813readHtmlAttrValue(long long int &l,
4814 long long int &p,
4815 const typename MdBlock<Trait>::Data &fr)
4816{
4817 if (p < fr[l].first.length() && fr[l].first[p] != Trait::latin1ToChar('"') &&
4818 fr[l].first[p] != Trait::latin1ToChar('\'')) {
4819 return readUnquotedHtmlAttrValue<Trait>(l, p, fr);
4820 }
4821
4822 const auto s = fr[l].first[p];
4823
4824 ++p;
4825
4826 if (p >= fr[l].first.length()) {
4827 return {false, false};
4828 }
4829
4830 for (; l < (long long int)fr.size(); ++l) {
4831 bool doBreak = false;
4832
4833 for (; p < fr[l].first.length(); ++p) {
4834 const auto ch = fr[l].first[p];
4835
4836 if (ch == s) {
4837 doBreak = true;
4838
4839 break;
4840 }
4841 }
4842
4843 if (doBreak) {
4844 break;
4845 }
4846
4847 p = 0;
4848 }
4849
4850 if (l >= (long long int)fr.size()) {
4851 return {false, false};
4852 }
4853
4854 if (p >= fr[l].first.length()) {
4855 return {false, false};
4856 }
4857
4858 if (fr[l].first[p] != s) {
4859 return {false, false};
4860 }
4861
4862 ++p;
4863
4864 return {true, true};
4865}
4866
4867//! Read HTML attribute.
4868template<class Trait>
4869inline std::pair<bool, bool>
4870readHtmlAttr(long long int &l,
4871 long long int &p,
4872 const typename MdBlock<Trait>::Data &fr,
4873 bool checkForSpace)
4874{
4875 long long int tl = l, tp = p;
4876
4877 skipSpacesInHtml<Trait>(l, p, fr);
4878
4879 if (l >= (long long int)fr.size()) {
4880 return {false, false};
4881 }
4882
4883 // /
4884 if (p < fr[l].first.length() && fr[l].first[p] == Trait::latin1ToChar('/')) {
4885 return {false, true};
4886 }
4887
4888 // >
4889 if (p < fr[l].first.length() && fr[l].first[p] == Trait::latin1ToChar('>')) {
4890 return {false, true};
4891 }
4892
4893 if (checkForSpace) {
4894 if (tl == l && tp == p) {
4895 return {false, false};
4896 }
4897 }
4898
4899 const auto start = p;
4900
4901 for (; p < fr[l].first.length(); ++p) {
4902 const auto ch = fr[l].first[p];
4903
4904 if (ch.isSpace() || ch == Trait::latin1ToChar('>') || ch == Trait::latin1ToChar('=')) {
4905 break;
4906 }
4907 }
4908
4909 const typename Trait::String name = fr[l].first.asString().sliced(start, p - start).toLower();
4910
4911 if (!name.startsWith(Trait::latin1ToString("_")) && !name.startsWith(Trait::latin1ToString(":")) &&
4912 !name.isEmpty() && !(name[0].unicode() >= 97 && name[0].unicode() <= 122)) {
4913 return {false, false};
4914 }
4915
4916 static const typename Trait::String allowedInName =
4917 Trait::latin1ToString("abcdefghijklmnopqrstuvwxyz0123456789_.:-");
4918
4919 for (long long int i = 1; i < name.length(); ++i) {
4920 if (!allowedInName.contains(name[i])) {
4921 return {false, false};
4922 }
4923 }
4924
4925 // >
4926 if (p < fr[l].first.length() && fr[l].first[p] == Trait::latin1ToChar('>')) {
4927 return {false, true};
4928 }
4929
4930 tl = l;
4931 tp = p;
4932
4933 skipSpacesInHtml<Trait>(l, p, fr);
4934
4935 if (l >= (long long int)fr.size()) {
4936 return {false, false};
4937 }
4938
4939 // =
4940 if (p < fr[l].first.length()) {
4941 if (fr[l].first[p] != Trait::latin1ToChar('=')) {
4942 l = tl;
4943 p = tp;
4944
4945 return {true, true};
4946 } else {
4947 ++p;
4948 }
4949 } else {
4950 return {true, false};
4951 }
4952
4953 skipSpacesInHtml<Trait>(l, p, fr);
4954
4955 if (l >= (long long int)fr.size()) {
4956 return {false, false};
4957 }
4958
4959 return readHtmlAttrValue<Trait>(l, p, fr);
4960}
4961
4962//! \return Is HTML tag at the given position?
4963template<class Trait>
4964inline std::tuple<bool, long long int, long long int, bool, typename Trait::String>
4965isHtmlTag(long long int line, long long int pos, TextParsingOpts<Trait> &po, int rule);
4966
4967//! \return Is after the given position only HTML tags?
4968template<class Trait>
4969inline bool
4971 long long int pos,
4973 int rule)
4974{
4975 static const std::set<typename Trait::String> s_rule1Finish = {Trait::latin1ToString("/pre"),
4976 Trait::latin1ToString("/script"),
4977 Trait::latin1ToString("/style"),
4978 Trait::latin1ToString("/textarea")};
4979
4980 auto p = skipSpaces<Trait>(pos, po.m_fr.m_data[line].first.asString());
4981
4982 while (p < po.m_fr.m_data[line].first.length()) {
4983 bool ok = false;
4984
4985 long long int l;
4986 typename Trait::String tag;
4987
4988 std::tie(ok, l, p, std::ignore, tag) = isHtmlTag(line, p, po, rule);
4989
4990 ++p;
4991
4992 if (rule != 1) {
4993 if (!ok) {
4994 return false;
4995 }
4996
4997 if (l > line) {
4998 return true;
4999 }
5000 } else {
5001 if (s_rule1Finish.find(tag.toLower()) != s_rule1Finish.cend() && l == line) {
5002 return true;
5003 }
5004
5005 if (l > line) {
5006 return false;
5007 }
5008 }
5009
5010 p = skipSpaces<Trait>(p, po.m_fr.m_data[line].first.asString());
5011 }
5012
5013 if (p >= po.m_fr.m_data[line].first.length()) {
5014 return true;
5015 }
5016
5017 return false;
5018}
5019
5020//! \return Is setext heading in the lines?
5021template<class Trait>
5022inline bool
5024 long long int startLine,
5025 long long int endLine)
5026{
5027 for (; startLine <= endLine; ++startLine) {
5028 const auto pos = skipSpaces<Trait>(0, po.m_fr.m_data.at(startLine).first.asString());
5029 const auto line = po.m_fr.m_data.at(startLine).first.asString().sliced(pos);
5030
5031 if ((isH1<Trait>(line) || isH2<Trait>(line)) && pos < 4) {
5032 return true;
5033 }
5034 }
5035
5036 return false;
5037}
5038
5039//! \return Is HTML tag at the given position?
5040template<class Trait>
5041inline std::tuple<bool, long long int, long long int, bool, typename Trait::String>
5042isHtmlTag(long long int line,
5043 long long int pos,
5045 int rule)
5046{
5047 if (po.m_fr.m_data[line].first[pos] != Trait::latin1ToChar('<')) {
5048 return {false, line, pos, false, {}};
5049 }
5050
5051 typename Trait::String tag;
5052
5053 long long int l = line;
5054 long long int p = pos + 1;
5055 bool first = false;
5056
5057 {
5058 const auto tmp = skipSpaces<Trait>(0, po.m_fr.m_data[l].first.asString());
5059 first = (tmp == pos);
5060 }
5061
5062 if (p >= po.m_fr.m_data[l].first.length()) {
5063 return {false, line, pos, first, tag};
5064 }
5065
5066 bool closing = false;
5067
5068 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('/')) {
5069 closing = true;
5070
5071 tag.push_back(Trait::latin1ToChar('/'));
5072
5073 ++p;
5074 }
5075
5076 const auto start = p;
5077
5078 // tag
5079 for (; p < po.m_fr.m_data[l].first.length(); ++p) {
5080 const auto ch = po.m_fr.m_data[l].first[p];
5081
5082 if (ch.isSpace() || ch == Trait::latin1ToChar('>') || ch == Trait::latin1ToChar('/')) {
5083 break;
5084 }
5085 }
5086
5087 tag.push_back(po.m_fr.m_data[l].first.asString().sliced(start, p - start));
5088
5089 if (p < po.m_fr.m_data[l].first.length() && po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('/')) {
5090 if (p + 1 < po.m_fr.m_data[l].first.length() &&
5091 po.m_fr.m_data[l].first[p + 1] == Trait::latin1ToChar('>')) {
5092 long long int tmp = 0;
5093
5094 if (rule == 7) {
5095 tmp = skipSpaces<Trait>(p + 2, po.m_fr.m_data[l].first.asString());
5096 }
5097
5098 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
5099 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 2, po, rule == 1)));
5100
5101 if (!isSetextHeadingBetween(po, line, l)) {
5102 return {true, l, p + 1, onLine, tag};
5103 } else {
5104 return {false, line, pos, first, tag};
5105 }
5106 } else {
5107 return {false, line, pos, first, tag};
5108 }
5109 }
5110
5111 if (p < po.m_fr.m_data[l].first.length() && po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('>')) {
5112 long long int tmp = 0;
5113
5114 if (rule == 7) {
5115 tmp = skipSpaces<Trait>(p + 1, po.m_fr.m_data[l].first.asString());
5116 }
5117
5118 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
5119 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 1, po, rule == 1)));
5120
5121 if (!isSetextHeadingBetween(po, line, l)) {
5122 return {true, l, p, onLine, tag};
5123 } else {
5124 return {false, line, pos, first, tag};
5125 }
5126 }
5127
5128 skipSpacesInHtml<Trait>(l, p, po.m_fr.m_data);
5129
5130 if (l >= (long long int)po.m_fr.m_data.size()) {
5131 return {false, line, pos, first, tag};
5132 }
5133
5134 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('>')) {
5135 long long int tmp = 0;
5136
5137 if (rule == 7) {
5138 tmp = skipSpaces<Trait>(p + 1, po.m_fr.m_data[l].first.asString());
5139 }
5140
5141 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
5142 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 1, po, rule == 1)));
5143
5144 if (!isSetextHeadingBetween(po, line, l)) {
5145 return {true, l, p, onLine, tag};
5146 } else {
5147 return {false, line, pos, first, tag};
5148 }
5149 }
5150
5151 bool attr = true;
5152 bool firstAttr = true;
5153
5154 while (attr) {
5155 bool ok = false;
5156
5157 std::tie(attr, ok) = readHtmlAttr<Trait>(l, p, po.m_fr.m_data, !firstAttr);
5158
5159 firstAttr = false;
5160
5161 if (closing && attr) {
5162 return {false, line, pos, first, tag};
5163 }
5164
5165 if (!ok) {
5166 return {false, line, pos, first, tag};
5167 }
5168 }
5169
5170 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('/')) {
5171 ++p;
5172 } else {
5173 skipSpacesInHtml<Trait>(l, p, po.m_fr.m_data);
5174
5175 if (l >= (long long int)po.m_fr.m_data.size()) {
5176 return {false, line, pos, first, tag};
5177 }
5178 }
5179
5180 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('>')) {
5181 long long int tmp = 0;
5182
5183 if (rule == 7) {
5184 tmp = skipSpaces<Trait>(p + 1, po.m_fr.m_data[l].first.asString());
5185 }
5186
5187 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
5188 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 1, po, rule == 1)));
5189
5190 if (!isSetextHeadingBetween(po, line, l)) {
5191 return {true, l, p, onLine, tag};
5192 } else {
5193 return {false, line, pos, first, tag};
5194 }
5195 }
5196
5197 return {false, line, pos, first, {}};
5198}
5199
5200//! Read HTML tag.
5201template<class Trait>
5202inline std::pair<typename Trait::String, bool>
5203Parser<Trait>::readHtmlTag(typename Delims::iterator it,
5204 TextParsingOpts<Trait> &po)
5205{
5206 long long int i = it->m_pos + 1;
5207 const auto start = i;
5208
5209 if (start >= po.m_fr.m_data[it->m_line].first.length()) {
5210 return {{}, false};
5211 }
5212
5213 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5214 const auto ch = po.m_fr.m_data[it->m_line].first[i];
5215
5216 if (ch.isSpace() || ch == Trait::latin1ToChar('>')) {
5217 break;
5218 }
5219 }
5220
5221 return {po.m_fr.m_data[it->m_line].first.asString().sliced(start, i - start),
5222 i < po.m_fr.m_data[it->m_line].first.length() ?
5223 po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('>') : false};
5224}
5225
5226template<class Trait>
5227inline typename Parser<Trait>::Delims::iterator
5228Parser<Trait>::findIt(typename Delims::iterator it,
5229 typename Delims::iterator last,
5230 TextParsingOpts<Trait> &po)
5231{
5232 auto ret = it;
5233
5234 for (; it != last; ++it) {
5235 if ((it->m_line == po.m_line && it->m_pos < po.m_pos) || it->m_line < po.m_line) {
5236 ret = it;
5237 } else {
5238 break;
5239 }
5240 }
5241
5242 return ret;
5243}
5244
5245//! Read HTML data.
//!
//! Appends text from (\p line, \p pos) to (\p toLine, \p toPos) of po.m_fr.m_data to the
//! text of po.m_html.m_html, updates end position of this HTML item and moves current
//! position (po.m_line, po.m_pos) after the eaten text.
//!
//! \param line Index of the first line to eat.
//! \param pos Position in the first line to eat from.
//! \param toLine Index of the last line to eat.
//! \param toPos Position in the last line to eat up to (exclusive). Negative value
//! means "till the end of the line".
//! \param finish If true, HTML item is finished: it is appended to po.m_parent (or stored
//! in po.m_tmpHtml when reference links are collected) and HTML state is reset. If false,
//! po.m_html.m_continueHtml is set.
//! \param htmlRule Not used.
//! \param onLine If true, po.m_headingAllowed is cleared and po.m_checkLineOnNewType is set.
//! \param continueEating If false and HTML item already has a text, po.m_fr.m_emptyLinesBefore
//! line breaks are added before new text.
//!
//! NOTE(review): this listing has gaps in this function (source lines 5252, 5319 and 5343
//! are not present), see notes below. Confirm against the original header.
5246template<class Trait>
5247inline void
5248eatRawHtml(long long int line,
5249 long long int pos,
5250 long long int toLine,
5251 long long int toPos,
// NOTE(review): a line is missing in the listing here. The body uses "po" and callers
// pass it as the fifth argument, so presumably it is the declaration of "po" parameter.
5253 bool finish,
5254 int htmlRule,
5255 bool onLine,
5256 bool continueEating = false)
5257{
5258 MD_UNUSED(htmlRule)
5259
5260 if (line <= toLine) {
5261 typename Trait::String h = po.m_html.m_html->text();
5262
// Restore empty lines that were between already eaten HTML and this fragment.
5263 if (!h.isEmpty() && !continueEating) {
5264 for (long long int i = 0; i < po.m_fr.m_emptyLinesBefore; ++i) {
5265 h.push_back(Trait::latin1ToChar('\n'));
5266 }
5267 }
5268
// Part of the first line: up to toPos if it is the last line too (and toPos is not negative),
// otherwise till the end of the line.
5269 const auto first = po.m_fr.m_data[line].first.asString().sliced(
5270 pos,
5271 (line == toLine ? (toPos >= 0 ? toPos - pos : po.m_fr.m_data[line].first.length() - pos) :
5272 po.m_fr.m_data[line].first.length() - pos));
5273
// Line break is added only if this fragment is not on the line where HTML item ended before.
5274 if (!h.isEmpty() && !first.isEmpty() && po.m_html.m_html->endLine() != po.m_fr.m_data[line].second.m_lineNumber) {
5275 h.push_back(Trait::latin1ToChar('\n'));
5276 }
5277
5278 if (!first.isEmpty()) {
5279 h.push_back(first);
5280 }
5281
5282 ++line;
5283
// Whole lines between the first and the last ones.
5284 for (; line < toLine; ++line) {
5285 h.push_back(Trait::latin1ToChar('\n'));
5286 h.push_back(po.m_fr.m_data[line].first.asString());
5287 }
5288
// The last line (if it differs from the first one): up to toPos, or whole line if toPos is negative.
// Nothing is taken from it if toPos is 0.
5289 if (line == toLine && toPos != 0) {
5290 h.push_back(Trait::latin1ToChar('\n'));
5291 h.push_back(po.m_fr.m_data[line].first.asString().sliced(0, toPos > 0 ?
5292 toPos : po.m_fr.m_data[line].first.length()));
5293 }
5294
5295 auto endColumn = toPos;
5296 auto endLine = toLine;
5297
// toPos == 0 means that HTML ended at the end of the previous line.
5298 if (endColumn == 0 && endLine > 0) {
5299 --endLine;
5300 endColumn = po.m_fr.m_data.at(endLine).first.length();
5301 }
5302
5303 po.m_html.m_html->setEndColumn(po.m_fr.m_data.at(endLine).first.virginPos(endColumn >= 0 ?
5304 endColumn - 1 : po.m_fr.m_data.at(endLine).first.length() - 1));
5305 po.m_html.m_html->setEndLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
5306
// New current position: right after eaten text.
5307 po.m_line = (toPos >= 0 ? toLine : toLine + 1);
5308 po.m_pos = (toPos >= 0 ? toPos : 0);
5309
// If the line is exhausted and there is a next one, go to the beginning of the next line.
5310 if (po.m_line + 1 < static_cast<long long int>(po.m_fr.m_data.size()) &&
5311 po.m_pos >= po.m_fr.m_data.at(po.m_line).first.length()) {
5312 ++po.m_line;
5313 po.m_pos = 0;
5314 }
5315
5316 po.m_html.m_html->setText(h);
5317 }
5318
// NOTE(review): source line 5319 is missing in the listing here, its content is unknown.
5320
5321 if (onLine) {
5322 po.m_headingAllowed = false;
5323 po.m_checkLineOnNewType = true;
5324 }
5325
5326 if (finish) {
5327 if (!po.m_collectRefLinks) {
5328 po.m_parent->appendItem(po.m_html.m_html);
5329 po.m_parent->setEndColumn(po.m_html.m_html->endColumn());
5330 po.m_parent->setEndLine(po.m_html.m_html->endLine());
5331 initLastItemWithOpts<Trait>(po, po.m_html.m_html);
5332 po.m_html.m_html->setOpts(po.m_opts);
5333 po.m_lastText = nullptr;
5334 } else {
5335 po.m_tmpHtml = po.m_html.m_html;
5336 }
5337
// Value of the flag is saved because HTML state is reset on the next line.
5338 const auto online = po.m_html.m_onLine;
5339
5340 resetHtmlTag(po.m_html, &po);
5341
5342 if (online) {
// NOTE(review): source line 5343 (body of this "if") is missing in the listing, its content is unknown.
5344 }
5345 } else {
5346 po.m_html.m_continueHtml = true;
5347 }
5348}
5349
5350template<class Trait>
5351inline typename Parser<Trait>::Delims::iterator
5352Parser<Trait>::eatRawHtmlTillEmptyLine(typename Delims::iterator it,
5353 typename Delims::iterator last,
5354 long long int line,
5355 long long int pos,
5356 TextParsingOpts<Trait> &po,
5357 int htmlRule,
5358 bool onLine,
5359 bool continueEating)
5360{
5361 long long int emptyLine = line;
5362
5363 if (po.m_fr.m_emptyLinesBefore > 0 && po.m_html.m_html && po.m_html.m_continueHtml) {
5364 po.m_html.m_continueHtml = false;
5365 return it;
5366 }
5367
5368 for (auto it = po.m_fr.m_data.cbegin() + line, last = po.m_fr.m_data.cend(); it != last; ++it) {
5369 if (it->first.asString().simplified().isEmpty()) {
5370 break;
5371 }
5372
5373 ++emptyLine;
5374 }
5375
5376 if (emptyLine < static_cast<long long int>(po.m_fr.m_data.size())) {
5377 eatRawHtml(line, pos, emptyLine, 0, po, true, htmlRule, onLine, continueEating);
5378
5379 return findIt(it, last, po);
5380 } else {
5381 eatRawHtml(line, pos, po.m_fr.m_data.size() - 1, -1, po, false, htmlRule, onLine, continueEating);
5382
5383 if (it != last) {
5384 return std::prev(last);
5385 } else {
5386 return last;
5387 }
5388 }
5389}
5390
//! \return Is there a line that starts a new block in the lines of \p fr after
//! \p startLine up to \p endLine (inclusive)?
//!
//! A line is considered as a start of a new block if whatIsTheLine() returns one of the
//! types listed in the switch below, or if the line is indented by less than 4 white space
//! characters and its rest is accepted by isHorizontalLine(), isH1() or isH2().
//!
//! \param fr Block of lines.
//! \param startLine Index of the line before the first line to check.
//! \param endLine Index of the last line to check.
5391template<class Trait>
5392inline bool
5393Parser<Trait>::isNewBlockIn(MdBlock<Trait> &fr,
5394 long long int startLine,
5395 long long int endLine)
5396{
// The line with index startLine itself is not checked.
5397 for (auto i = startLine + 1; i <= endLine; ++i) {
5398 const auto type = whatIsTheLine(fr.m_data[i].first);
5399
5400 switch (type) {
// NOTE(review): case labels (source lines 5401-5409) are missing in this listing,
// so the list of block types leading to "return true" can't be seen here.
// Confirm against the original header.
5410 return true;
5411
5412 default:
5413 break;
5414 }
5415
5416 const auto ns = skipSpaces<Trait>(0, fr.m_data[i].first.asString());
5417
// Check for horizontal line and setext heading underline, indent should be less than 4.
5418 if (ns < 4) {
5419 const auto s = fr.m_data[i].first.asString().sliced(ns);
5420
5421 if (isHorizontalLine<Trait>(s) || isH1<Trait>(s) || isH2<Trait>(s)) {
5422 return true;
5423 }
5424 }
5425 }
5426
5427 return false;
5428}
5429
5430template<class Trait>
5431inline void
5432Parser<Trait>::finishRule1HtmlTag(typename Delims::iterator it,
5433 typename Delims::iterator last,
5434 TextParsingOpts<Trait> &po,
5435 bool skipFirst)
5436{
5437 static const std::set<typename Trait::String> s_finish = {Trait::latin1ToString("/pre"),
5438 Trait::latin1ToString("/script"),
5439 Trait::latin1ToString("/style"),
5440 Trait::latin1ToString("/textarea")};
5441
5442 if (it != last) {
5443 bool ok = false;
5444 long long int l = -1, p = -1;
5445
5446 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less && skipFirst) {
5447 std::tie(ok, l, p, po.m_html.m_onLine, std::ignore) =
5448 isHtmlTag(it->m_line, it->m_pos, po, 1);
5449 }
5450
5451 if (po.m_html.m_onLine) {
5452 for (it = (skipFirst && it != last ? std::next(it) : it); it != last; ++it) {
5453 if (it->m_type == Delimiter::Less) {
5454 typename Trait::String tag;
5455 bool closed = false;
5456
5457 std::tie(tag, closed) = readHtmlTag(it, po);
5458
5459 if (closed) {
5460 if (s_finish.find(tag.toLower()) != s_finish.cend()) {
5461 eatRawHtml(po.m_line, po.m_pos, it->m_line, -1, po,
5462 true, 1, po.m_html.m_onLine);
5463
5464 return;
5465 }
5466 }
5467 }
5468 }
5469 } else if (ok && !isNewBlockIn(po.m_fr, it->m_line, l)) {
5470 eatRawHtml(po.m_line, po.m_pos, l, p + 1, po, true, 1, false);
5471
5472 return;
5473 } else {
5474 resetHtmlTag(po.m_html, &po);
5475
5476 return;
5477 }
5478 }
5479
5480 if (po.m_html.m_onLine) {
5481 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 1, po.m_html.m_onLine);
5482 } else {
5483 resetHtmlTag(po.m_html, &po);
5484 }
5485}
5486
5487template<class Trait>
5488inline void
5489Parser<Trait>::finishRule2HtmlTag(typename Delims::iterator it,
5490 typename Delims::iterator last,
5491 TextParsingOpts<Trait> &po)
5492{
5493 if (it != last) {
5494 const auto start = it;
5495
5496 MdLineData::CommentData commentData = {2, true};
5497 bool onLine = po.m_html.m_onLine;
5498
5499 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5500 long long int i = po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos);
5501
5502 commentData = po.m_fr.m_data[it->m_line].second.m_htmlCommentData[i];
5503
5504 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5505 po.m_html.m_onLine = onLine;
5506 }
5507
5508 if (commentData.first != -1 && commentData.second) {
5509 for (; it != last; ++it) {
5510 if (it->m_type == Delimiter::Greater) {
5511 auto p = it->m_pos;
5512
5513 bool doContinue = false;
5514
5515 for (char i = 0; i < commentData.first; ++i) {
5516 if (!(p > 0 && po.m_fr.m_data[it->m_line].first[p - 1] == Trait::latin1ToChar('-'))) {
5517 doContinue = true;
5518
5519 break;
5520 }
5521
5522 --p;
5523 }
5524
5525 if (doContinue) {
5526 continue;
5527 }
5528
5529 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5530 eatRawHtml(po.m_line, po.m_pos, it->m_line,
5531 onLine ? po.m_fr.m_data[it->m_line].first.length() : it->m_pos + 1,
5532 po, true, 2, onLine);
5533 } else {
5534 resetHtmlTag(po.m_html, &po);
5535 }
5536
5537 return;
5538 }
5539 }
5540 }
5541 }
5542
5543 if (po.m_html.m_onLine) {
5544 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 2, po.m_html.m_onLine);
5545 } else {
5546 resetHtmlTag(po.m_html, &po);
5547 }
5548}
5549
5550template<class Trait>
5551inline void
5552Parser<Trait>::finishRule3HtmlTag(typename Delims::iterator it,
5553 typename Delims::iterator last,
5554 TextParsingOpts<Trait> &po)
5555{
5556 bool onLine = po.m_html.m_onLine;
5557
5558 if (it != last) {
5559 const auto start = it;
5560
5561 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5562 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5563 po.m_html.m_onLine = onLine;
5564 }
5565
5566 for (; it != last; ++it) {
5567 if (it->m_type == Delimiter::Greater) {
5568 if (it->m_pos > 0 && po.m_fr.m_data[it->m_line].first[it->m_pos - 1] == Trait::latin1ToChar('?')) {
5569 long long int i = it->m_pos + 1;
5570
5571 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5572 if (po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('<')) {
5573 break;
5574 }
5575 }
5576
5577 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5578 eatRawHtml(po.m_line, po.m_pos, it->m_line, i, po, true, 3, onLine);
5579 } else {
5580 resetHtmlTag(po.m_html, &po);
5581 }
5582
5583 return;
5584 }
5585 }
5586 }
5587 }
5588
5589 if (po.m_html.m_onLine) {
5590 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 3, onLine);
5591 } else {
5592 resetHtmlTag(po.m_html, &po);
5593 }
5594}
5595
5596template<class Trait>
5597inline void
5598Parser<Trait>::finishRule4HtmlTag(typename Delims::iterator it,
5599 typename Delims::iterator last,
5600 TextParsingOpts<Trait> &po)
5601{
5602 if (it != last) {
5603 const auto start = it;
5604
5605 bool onLine = po.m_html.m_onLine;
5606
5607 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5608 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5609 po.m_html.m_onLine = onLine;
5610 }
5611
5612 for (; it != last; ++it) {
5613 if (it->m_type == Delimiter::Greater) {
5614 long long int i = it->m_pos + 1;
5615
5616 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5617 if (po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('<')) {
5618 break;
5619 }
5620 }
5621
5622 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5623 eatRawHtml(po.m_line, po.m_pos, it->m_line, i, po, true, 4, onLine);
5624 } else {
5625 resetHtmlTag(po.m_html, &po);
5626 }
5627
5628 return;
5629 }
5630 }
5631 }
5632
5633 if (po.m_html.m_onLine) {
5634 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 4, true);
5635 } else {
5636 resetHtmlTag(po.m_html, &po);
5637 }
5638}
5639
5640template<class Trait>
5641inline void
5642Parser<Trait>::finishRule5HtmlTag(typename Delims::iterator it,
5643 typename Delims::iterator last,
5644 TextParsingOpts<Trait> &po)
5645{
5646 if (it != last) {
5647 const auto start = it;
5648
5649 bool onLine = po.m_html.m_onLine;
5650
5651 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5652 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5653 po.m_html.m_onLine = onLine;
5654 }
5655
5656 for (; it != last; ++it) {
5657 if (it->m_type == Delimiter::Greater) {
5658 if (it->m_pos > 1 && po.m_fr.m_data[it->m_line].first[it->m_pos - 1] == Trait::latin1ToChar(']') &&
5659 po.m_fr.m_data[it->m_line].first[it->m_pos - 2] == Trait::latin1ToChar(']')) {
5660 long long int i = it->m_pos + 1;
5661
5662 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5663 if (po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('<')) {
5664 break;
5665 }
5666 }
5667
5668 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5669 eatRawHtml(po.m_line, po.m_pos, it->m_line, i, po, true, 5, onLine);
5670 } else {
5671 resetHtmlTag(po.m_html, &po);
5672 }
5673
5674 return;
5675 }
5676 }
5677 }
5678 }
5679
5680 if (po.m_html.m_onLine) {
5681 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 5, true);
5682 } else {
5683 resetHtmlTag(po.m_html, &po);
5684 }
5685}
5686
5687template<class Trait>
5688inline void
5689Parser<Trait>::finishRule6HtmlTag(typename Delims::iterator it,
5690 typename Delims::iterator last,
5691 TextParsingOpts<Trait> &po)
5692{
5693 po.m_html.m_onLine = (it != last ?
5694 it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()) : true);
5695
5696 if (po.m_html.m_onLine) {
5697 eatRawHtmlTillEmptyLine(it, last, po.m_line, po.m_pos, po, 6, true, true);
5698 } else {
5699 const auto nit = std::find_if(std::next(it), last, [](const auto &d) {
5700 return (d.m_type == Delimiter::Greater);
5701 });
5702
5703 if (nit != last && !isNewBlockIn(po.m_fr, it->m_line, nit->m_line)) {
5704 eatRawHtml(po.m_line, po.m_pos, nit->m_line, nit->m_pos + nit->m_len, po,
5705 true, 6, false);
5706 }
5707 }
5708}
5709
5710template<class Trait>
5711inline void
5712Parser<Trait>::finishRule7HtmlTag(typename Delims::iterator it,
5713 typename Delims::iterator last,
5714 TextParsingOpts<Trait> &po)
5715{
5716 if (po.m_html.m_onLine) {
5717 eatRawHtmlTillEmptyLine(it, last, po.m_line, po.m_pos, po, 7, true, true);
5718 } else if (it != last) {
5719 const auto start = it;
5720 long long int l = -1, p = -1;
5721 bool onLine = false;
5722 bool ok = false;
5723
5724 std::tie(ok, l, p, onLine, std::ignore) = isHtmlTag(it->m_line, it->m_pos, po, 7);
5725
5726 onLine = onLine && it->m_line == 0 && l == start->m_line;
5727
5728 if (ok) {
5729 eatRawHtml(po.m_line, po.m_pos, l, ++p, po, !onLine, 7, onLine);
5730
5731 po.m_html.m_onLine = onLine;
5732
5733 if (onLine) {
5734 eatRawHtmlTillEmptyLine(it, last, po.m_line, po.m_pos, po, 7, onLine, true);
5735 }
5736 }
5737 } else {
5738 resetHtmlTag(po.m_html, &po);
5739 }
5740}
5741
5742template<class Trait>
5743inline typename Parser<Trait>::Delims::iterator
5744Parser<Trait>::finishRawHtmlTag(typename Delims::iterator it,
5745 typename Delims::iterator last,
5746 TextParsingOpts<Trait> &po,
5747 bool skipFirst)
5748{
5749 po.m_detected = TextParsingOpts<Trait>::Detected::HTML;
5750
5751 switch (po.m_html.m_htmlBlockType) {
5752 case 1:
5753 finishRule1HtmlTag(it, last, po, skipFirst);
5754 break;
5755
5756 case 2:
5757 finishRule2HtmlTag(it, last, po);
5758 break;
5759
5760 case 3:
5761 finishRule3HtmlTag(it, last, po);
5762 break;
5763
5764 case 4:
5765 finishRule4HtmlTag(it, last, po);
5766 break;
5767
5768 case 5:
5769 finishRule5HtmlTag(it, last, po);
5770 break;
5771
5772 case 6:
5773 finishRule6HtmlTag(it, last, po);
5774 break;
5775
5776 case 7:
5777 finishRule7HtmlTag(it, last, po);
5778 break;
5779
5780 default:
5781 po.m_detected = TextParsingOpts<Trait>::Detected::Nothing;
5782 break;
5783 }
5784
5785 return findIt(it, last, po);
5786}
5787
5788template<class Trait>
5789inline int
5790Parser<Trait>::htmlTagRule(typename Delims::iterator it,
5791 typename Delims::iterator last,
5792 TextParsingOpts<Trait> &po)
5793{
5794 MD_UNUSED(last)
5795
5796 typename Trait::String tag;
5797
5798 std::tie(tag, std::ignore) = readHtmlTag(it, po);
5799
5800 if (tag.startsWith(Trait::latin1ToString("![CDATA["))) {
5801 return 5;
5802 }
5803
5804 tag = tag.toLower();
5805
5806 static const typename Trait::String s_validHtmlTagLetters =
5807 Trait::latin1ToString("abcdefghijklmnopqrstuvwxyz0123456789-");
5808
5809 bool closing = false;
5810
5811 if (tag.startsWith(Trait::latin1ToString("/"))) {
5812 tag.remove(0, 1);
5813 closing = true;
5814 }
5815
5816 if (tag.endsWith(Trait::latin1ToString("/"))) {
5817 tag.remove(tag.size() - 1, 1);
5818 }
5819
5820 if (tag.isEmpty()) {
5821 return -1;
5822 }
5823
5824 if (!tag.startsWith(Trait::latin1ToString("!")) &&
5825 !tag.startsWith(Trait::latin1ToString("?")) &&
5826 !(tag[0].unicode() >= 97 && tag[0].unicode() <= 122)) {
5827 return -1;
5828 }
5829
5830 static const std::set<typename Trait::String> s_rule1 = {Trait::latin1ToString("pre"),
5831 Trait::latin1ToString("script"),
5832 Trait::latin1ToString("style"),
5833 Trait::latin1ToString("textarea")};
5834
5835 if (!closing && s_rule1.find(tag) != s_rule1.cend()) {
5836 return 1;
5837 } else if (tag.startsWith(Trait::latin1ToString("!--"))) {
5838 return 2;
5839 } else if (tag.startsWith(Trait::latin1ToString("?"))) {
5840 return 3;
5841 } else if (tag.startsWith(Trait::latin1ToString("!")) && tag.size() > 1 &&
5842 ((tag[1].unicode() >= 65 && tag[1].unicode() <= 90) ||
5843 (tag[1].unicode() >= 97 && tag[1].unicode() <= 122))) {
5844 return 4;
5845 } else {
5846 static const std::set<typename Trait::String> s_rule6 = {
5847 Trait::latin1ToString("address"), Trait::latin1ToString("article"), Trait::latin1ToString("aside"), Trait::latin1ToString("base"),
5848 Trait::latin1ToString("basefont"), Trait::latin1ToString("blockquote"), Trait::latin1ToString("body"), Trait::latin1ToString("caption"),
5849 Trait::latin1ToString("center"), Trait::latin1ToString("col"), Trait::latin1ToString("colgroup"), Trait::latin1ToString("dd"),
5850 Trait::latin1ToString("details"), Trait::latin1ToString("dialog"), Trait::latin1ToString("dir"), Trait::latin1ToString("div"),
5851 Trait::latin1ToString("dl"), Trait::latin1ToString("dt"), Trait::latin1ToString("fieldset"), Trait::latin1ToString("figcaption"),
5852 Trait::latin1ToString("figure"), Trait::latin1ToString("footer"), Trait::latin1ToString("form"), Trait::latin1ToString("frame"),
5853 Trait::latin1ToString("frameset"), Trait::latin1ToString("h1"), Trait::latin1ToString("h2"), Trait::latin1ToString("h3"),
5854 Trait::latin1ToString("h4"), Trait::latin1ToString("h5"), Trait::latin1ToString("h6"), Trait::latin1ToString("head"),
5855 Trait::latin1ToString("header"), Trait::latin1ToString("hr"), Trait::latin1ToString("html"), Trait::latin1ToString("iframe"),
5856 Trait::latin1ToString("legend"), Trait::latin1ToString("li"), Trait::latin1ToString("link"), Trait::latin1ToString("main"),
5857 Trait::latin1ToString("menu"), Trait::latin1ToString("menuitem"), Trait::latin1ToString("nav"), Trait::latin1ToString("noframes"),
5858 Trait::latin1ToString("ol"), Trait::latin1ToString("optgroup"), Trait::latin1ToString("option"), Trait::latin1ToString("p"),
5859 Trait::latin1ToString("param"), Trait::latin1ToString("section"), Trait::latin1ToString("search"), Trait::latin1ToString("summary"),
5860 Trait::latin1ToString("table"), Trait::latin1ToString("tbody"), Trait::latin1ToString("td"), Trait::latin1ToString("tfoot"),
5861 Trait::latin1ToString("th"), Trait::latin1ToString("thead"), Trait::latin1ToString("title"), Trait::latin1ToString("tr"),
5862 Trait::latin1ToString("track"), Trait::latin1ToString("ul")};
5863
5864 for (long long int i = 1; i < tag.size(); ++i) {
5865 if (!s_validHtmlTagLetters.contains(tag[i])) {
5866 return -1;
5867 }
5868 }
5869
5870 if (s_rule6.find(tag) != s_rule6.cend()) {
5871 return 6;
5872 } else {
5873 bool tag = false;
5874
5875 std::tie(tag, std::ignore, std::ignore, std::ignore, std::ignore) =
5876 isHtmlTag(it->m_line, it->m_pos, po, 7);
5877
5878 if (tag) {
5879 return 7;
5880 }
5881 }
5882 }
5883
5884 return -1;
5885}
5886
5887template<class Trait>
5888inline typename Parser<Trait>::Delims::iterator
5889Parser<Trait>::checkForRawHtml(typename Delims::iterator it,
5890 typename Delims::iterator last,
5891 TextParsingOpts<Trait> &po)
5892{
5893 const auto rule = htmlTagRule(it, last, po);
5894
5895 if (rule == -1) {
5896 resetHtmlTag(po.m_html, &po);
5897
5898 po.m_firstInParagraph = false;
5899
5900 return it;
5901 }
5902
5903 po.m_html.m_htmlBlockType = rule;
5904 po.m_html.m_html.reset(new RawHtml<Trait>);
5905 po.m_html.m_html->setStartColumn(po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos));
5906 po.m_html.m_html->setStartLine(po.m_fr.m_data.at(it->m_line).second.m_lineNumber);
5907
5908 return finishRawHtmlTag(it, last, po, true);
5909}
5910
5911template<class Trait>
5912inline typename Parser<Trait>::Delims::iterator
5913Parser<Trait>::checkForMath(typename Delims::iterator it,
5914 typename Delims::iterator last,
5915 TextParsingOpts<Trait> &po)
5916{
5917 po.m_wasRefLink = false;
5918 po.m_firstInParagraph = false;
5919 po.m_headingAllowed = true;
5920
5921 const auto end = std::find_if(std::next(it), last, [&](const auto &d) {
5922 return (d.m_type == Delimiter::Math && d.m_len == it->m_len);
5923 });
5924
5925 if (end != last && end->m_line <= po.m_lastTextLine) {
5926 typename Trait::String math;
5927
5928 if (it->m_line == end->m_line) {
5929 math = po.m_fr.m_data[it->m_line].first.asString().sliced(
5930 it->m_pos + it->m_len, end->m_pos - (it->m_pos + it->m_len));
5931 } else {
5932 math = po.m_fr.m_data[it->m_line].first.asString().sliced(it->m_pos + it->m_len);
5933
5934 for (long long int i = it->m_line + 1; i < end->m_line; ++i) {
5935 math.push_back(Trait::latin1ToChar('\n'));
5936 math.push_back(po.m_fr.m_data[i].first.asString());
5937 }
5938
5939 math.push_back(Trait::latin1ToChar('\n'));
5940 math.push_back(po.m_fr.m_data[end->m_line].first.asString().sliced(0, end->m_pos));
5941 }
5942
5943 if (!po.m_collectRefLinks) {
5944 std::shared_ptr<Math<Trait>> m(new Math<Trait>);
5945
5946 auto startLine = po.m_fr.m_data.at(it->m_line).second.m_lineNumber;
5947 auto startColumn = po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len);
5948
5949 if (it->m_pos + it->m_len >= po.m_fr.m_data.at(it->m_line).first.length()) {
5950 std::tie(startColumn, startLine) = nextPosition(po.m_fr, startColumn, startLine);
5951 }
5952
5953 auto endColumn = po.m_fr.m_data.at(end->m_line).first.virginPos(end->m_pos);
5954 auto endLine = po.m_fr.m_data.at(end->m_line).second.m_lineNumber;
5955
5956 if (endColumn == 0) {
5957 std::tie(endColumn, endLine) = prevPosition(po.m_fr, endColumn, endLine);
5958 } else {
5959 --endColumn;
5960 }
5961
5962 m->setStartColumn(startColumn);
5963 m->setStartLine(startLine);
5964 m->setEndColumn(endColumn);
5965 m->setEndLine(endLine);
5966 m->setInline(it->m_len == 1);
5967 m->setStartDelim({po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos),
5968 po.m_fr.m_data[it->m_line].second.m_lineNumber,
5969 po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos + it->m_len - 1),
5970 po.m_fr.m_data[it->m_line].second.m_lineNumber});
5971 m->setEndDelim({po.m_fr.m_data[end->m_line].first.virginPos(end->m_pos),
5972 po.m_fr.m_data[end->m_line].second.m_lineNumber,
5973 po.m_fr.m_data[end->m_line].first.virginPos(end->m_pos + end->m_len - 1),
5974 po.m_fr.m_data[end->m_line].second.m_lineNumber});
5975 m->setFensedCode(false);
5976
5977 initLastItemWithOpts<Trait>(po, m);
5978
5979 if (math.startsWith(Trait::latin1ToString("`")) &&
5980 math.endsWith(Trait::latin1ToString("`")) &&
5981 !math.endsWith(Trait::latin1ToString("\\`")) &&
5982 math.length() > 1) {
5983 math = math.sliced(1, math.length() - 2);
5984 }
5985
5986 m->setExpr(math);
5987
5988 po.m_parent->appendItem(m);
5989
5990 po.m_pos = end->m_pos + end->m_len;
5991 po.m_line = end->m_line;
5992 po.m_lastText = nullptr;
5993 }
5994
5995 return end;
5996 }
5997
5998 return it;
5999}
6000
6001template<class Trait>
6002inline typename Parser<Trait>::Delims::iterator
6003Parser<Trait>::checkForAutolinkHtml(typename Delims::iterator it,
6004 typename Delims::iterator last,
6005 TextParsingOpts<Trait> &po,
6006 bool updatePos)
6007{
6008 const auto nit = std::find_if(std::next(it), last, [](const auto &d) {
6009 return (d.m_type == Delimiter::Greater);
6010 });
6011
6012 if (nit != last) {
6013 if (nit->m_line == it->m_line) {
6014 const auto url = po.m_fr.m_data.at(it->m_line).first.asString().sliced(
6015 it->m_pos + 1, nit->m_pos - it->m_pos - 1);
6016
6017 bool isUrl = true;
6018
6019 for (long long int i = 0; i < url.size(); ++i) {
6020 if (url[i].isSpace()) {
6021 isUrl = false;
6022
6023 break;
6024 }
6025 }
6026
6027 if (isUrl) {
6028 if (!isValidUrl<Trait>(url) && !isEmail<Trait>(url)) {
6029 isUrl = false;
6030 }
6031 }
6032
6033 if (isUrl) {
6034 if (!po.m_collectRefLinks) {
6035 std::shared_ptr<Link<Trait>> lnk(new Link<Trait>);
6036 lnk->setStartColumn(po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos));
6037 lnk->setStartLine(po.m_fr.m_data.at(it->m_line).second.m_lineNumber);
6038 lnk->setEndColumn(po.m_fr.m_data.at(nit->m_line).first.virginPos(nit->m_pos + nit->m_len - 1));
6039 lnk->setEndLine(po.m_fr.m_data.at(nit->m_line).second.m_lineNumber);
6040 lnk->setUrl(url);
6041 lnk->setOpts(po.m_opts);
6042 lnk->setTextPos({po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos + 1),
6043 po.m_fr.m_data[it->m_line].second.m_lineNumber,
6044 po.m_fr.m_data[nit->m_line].first.virginPos(nit->m_pos - 1),
6045 po.m_fr.m_data[nit->m_line].second.m_lineNumber});
6046 lnk->setUrlPos(lnk->textPos());
6047 po.m_parent->appendItem(lnk);
6048 }
6049
6050 po.m_wasRefLink = false;
6051 po.m_firstInParagraph = false;
6052 po.m_lastText = nullptr;
6053
6054 if (updatePos) {
6055 po.m_pos = nit->m_pos + nit->m_len;
6056 po.m_line = nit->m_line;
6057 }
6058
6059 return nit;
6060 } else {
6061 return checkForRawHtml(it, last, po);
6062 }
6063 } else {
6064 return checkForRawHtml(it, last, po);
6065 }
6066 } else {
6067 return checkForRawHtml(it, last, po);
6068 }
6069}
6070
6071template<class Trait>
6072inline void
6073Parser<Trait>::makeInlineCode(long long int startLine,
6074 long long int startPos,
6075 long long int lastLine,
6076 long long int lastPos,
6077 TextParsingOpts<Trait> &po,
6078 typename Delims::iterator startDelimIt,
6079 typename Delims::iterator endDelimIt)
6080{
6081 typename Trait::String c;
6082
6083 for (; po.m_line <= lastLine; ++po.m_line) {
6084 c.push_back(po.m_fr.m_data.at(po.m_line).first.asString().sliced(
6085 po.m_pos, (po.m_line == lastLine ? lastPos - po.m_pos :
6086 po.m_fr.m_data.at(po.m_line).first.length() - po.m_pos)));
6087
6088 if (po.m_line < lastLine) {
6089 c.push_back(Trait::latin1ToChar(' '));
6090 }
6091
6092 po.m_pos = 0;
6093 }
6094
6095 po.m_line = lastLine;
6096
6097 if (c[0] == Trait::latin1ToChar(' ') && c[c.size() - 1] == Trait::latin1ToChar(' ') &&
6098 skipSpaces<Trait>(0, c) < c.size()) {
6099 c.remove(0, 1);
6100 c.remove(c.size() - 1, 1);
6101 ++startPos;
6102 --lastPos;
6103 }
6104
6105 if (!c.isEmpty()) {
6106 auto code = std::make_shared<Code<Trait>>(c, false, true);
6107
6108 code->setStartColumn(po.m_fr.m_data.at(startLine).first.virginPos(startPos));
6109 code->setStartLine(po.m_fr.m_data.at(startLine).second.m_lineNumber);
6110 code->setEndColumn(po.m_fr.m_data.at(lastLine).first.virginPos(lastPos - 1));
6111 code->setEndLine(po.m_fr.m_data.at(lastLine).second.m_lineNumber);
6112 code->setStartDelim({po.m_fr.m_data.at(startDelimIt->m_line).first.virginPos(
6113 startDelimIt->m_pos + (startDelimIt->m_backslashed ? 1 : 0)),
6114 po.m_fr.m_data.at(startDelimIt->m_line).second.m_lineNumber,
6115 po.m_fr.m_data.at(startDelimIt->m_line).first.virginPos(
6116 startDelimIt->m_pos + (startDelimIt->m_backslashed ? 1 : 0)) +
6117 startDelimIt->m_len - 1 - (startDelimIt->m_backslashed ? 1 : 0),
6118 po.m_fr.m_data.at(startDelimIt->m_line).second.m_lineNumber});
6119 code->setEndDelim(
6120 {po.m_fr.m_data.at(endDelimIt->m_line).first.virginPos(
6121 endDelimIt->m_pos + (endDelimIt->m_backslashed ? 1 : 0)),
6122 po.m_fr.m_data.at(endDelimIt->m_line).second.m_lineNumber,
6123 po.m_fr.m_data.at(endDelimIt->m_line).first.virginPos(
6124 endDelimIt->m_pos + (endDelimIt->m_backslashed ? 1 : 0) +
6125 endDelimIt->m_len - 1 - (endDelimIt->m_backslashed ? 1 : 0)),
6126 po.m_fr.m_data.at(endDelimIt->m_line).second.m_lineNumber});
6127 code->setOpts(po.m_opts);
6128
6129 initLastItemWithOpts<Trait>(po, code);
6130
6131 po.m_parent->appendItem(code);
6132 }
6133
6134 po.m_wasRefLink = false;
6135 po.m_firstInParagraph = false;
6136 po.m_lastText = nullptr;
6137}
6138
6139template<class Trait>
6140inline typename Parser<Trait>::Delims::iterator
6141Parser<Trait>::checkForInlineCode(typename Delims::iterator it,
6142 typename Delims::iterator last,
6143 TextParsingOpts<Trait> &po)
6144{
6145 const auto len = it->m_len;
6146 const auto start = it;
6147
6148 po.m_wasRefLink = false;
6149 po.m_firstInParagraph = false;
6150 po.m_headingAllowed = true;
6151
6152 ++it;
6153
6154 for (; it != last; ++it) {
6155 if (it->m_line <= po.m_lastTextLine) {
6156 const auto p = skipSpaces<Trait>(0, po.m_fr.m_data.at(it->m_line).first.asString());
6157 const auto withoutSpaces = po.m_fr.m_data.at(it->m_line).first.asString().sliced(p);
6158
6159 if ((it->m_type == Delimiter::HorizontalLine && withoutSpaces[0] == Trait::latin1ToChar('-')) ||
6160 it->m_type == Delimiter::H1 || it->m_type == Delimiter::H2) {
6161 break;
6162 } else if (it->m_type == Delimiter::InlineCode && (it->m_len - (it->m_backslashed ? 1 : 0)) == len) {
6163 makeText(start->m_line, start->m_pos, po);
6164
6165 if (!po.m_collectRefLinks) {
6166 po.m_pos = start->m_pos + start->m_len;
6167
6168 makeInlineCode(start->m_line, start->m_pos + start->m_len, it->m_line,
6169 it->m_pos + (it->m_backslashed ? 1 : 0), po, start, it);
6170
6171 po.m_line = it->m_line;
6172 po.m_pos = it->m_pos + it->m_len;
6173 }
6174
6175 return it;
6176 }
6177 } else {
6178 break;
6179 }
6180 }
6181
6182 makeText(start->m_line, start->m_pos + start->m_len, po);
6183
6184 return start;
6185}
6186
6187template<class Trait>
6188inline std::pair<typename MdBlock<Trait>::Data, typename Parser<Trait>::Delims::iterator>
6189Parser<Trait>::readTextBetweenSquareBrackets(typename Delims::iterator start,
6190 typename Delims::iterator it,
6191 typename Delims::iterator last,
6192 TextParsingOpts<Trait> &po,
6193 bool doNotCreateTextOnFail,
6194 WithPosition *pos)
6195{
6196 if (it != last && it->m_line <= po.m_lastTextLine) {
6197 if (start->m_line == it->m_line) {
6198 const auto p = start->m_pos + start->m_len;
6199 const auto n = it->m_pos - p;
6200
6201 if (pos) {
6202 long long int startPos, startLine, endPos, endLine;
6203 std::tie(startPos, startLine) = nextPosition(po.m_fr,
6204 po.m_fr.m_data[start->m_line].first.virginPos(
6205 start->m_pos + start->m_len - 1),
6206 po.m_fr.m_data[start->m_line].second.m_lineNumber);
6207 std::tie(endPos, endLine) =
6208 prevPosition(po.m_fr, po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos),
6209 po.m_fr.m_data[it->m_line].second.m_lineNumber);
6210
6211 *pos = {startPos, startLine, endPos, endLine};
6212 }
6213
6214 return {{{po.m_fr.m_data.at(start->m_line).first.sliced(p, n),
6215 {po.m_fr.m_data.at(start->m_line).second.m_lineNumber}}}, it};
6216 } else {
6217 if (it->m_line - start->m_line < 3) {
6218 typename MdBlock<Trait>::Data res;
6219 res.push_back({po.m_fr.m_data.at(start->m_line).first.sliced(
6220 start->m_pos + start->m_len), po.m_fr.m_data.at(start->m_line).second});
6221
6222 long long int i = start->m_line + 1;
6223
6224 for (; i <= it->m_line; ++i) {
6225 if (i == it->m_line) {
6226 res.push_back({po.m_fr.m_data.at(i).first.sliced(0, it->m_pos),
6227 po.m_fr.m_data.at(i).second});
6228 } else {
6229 res.push_back({po.m_fr.m_data.at(i).first, po.m_fr.m_data.at(i).second});
6230 }
6231 }
6232
6233 if (pos) {
6234 long long int startPos, startLine, endPos, endLine;
6235 std::tie(startPos, startLine) = nextPosition(po.m_fr,
6236 po.m_fr.m_data[start->m_line].first.virginPos(
6237 start->m_pos + start->m_len - 1),
6238 po.m_fr.m_data[start->m_line].second.m_lineNumber);
6239 std::tie(endPos, endLine) =
6240 prevPosition(po.m_fr, po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos),
6241 po.m_fr.m_data[it->m_line].second.m_lineNumber);
6242
6243 *pos = {startPos, startLine, endPos, endLine};
6244 }
6245
6246 return {res, it};
6247 } else {
6248 if (!doNotCreateTextOnFail) {
6249 makeText(start->m_line, start->m_pos + start->m_len, po);
6250 }
6251
6252 return {{}, start};
6253 }
6254 }
6255 } else {
6256 if (!doNotCreateTextOnFail) {
6257 makeText(start->m_line, start->m_pos + start->m_len, po);
6258 }
6259
6260 return {{}, start};
6261 }
6262}
6263
6264template<class Trait>
6265inline std::pair<typename MdBlock<Trait>::Data, typename Parser<Trait>::Delims::iterator>
6266Parser<Trait>::checkForLinkText(typename Delims::iterator it,
6267 typename Delims::iterator last,
6268 TextParsingOpts<Trait> &po,
6269 WithPosition *pos)
6270{
6271 const auto start = it;
6272
6273 long long int brackets = 0;
6274
6275 const bool collectRefLinks = po.m_collectRefLinks;
6276 po.m_collectRefLinks = true;
6277 long long int l = po.m_line, p = po.m_pos;
6278
6279 for (it = std::next(it); it != last; ++it) {
6280 bool quit = false;
6281
6282 switch (it->m_type) {
6283 case Delimiter::SquareBracketsClose: {
6284 if (!brackets)
6285 quit = true;
6286 else
6287 --brackets;
6288 } break;
6289
6290 case Delimiter::SquareBracketsOpen:
6291 case Delimiter::ImageOpen:
6292 ++brackets;
6293 break;
6294
6295 case Delimiter::InlineCode:
6296 it = checkForInlineCode(it, last, po);
6297 break;
6298
6299 case Delimiter::Less:
6300 it = checkForAutolinkHtml(it, last, po, false);
6301 break;
6302
6303 default:
6304 break;
6305 }
6306
6307 if (quit) {
6308 break;
6309 }
6310 }
6311
6312 const auto r = readTextBetweenSquareBrackets(start, it, last, po, false, pos);
6313
6314 po.m_collectRefLinks = collectRefLinks;
6315 resetHtmlTag(po.m_html, &po);
6316 po.m_line = l;
6317 po.m_pos = p;
6318
6319 return r;
6320}
6321
6322template<class Trait>
6323inline std::pair<typename MdBlock<Trait>::Data, typename Parser<Trait>::Delims::iterator>
6324Parser<Trait>::checkForLinkLabel(typename Delims::iterator it,
6325 typename Delims::iterator last,
6326 TextParsingOpts<Trait> &po,
6327 WithPosition *pos)
6328{
6329 const auto start = it;
6330
6331 for (it = std::next(it); it != last; ++it) {
6332 bool quit = false;
6333
6334 switch (it->m_type) {
6335 case Delimiter::SquareBracketsClose: {
6336 quit = true;
6337 } break;
6338
6339 case Delimiter::SquareBracketsOpen:
6340 case Delimiter::ImageOpen: {
6341 it = last;
6342 quit = true;
6343 } break;
6344
6345 default:
6346 break;
6347 }
6348
6349 if (quit)
6350 break;
6351 }
6352
6353 return readTextBetweenSquareBrackets(start, it, last, po, true, pos);
6354}
6355
6356template<class Trait>
6357inline typename Trait::String
6358Parser<Trait>::toSingleLine(const typename MdBlock<Trait>::Data &d)
6359{
6360 typename Trait::String res;
6361 bool first = true;
6362
6363 for (const auto &s : d) {
6364 if (!first) {
6365 res.push_back(Trait::latin1ToChar(' '));
6366 }
6367 res.push_back(s.first.asString().simplified());
6368 first = false;
6369 }
6370
6371 return res;
6372}
6373
6374template<class Trait>
6375inline std::shared_ptr<Link<Trait>>
6376Parser<Trait>::makeLink(const typename Trait::String &url,
6377 const typename MdBlock<Trait>::Data &text,
6378 TextParsingOpts<Trait> &po,
6379 bool doNotCreateTextOnFail,
6380 long long int startLine,
6381 long long int startPos,
6382 long long int lastLine,
6383 long long int lastPos,
6384 const WithPosition &textPos,
6385 const WithPosition &urlPos)
6386{
6387 MD_UNUSED(doNotCreateTextOnFail)
6388
6389 typename Trait::String u = (url.startsWith(Trait::latin1ToString("#")) ?
6390 url : removeBackslashes<typename Trait::String, Trait>(replaceEntity<Trait>(url)));
6391
6392 if (!u.isEmpty()) {
6393 if (!u.startsWith(Trait::latin1ToString("#"))) {
6394 const auto checkForFile = [&](typename Trait::String &url,
6395 const typename Trait::String &ref = {}) -> bool {
6396 if (Trait::fileExists(url)) {
6397 url = Trait::absoluteFilePath(url);
6398
6399 if (!po.m_collectRefLinks) {
6400 po.m_linksToParse.push_back(url);
6401 }
6402
6403 if (!ref.isEmpty()) {
6404 url = ref + Trait::latin1ToString("/") + url;
6405 }
6406
6407 return true;
6408 } else if (Trait::fileExists(url, po.m_workingPath)) {
6409 url = Trait::absoluteFilePath(po.m_workingPath + Trait::latin1ToString("/") + url);
6410
6411 if (!po.m_collectRefLinks) {
6412 po.m_linksToParse.push_back(url);
6413 }
6414
6415 if (!ref.isEmpty()) {
6416 url = ref + Trait::latin1ToString("/") + url;
6417 }
6418
6419 return true;
6420 } else {
6421 return false;
6422 }
6423 };
6424
6425 if (!checkForFile(u) && u.contains(Trait::latin1ToChar('#'))) {
6426 const auto i = u.indexOf(Trait::latin1ToChar('#'));
6427 const auto ref = u.sliced(i);
6428 u = u.sliced(0, i);
6429
6430 if (!checkForFile(u, ref)) {
6431 u = u + ref;
6432 }
6433 }
6434 } else
6435 u = u + (po.m_workingPath.isEmpty() ? typename Trait::String() :
6436 Trait::latin1ToString("/") + po.m_workingPath) + Trait::latin1ToString("/") +
6437 po.m_fileName;
6438 }
6439
6440 std::shared_ptr<Link<Trait>> link(new Link<Trait>);
6441 link->setUrl(u);
6442 link->setOpts(po.m_opts);
6443 link->setTextPos(textPos);
6444 link->setUrlPos(urlPos);
6445
6446 MdBlock<Trait> block = {text, 0};
6447
6448 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
6449
6450 RawHtmlBlock<Trait> html;
6451
6452 parseFormattedTextLinksImages(block,
6453 std::static_pointer_cast<Block<Trait>>(p),
6454 po.m_doc,
6455 po.m_linksToParse,
6456 po.m_workingPath,
6457 po.m_fileName,
6458 po.m_collectRefLinks,
6459 true,
6460 html,
6461 true);
6462
6463 if (!p->isEmpty()) {
6464 std::shared_ptr<Image<Trait>> img;
6465
6466 if (p->items().size() == 1 && p->items().at(0)->type() == ItemType::Paragraph) {
6467 const auto ip = std::static_pointer_cast<Paragraph<Trait>>(p->items().at(0));
6468
6469 for (auto it = ip->items().cbegin(), last = ip->items().cend(); it != last; ++it) {
6470 switch ((*it)->type()) {
6471 case ItemType::Link:
6472 return {};
6473
6474 case ItemType::Image: {
6475 img = std::static_pointer_cast<Image<Trait>>(*it);
6476 } break;
6477
6478 default:
6479 break;
6480 }
6481 }
6482
6483 if (img.get()) {
6484 link->setImg(img);
6485 }
6486
6487 link->setP(ip);
6488 }
6489 }
6490
6491 if (html.m_html.get()) {
6492 link->p()->appendItem(html.m_html);
6493 }
6494
6495 link->setText(toSingleLine(removeBackslashes<Trait>(text)));
6496 link->setStartColumn(po.m_fr.m_data.at(startLine).first.virginPos(startPos));
6497 link->setStartLine(po.m_fr.m_data.at(startLine).second.m_lineNumber);
6498 link->setEndColumn(po.m_fr.m_data.at(lastLine).first.virginPos(lastPos - 1));
6499 link->setEndLine(po.m_fr.m_data.at(lastLine).second.m_lineNumber);
6500
6501 initLastItemWithOpts<Trait>(po, link);
6502
6503 po.m_lastText = nullptr;
6504
6505 return link;
6506}
6507
6508template<class Trait>
6509inline bool
6510Parser<Trait>::createShortcutLink(const typename MdBlock<Trait>::Data &text,
6511 TextParsingOpts<Trait> &po,
6512 long long int startLine,
6513 long long int startPos,
6514 long long int lastLineForText,
6515 long long int lastPosForText,
6516 typename Delims::iterator lastIt,
6517 const typename MdBlock<Trait>::Data &linkText,
6518 bool doNotCreateTextOnFail,
6519 const WithPosition &textPos,
6520 const WithPosition &linkTextPos)
6521{
6522 const auto u = Trait::latin1ToString("#") + toSingleLine(text).toCaseFolded().toUpper();
6523 const auto url = u + Trait::latin1ToString("/") + (po.m_workingPath.isEmpty() ?
6524 typename Trait::String() : po.m_workingPath + Trait::latin1ToString("/")) + po.m_fileName;
6525
6526 po.m_wasRefLink = false;
6527 po.m_firstInParagraph = false;
6528 po.m_headingAllowed = true;
6529
6530 if (po.m_doc->labeledLinks().find(url) != po.m_doc->labeledLinks().cend()) {
6531 if (!po.m_collectRefLinks) {
6532 const auto isLinkTextEmpty = toSingleLine(linkText).isEmpty();
6533
6534 const auto link = makeLink(u,
6535 (isLinkTextEmpty ? text : linkText),
6536 po,
6537 doNotCreateTextOnFail,
6538 startLine,
6539 startPos,
6540 lastIt->m_line,
6541 lastIt->m_pos + lastIt->m_len,
6542 (isLinkTextEmpty ? textPos : linkTextPos),
6543 textPos);
6544
6545 if (link.get()) {
6546 po.m_linksToParse.push_back(url);
6547 po.m_parent->appendItem(link);
6548
6549 po.m_line = lastIt->m_line;
6550 po.m_pos = lastIt->m_pos + lastIt->m_len;
6551 } else {
6552 if (!doNotCreateTextOnFail) {
6553 makeText(lastLineForText, lastPosForText, po);
6554 }
6555
6556 return false;
6557 }
6558 }
6559
6560 return true;
6561 } else if (!doNotCreateTextOnFail) {
6562 makeText(lastLineForText, lastPosForText, po);
6563 }
6564
6565 return false;
6566}
6567
6568template<class Trait>
6569inline std::shared_ptr<Image<Trait>>
6570Parser<Trait>::makeImage(const typename Trait::String &url,
6571 const typename MdBlock<Trait>::Data &text,
6572 TextParsingOpts<Trait> &po,
6573 bool doNotCreateTextOnFail,
6574 long long int startLine,
6575 long long int startPos,
6576 long long int lastLine,
6577 long long int lastPos,
6578 const WithPosition &textPos,
6579 const WithPosition &urlPos)
6580{
6581 MD_UNUSED(doNotCreateTextOnFail)
6582
6583 std::shared_ptr<Image<Trait>> img(new Image<Trait>);
6584
6585 typename Trait::String u = (url.startsWith(Trait::latin1ToString("#")) ? url :
6586 removeBackslashes<typename Trait::String, Trait>(replaceEntity<Trait>(url)));
6587
6588 if (Trait::fileExists(u)) {
6589 img->setUrl(u);
6590 } else if (Trait::fileExists(u, po.m_workingPath)) {
6591 img->setUrl(po.m_workingPath + Trait::latin1ToString("/") + u);
6592 } else {
6593 img->setUrl(u);
6594 }
6595
6596 MdBlock<Trait> block = {text, 0};
6597
6598 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
6599
6600 RawHtmlBlock<Trait> html;
6601
6602 parseFormattedTextLinksImages(block,
6603 std::static_pointer_cast<Block<Trait>>(p),
6604 po.m_doc,
6605 po.m_linksToParse,
6606 po.m_workingPath,
6607 po.m_fileName,
6608 po.m_collectRefLinks,
6609 true,
6610 html,
6611 true);
6612
6613 if (!p->isEmpty()) {
6614 if (p->items().size() == 1 && p->items().at(0)->type() == ItemType::Paragraph) {
6615 img->setP(std::static_pointer_cast<Paragraph<Trait>>(p->items().at(0)));
6616 }
6617 }
6618
6619 img->setText(toSingleLine(removeBackslashes<Trait>(text)));
6620 img->setStartColumn(po.m_fr.m_data.at(startLine).first.virginPos(startPos));
6621 img->setStartLine(po.m_fr.m_data.at(startLine).second.m_lineNumber);
6622 img->setEndColumn(po.m_fr.m_data.at(lastLine).first.virginPos(lastPos - 1));
6623 img->setEndLine(po.m_fr.m_data.at(lastLine).second.m_lineNumber);
6624 img->setTextPos(textPos);
6625 img->setUrlPos(urlPos);
6626
6627 initLastItemWithOpts<Trait>(po, img);
6628
6629 po.m_lastText = nullptr;
6630
6631 return img;
6632}
6633
6634template<class Trait>
6635inline bool
6636Parser<Trait>::createShortcutImage(const typename MdBlock<Trait>::Data &text,
6637 TextParsingOpts<Trait> &po,
6638 long long int startLine,
6639 long long int startPos,
6640 long long int lastLineForText,
6641 long long int lastPosForText,
6642 typename Delims::iterator lastIt,
6643 const typename MdBlock<Trait>::Data &linkText,
6644 bool doNotCreateTextOnFail,
6645 const WithPosition &textPos,
6646 const WithPosition &linkTextPos)
6647{
6648 const auto url = Trait::latin1ToString("#") + toSingleLine(text).toCaseFolded().toUpper() +
6649 Trait::latin1ToString("/") + (po.m_workingPath.isEmpty() ? typename Trait::String() :
6650 po.m_workingPath + Trait::latin1ToString("/")) + po.m_fileName;
6651
6652 po.m_wasRefLink = false;
6653 po.m_firstInParagraph = false;
6654 po.m_headingAllowed = true;
6655
6656 const auto iit = po.m_doc->labeledLinks().find(url);
6657
6658 if (iit != po.m_doc->labeledLinks().cend()) {
6659 if (!po.m_collectRefLinks) {
6660 const auto isLinkTextEmpty = toSingleLine(linkText).isEmpty();
6661
6662 const auto img = makeImage(iit->second->url(),
6663 (isLinkTextEmpty ? text : linkText),
6664 po,
6665 doNotCreateTextOnFail,
6666 startLine,
6667 startPos,
6668 lastIt->m_line,
6669 lastIt->m_pos + lastIt->m_len,
6670 (isLinkTextEmpty ? textPos : linkTextPos),
6671 textPos);
6672
6673 po.m_parent->appendItem(img);
6674
6675 po.m_line = lastIt->m_line;
6676 po.m_pos = lastIt->m_pos + lastIt->m_len;
6677 }
6678
6679 return true;
6680 } else if (!doNotCreateTextOnFail) {
6681 makeText(lastLineForText, lastPosForText, po);
6682 }
6683
6684 return false;
6685}
6686
6687//! Skip space in the block up to 1 new line.
6688template<class Trait>
6689inline void
6690skipSpacesUpTo1Line(long long int &line,
6691 long long int &pos,
6692 const typename MdBlock<Trait>::Data &fr)
6693{
6694 pos = skipSpaces<Trait>(pos, fr.at(line).first.asString());
6695
6696 if (pos == fr.at(line).first.length() && line + 1 < (long long int)fr.size()) {
6697 ++line;
6698 pos = skipSpaces<Trait>(0, fr.at(line).first.asString());
6699 }
6700}
6701
6702//! Read link's destination.
6703template<class Trait>
6704inline std::tuple<long long int, long long int, bool, typename Trait::String, long long int>
6705readLinkDestination(long long int line,
6706 long long int pos,
6707 const TextParsingOpts<Trait> &po,
6708 WithPosition *urlPos = nullptr)
6709{
6710 skipSpacesUpTo1Line<Trait>(line, pos, po.m_fr.m_data);
6711
6712 const auto destLine = line;
6713 const auto &s = po.m_fr.m_data.at(line).first.asString();
6714 bool backslash = false;
6715
6716 if (pos < s.length() && line <= po.m_lastTextLine) {
6717 if (s[pos] == Trait::latin1ToChar('<')) {
6718 ++pos;
6719
6720 if (urlPos) {
6721 urlPos->setStartColumn(po.m_fr.m_data[line].first.virginPos(pos));
6722 urlPos->setStartLine(po.m_fr.m_data[line].second.m_lineNumber);
6723 }
6724
6725 const auto start = pos;
6726
6727 while (pos < s.size()) {
6728 bool now = false;
6729
6730 if (s[pos] == Trait::latin1ToChar('\\') && !backslash) {
6731 backslash = true;
6732 now = true;
6733 } else if (!backslash && s[pos] == Trait::latin1ToChar('<')) {
6734 return {line, pos, false, {}, destLine};
6735 } else if (!backslash && s[pos] == Trait::latin1ToChar('>')) {
6736 break;
6737 }
6738
6739 if (!now) {
6740 backslash = false;
6741 }
6742
6743 ++pos;
6744 }
6745
6746 if (pos < s.size() && s[pos] == Trait::latin1ToChar('>')) {
6747 if (urlPos) {
6748 urlPos->setEndColumn(po.m_fr.m_data[line].first.virginPos(pos - 1));
6749 urlPos->setEndLine(po.m_fr.m_data[line].second.m_lineNumber);
6750 }
6751
6752 ++pos;
6753
6754 return {line, pos, true, s.sliced(start, pos - start - 1), destLine};
6755 } else {
6756 return {line, pos, false, {}, destLine};
6757 }
6758 } else {
6759 long long int pc = 0;
6760
6761 const auto start = pos;
6762
6763 if (urlPos) {
6764 urlPos->setStartColumn(po.m_fr.m_data[line].first.virginPos(pos));
6765 urlPos->setStartLine(po.m_fr.m_data[line].second.m_lineNumber);
6766 }
6767
6768 while (pos < s.size()) {
6769 bool now = false;
6770
6771 if (s[pos] == Trait::latin1ToChar('\\') && !backslash) {
6772 backslash = true;
6773 now = true;
6774 } else if (!backslash && s[pos] == Trait::latin1ToChar(' ')) {
6775 if (!pc) {
6776 if (urlPos) {
6777 urlPos->setEndColumn(po.m_fr.m_data[line].first.virginPos(pos - 1));
6778 urlPos->setEndLine(po.m_fr.m_data[line].second.m_lineNumber);
6779 }
6780
6781 return {line, pos, true, s.sliced(start, pos - start), destLine};
6782 } else {
6783 return {line, pos, false, {}, destLine};
6784 }
6785 } else if (!backslash && s[pos] == Trait::latin1ToChar('(')) {
6786 ++pc;
6787 } else if (!backslash && s[pos] == Trait::latin1ToChar(')')) {
6788 if (!pc) {
6789 if (urlPos) {
6790 urlPos->setEndColumn(po.m_fr.m_data[line].first.virginPos(pos - 1));
6791 urlPos->setEndLine(po.m_fr.m_data[line].second.m_lineNumber);
6792 }
6793
6794 return {line, pos, true, s.sliced(start, pos - start), destLine};
6795 } else {
6796 --pc;
6797 }
6798 }
6799
6800 if (!now) {
6801 backslash = false;
6802 }
6803
6804 ++pos;
6805 }
6806
6807 if (urlPos) {
6808 urlPos->setEndColumn(po.m_fr.m_data[line].first.virginPos(pos - 1));
6809 urlPos->setEndLine(po.m_fr.m_data[line].second.m_lineNumber);
6810 }
6811
6812 return {line, pos, true, s.sliced(start, pos - start), destLine};
6813 }
6814 } else {
6815 return {line, pos, false, {}, destLine};
6816 }
6817}
6818
6819//! Read link's title.
6820template<class Trait>
6821inline std::tuple<long long int, long long int, bool, typename Trait::String, long long int>
6822readLinkTitle(long long int line,
6823 long long int pos,
6824 const TextParsingOpts<Trait> &po)
6825{
6826 const auto space = (pos < po.m_fr.m_data.at(line).first.length() ?
6827 po.m_fr.m_data.at(line).first[pos].isSpace() : true);
6828
6829 const auto firstLine = line;
6830
6831 skipSpacesUpTo1Line<Trait>(line, pos, po.m_fr.m_data);
6832
6833 if (pos >= po.m_fr.m_data.at(line).first.length()) {
6834 return {line, pos, true, {}, firstLine};
6835 }
6836
6837 const auto sc = po.m_fr.m_data.at(line).first[pos];
6838
6839 if (sc != Trait::latin1ToChar('"') && sc != Trait::latin1ToChar('\'') &&
6840 sc != Trait::latin1ToChar('(') && sc != Trait::latin1ToChar(')')) {
6841 return {line, pos, (firstLine != line && line <= po.m_lastTextLine), {}, firstLine};
6842 } else if (!space && sc != Trait::latin1ToChar(')')) {
6843 return {line, pos, false, {}, firstLine};
6844 }
6845
6846 if (sc == Trait::latin1ToChar(')')) {
6847 return {line, pos, line <= po.m_lastTextLine, {}, firstLine};
6848 }
6849
6850 const auto startLine = line;
6851
6852 bool backslash = false;
6853
6854 ++pos;
6855
6856 skipSpacesUpTo1Line<Trait>(line, pos, po.m_fr.m_data);
6857
6858 typename Trait::String title;
6859
6860 while (line < (long long int)po.m_fr.m_data.size() && pos < po.m_fr.m_data.at(line).first.length()) {
6861 bool now = false;
6862
6863 if (po.m_fr.m_data.at(line).first[pos] == Trait::latin1ToChar('\\') && !backslash) {
6864 backslash = true;
6865 now = true;
6866 } else if (sc == Trait::latin1ToChar('(') &&
6867 po.m_fr.m_data.at(line).first[pos] == Trait::latin1ToChar(')') && !backslash) {
6868 ++pos;
6869 return {line, pos, line <= po.m_lastTextLine, title, startLine};
6870 } else if (sc == Trait::latin1ToChar('(') &&
6871 po.m_fr.m_data.at(line).first[pos] == Trait::latin1ToChar('(') && !backslash) {
6872 return {line, pos, false, {}, startLine};
6873 } else if (sc != Trait::latin1ToChar('(') && po.m_fr.m_data.at(line).first[pos] == sc && !backslash) {
6874 ++pos;
6875 return {line, pos, line <= po.m_lastTextLine, title, startLine};
6876 } else {
6877 title.push_back(po.m_fr.m_data.at(line).first[pos]);
6878 }
6879
6880 if (!now) {
6881 backslash = false;
6882 }
6883
6884 ++pos;
6885
6886 if (pos == po.m_fr.m_data.at(line).first.length()) {
6887 skipSpacesUpTo1Line<Trait>(line, pos, po.m_fr.m_data);
6888 }
6889 }
6890
6891 return {line, pos, false, {}, startLine};
6892}
6893
6894template<class Trait>
6895inline std::tuple<typename Trait::String, typename Trait::String, typename Parser<Trait>::Delims::iterator, bool>
6896Parser<Trait>::checkForInlineLink(typename Delims::iterator it,
6897 typename Delims::iterator last,
6898 TextParsingOpts<Trait> &po,
6899 WithPosition *urlPos)
6900{
6901 long long int p = it->m_pos + it->m_len;
6902 long long int l = it->m_line;
6903 bool ok = false;
6904 typename Trait::String dest, title;
6905 long long int destStartLine = 0;
6906
6907 std::tie(l, p, ok, dest, destStartLine) = readLinkDestination<Trait>(l, p, po, urlPos);
6908
6909 if (!ok) {
6910 return {{}, {}, it, false};
6911 }
6912
6913 long long int s = 0;
6914
6915 std::tie(l, p, ok, title, s) = readLinkTitle<Trait>(l, p, po);
6916
6917 skipSpacesUpTo1Line<Trait>(l, p, po.m_fr.m_data);
6918
6919 if (!ok || (l >= (long long int)po.m_fr.m_data.size() || p >= po.m_fr.m_data.at(l).first.length() ||
6920 po.m_fr.m_data.at(l).first[p] != Trait::latin1ToChar(')'))) {
6921 return {{}, {}, it, false};
6922 }
6923
6924 for (; it != last; ++it) {
6925 if (it->m_line == l && it->m_pos == p) {
6926 return {dest, title, it, true};
6927 }
6928 }
6929
6930 return {{}, {}, it, false};
6931}
6932
6933template<class Trait>
6934inline std::tuple<typename Trait::String, typename Trait::String, typename Parser<Trait>::Delims::iterator, bool>
6935Parser<Trait>::checkForRefLink(typename Delims::iterator it,
6936 typename Delims::iterator last,
6937 TextParsingOpts<Trait> &po,
6938 WithPosition *urlPos)
6939{
6940 long long int p = it->m_pos + it->m_len + 1;
6941 long long int l = it->m_line;
6942 bool ok = false;
6943 typename Trait::String dest, title;
6944 long long int destStartLine = 0;
6945
6946 std::tie(l, p, ok, dest, destStartLine) = readLinkDestination<Trait>(l, p, po, urlPos);
6947
6948 if (!ok) {
6949 return {{}, {}, it, false};
6950 }
6951
6952 long long int titleStartLine = 0;
6953
6954 std::tie(l, p, ok, title, titleStartLine) = readLinkTitle<Trait>(l, p, po);
6955
6956 if (!ok) {
6957 return {{}, {}, it, false};
6958 }
6959
6960 if (!title.isEmpty()) {
6961 p = skipSpaces<Trait>(p, po.m_fr.m_data.at(l).first.asString());
6962
6963 if (titleStartLine == destStartLine && p < po.m_fr.m_data.at(l).first.length()) {
6964 return {{}, {}, it, false};
6965 } else if (titleStartLine != destStartLine && p < po.m_fr.m_data.at(l).first.length()) {
6966 l = destStartLine;
6967 p = po.m_fr.m_data.at(l).first.length();
6968 title.clear();
6969 }
6970 }
6971
6972 for (; it != last; ++it) {
6973 if (it->m_line > l || (it->m_line == l && it->m_pos >= p)) {
6974 break;
6975 }
6976 }
6977
6978 po.m_line = l;
6979 po.m_pos = p;
6980
6981 return {dest, title, std::prev(it), true};
6982}
6983
//! Try to build an image from the delimiter \p it.
//!
//! After the link text is read, the character right behind it selects the form:
//! '(' - inline image, '[' - reference image, anything else - shortcut image.
//!
//! \return Iterator to the last delimiter consumed by the image, or the
//! unchanged starting iterator when no image was recognised by this function's
//! own branches.
6984template<class Trait>
6985inline typename Parser<Trait>::Delims::iterator
6986Parser<Trait>::checkForImage(typename Delims::iterator it,
6987 typename Delims::iterator last,
6988 TextParsingOpts<Trait> &po)
6989{
6990 const auto start = it;
6991
6992 typename MdBlock<Trait>::Data text;
6993
6994 po.m_wasRefLink = false;
6995 po.m_firstInParagraph = false;
6996 po.m_headingAllowed = true;
6997
6998 WithPosition textPos;
6999 std::tie(text, it) = checkForLinkText(it, last, po, &textPos);
7000
// checkForLinkText() hands back an iterator different from start when text was read.
7001 if (it != start) {
// Is there at least one character after the delimiter that ended the text?
7002 if (it->m_pos + it->m_len < po.m_fr.m_data.at(it->m_line).first.length()) {
7003 // Inline -> (
7004 if (po.m_fr.m_data.at(it->m_line).first[it->m_pos + it->m_len] == Trait::latin1ToChar('(')) {
7005 typename Trait::String url, title;
7006 typename Delims::iterator iit;
7007 bool ok;
7008
7009 WithPosition urlPos;
7010 std::tie(url, title, iit, ok) = checkForInlineLink(std::next(it), last, po, &urlPos);
7011
7012 if (ok) {
// When only reference links are collected no item is appended,
// but the position is still advanced past the image.
7013 if (!po.m_collectRefLinks) {
7014 po.m_parent->appendItem(
7015 makeImage(url, text, po, false, start->m_line, start->m_pos,
7016 iit->m_line, iit->m_pos + iit->m_len, textPos, urlPos));
7017 }
7018
7019 po.m_line = iit->m_line;
7020 po.m_pos = iit->m_pos + iit->m_len;
7021
7022 return iit;
// Inline form failed: fall back to a shortcut image made from the text alone.
7023 } else if (createShortcutImage(text, po, start->m_line, start->m_pos, start->m_line,
7024 start->m_pos + start->m_len, it, {}, false, textPos, {})) {
7025 return it;
7026 }
7027 }
7028 // Reference -> [
7029 else if (po.m_fr.m_data.at(it->m_line).first[it->m_pos + it->m_len] == Trait::latin1ToChar('[')) {
7030 typename MdBlock<Trait>::Data label;
7031 typename Delims::iterator lit;
7032
7033 WithPosition labelPos;
7034 std::tie(label, lit) = checkForLinkLabel(std::next(it), last, po, &labelPos);
7035
// A label was read when the returned iterator moved.
7036 if (lit != std::next(it)) {
7037 const auto isLabelEmpty = toSingleLine(label).isEmpty();
7038
// Non-empty label: the label is the reference, the text is passed along with it.
7039 if (!isLabelEmpty
7040 && createShortcutImage(label,
7041 po,
7042 start->m_line,
7043 start->m_pos,
7044 start->m_line,
7045 start->m_pos + start->m_len,
7046 lit,
7047 text,
7048 true,
7049 labelPos,
7050 textPos)) {
7051 return lit;
// Empty label: the text itself is used as the reference.
7052 } else if (isLabelEmpty
7053 && createShortcutImage(text,
7054 po,
7055 start->m_line,
7056 start->m_pos,
7057 start->m_line,
7058 start->m_pos + start->m_len,
7059 lit,
7060 {},
7061 false,
7062 textPos,
7063 {})) {
7064 return lit;
7065 }
// No label could be read: try the text alone as a shortcut image.
7066 } else if (createShortcutImage(text, po, start->m_line, start->m_pos, start->m_line,
7067 start->m_pos + start->m_len, it, {}, false, textPos, {})) {
7068 return it;
7069 }
7070 } else {
7071 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutImage);
7072 }
7073 } else {
// The text ends at the end of the line: only the shortcut form is possible.
7074 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutImage);
7075 }
7076 }
7077
// Nothing was created by the branches above.
7078 return start;
7079}
7080
//! Try to build a link-like item from the square-bracket delimiter \p it.
//!
//! Handled forms, in the order they are tested: footnote reference,
//! reference link definition (':' after the text), inline link ('('),
//! reference link ('[') and, otherwise, shortcut link.
//!
//! \return Iterator to the last delimiter consumed, or the unchanged starting
//! iterator when nothing was recognised by this function's own branches.
7081template<class Trait>
7082inline typename Parser<Trait>::Delims::iterator
7083Parser<Trait>::checkForLink(typename Delims::iterator it,
7084 typename Delims::iterator last,
7085 TextParsingOpts<Trait> &po)
7086{
7087 const auto start = it;
7088
7089 typename MdBlock<Trait>::Data text;
7090
// Remember the flags before resetting them: the reference definition branch
// below needs the values they had on entry.
7091 const auto wasRefLink = po.m_wasRefLink;
7092 const auto firstInParagraph = po.m_firstInParagraph;
7093 po.m_wasRefLink = false;
7094 po.m_firstInParagraph = false;
7095 po.m_headingAllowed = true;
7096
// Index of the first non-space character of the current line.
7097 const auto ns = skipSpaces<Trait>(0, po.m_fr.m_data.at(po.m_line).first.asString());
7098
7099 WithPosition textPos;
7100 std::tie(text, it) = checkForLinkText(it, last, po, &textPos);
7101
7102 if (it != start) {
7103 // Footnote reference.
// Single-line text that starts with '^' and has at least one more character.
7104 if (text.front().first.asString().startsWith(Trait::latin1ToString("^")) &&
7105 text.front().first.asString().length() > 1 && text.size() == 1 &&
7106 start->m_line == it->m_line) {
7107 if (!po.m_collectRefLinks) {
// Identifier layout: "#" + upper-cased label + "/" + [working path + "/"] + file name.
7108 std::shared_ptr<FootnoteRef<Trait>> fnr(new FootnoteRef<Trait>(
7109 Trait::latin1ToString("#") + toSingleLine(text).toCaseFolded().toUpper() +
7110 Trait::latin1ToString("/") + (po.m_workingPath.isEmpty() ? typename Trait::String() :
7111 po.m_workingPath + Trait::latin1ToString("/")) + po.m_fileName));
7112 fnr->setStartColumn(po.m_fr.m_data.at(start->m_line).first.virginPos(start->m_pos));
7113 fnr->setStartLine(po.m_fr.m_data.at(start->m_line).second.m_lineNumber);
7114 fnr->setEndColumn(po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
7115 fnr->setEndLine(po.m_fr.m_data.at(it->m_line).second.m_lineNumber);
7116 fnr->setIdPos(textPos);
7117
// Rebuild the text as "[" + lines joined by "\n" + "]".
7118 typename Trait::String fnrText = Trait::latin1ToString("[");
7119 bool firstFnrText = true;
7120
7121 for (const auto &t : text) {
7122 if (!firstFnrText) {
7123 fnrText.push_back(Trait::latin1ToString("\n"));
7124 }
7125
7126 firstFnrText = false;
7127
7128 fnrText.push_back(t.first.asString());
7129 }
7130
7131 fnrText.push_back(Trait::latin1ToString("]"));
7132 fnr->setText(fnrText);
7133 po.m_parent->appendItem(fnr);
7134
7135 initLastItemWithOpts<Trait>(po, fnr);
7136 }
7137
7138 po.m_line = it->m_line;
7139 po.m_pos = it->m_pos + it->m_len;
7140
7141 return it;
// Otherwise the character right after the text decides the form, if there is one.
7142 } else if (it->m_pos + it->m_len < po.m_fr.m_data.at(it->m_line).first.length()) {
7143 // Reference definition -> :
7144 if (po.m_fr.m_data.at(it->m_line).first[it->m_pos + it->m_len] == Trait::latin1ToChar(':')) {
7145 // Reference definitions allowed only at start of paragraph.
// Additionally the line must be indented by fewer than 4 characters and
// the opening bracket must be the first non-space character.
7146 if ((po.m_line == 0 || wasRefLink || firstInParagraph) && ns < 4 && start->m_pos == ns) {
7147 typename Trait::String url, title;
7148 typename Delims::iterator iit;
7149 bool ok;
7150
7151 WithPosition labelPos;
7152
// Re-read the bracket content, this time as a link label.
7153 std::tie(text, it) = checkForLinkLabel(start, last, po, &labelPos);
7154
7155 if (it != start && !toSingleLine(text).simplified().isEmpty()) {
7156 WithPosition urlPos;
7157 std::tie(url, title, iit, ok) = checkForRefLink(it, last, po, &urlPos);
7158
7159 if (ok) {
// Key under which the definition is stored in the document.
7160 const auto label = Trait::latin1ToString("#") +
7161 toSingleLine(text).toCaseFolded().toUpper() +
7162 Trait::latin1ToString("/") +
7163 (po.m_workingPath.isEmpty() ? typename Trait::String() :
7164 po.m_workingPath + Trait::latin1ToString("/")) + po.m_fileName;
7165
7166 std::shared_ptr<Link<Trait>> link(new Link<Trait>);
7167 link->setStartColumn(po.m_fr.m_data.at(start->m_line).first.virginPos(
7168 start->m_pos));
7169 link->setStartLine(po.m_fr.m_data.at(start->m_line).second.m_lineNumber);
7170
// po.m_line / po.m_pos were moved by checkForRefLink(); the end of the
// link is the position just before them.
7171 const auto endPos = prevPosition(po.m_fr,
7172 po.m_fr.m_data.at(po.m_line).first.virginPos(po.m_pos),
7173 po.m_fr.m_data.at(po.m_line).second.m_lineNumber);
7174
7175 link->setEndColumn(endPos.first);
7176 link->setEndLine(endPos.second);
7177
7178 link->setTextPos(labelPos);
7179 link->setUrlPos(urlPos);
7180
7181 url = removeBackslashes<typename Trait::String, Trait>(
7182 replaceEntity<Trait>(url));
7183
// A URL naming an existing file (as is, or relative to the working path)
// is replaced by the absolute file path.
7184 if (!url.isEmpty()) {
7185 if (Trait::fileExists(url)) {
7186 url = Trait::absoluteFilePath(url);
7187 } else if (Trait::fileExists(url, po.m_workingPath)) {
7188 url = Trait::absoluteFilePath(
7189 (po.m_workingPath.isEmpty() ? typename Trait::String() :
7190 po.m_workingPath + Trait::latin1ToString("/")) + url);
7191 }
7192 }
7193
7194 link->setUrl(url);
7195
7196 po.m_wasRefLink = true;
7197 po.m_headingAllowed = false;
7198
// An already registered label is not overwritten.
7199 if (po.m_doc->labeledLinks().find(label) == po.m_doc->labeledLinks().cend()) {
7200 po.m_doc->insertLabeledLink(label, link);
7201 }
7202
7203 return iit;
7204 } else {
7205 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutLink);
7206 }
7207 } else {
7208 return start;
7209 }
7210 } else {
7211 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutLink);
7212 }
7213 }
7214 // Inline -> (
7215 else if (po.m_fr.m_data.at(it->m_line).first[it->m_pos + it->m_len] == Trait::latin1ToChar('(')) {
7216 typename Trait::String url, title;
7217 typename Delims::iterator iit;
7218 bool ok;
7219
7220 WithPosition urlPos;
7221 std::tie(url, title, iit, ok) = checkForInlineLink(std::next(it), last, po, &urlPos);
7222
7223 if (ok) {
7224 const auto link = makeLink(url,
7225 text,
7226 po,
7227 false,
7228 start->m_line,
7229 start->m_pos,
7230 iit->m_line,
7231 iit->m_pos + iit->m_len,
7232 textPos,
7233 urlPos);
7234
// makeLink() may hand back an empty pointer; then nothing is consumed.
7235 if (link.get()) {
7236 if (!po.m_collectRefLinks) {
7237 po.m_parent->appendItem(link);
7238 }
7239
7240 po.m_line = iit->m_line;
7241 po.m_pos = iit->m_pos + iit->m_len;
7242
7243 return iit;
7244 } else {
7245 return start;
7246 }
// Inline form failed: try the text alone as a shortcut link.
7247 } else if (createShortcutLink(text, po, start->m_line, start->m_pos, start->m_line,
7248 start->m_pos + start->m_len, it, {}, false, textPos, {})) {
7249 return it;
7250 }
7251 }
7252 // Reference -> [
7253 else if (po.m_fr.m_data.at(it->m_line).first[it->m_pos + it->m_len] == Trait::latin1ToChar('[')) {
7254 typename MdBlock<Trait>::Data label;
7255 typename Delims::iterator lit;
7256
7257 WithPosition labelPos;
7258 std::tie(label, lit) = checkForLinkLabel(std::next(it), last, po, &labelPos);
7259
7260 const auto isLabelEmpty = toSingleLine(label).isEmpty();
7261
// A label was read when the returned iterator moved.
7262 if (lit != std::next(it)) {
// Non-empty label: the label is the reference, the text is passed along with it.
7263 if (!isLabelEmpty
7264 && createShortcutLink(label,
7265 po,
7266 start->m_line,
7267 start->m_pos,
7268 start->m_line,
7269 start->m_pos + start->m_len,
7270 lit,
7271 text,
7272 true,
7273 labelPos,
7274 textPos)) {
7275 return lit;
// Empty label: the text itself is used as the reference.
7276 } else if (isLabelEmpty
7277 && createShortcutLink(text,
7278 po,
7279 start->m_line,
7280 start->m_pos,
7281 start->m_line,
7282 start->m_pos + start->m_len,
7283 lit,
7284 {},
7285 false,
7286 textPos,
7287 {})) {
7288 return lit;
7289 }
7290 } else if (createShortcutLink(text, po, start->m_line, start->m_pos, start->m_line,
7291 start->m_pos + start->m_len, it, {}, false, textPos, {})) {
7292 return it;
7293 }
7294 } else {
7295 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutLink);
7296 }
7297 } else {
// The text ends at the end of the line: only the shortcut form is possible.
7298 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutLink);
7299 }
7300 }
7301
// Nothing was created by the branches above.
7302 return start;
7303}
7304
7305//! Close style.
7306template<class Trait>
7307inline void
7308closeStyle(std::vector<typename TextParsingOpts<Trait>::StyleInfo> &styles,
7309 Style s)
7310{
7311 const auto it = std::find_if(styles.crbegin(), styles.crend(), [&](const auto &p) {
7312 return (p.m_style == s);
7313 });
7314
7315 if (it != styles.crend()) {
7316 styles.erase(it.base() - 1);
7317 }
7318}
7319
7320//! Apply styles.
7321template<class Trait>
7322inline void
7323applyStyles(int &opts,
7324 std::vector<typename TextParsingOpts<Trait>::StyleInfo> &styles)
7325{
7326 opts = 0;
7327
7328 for (const auto &s : styles) {
7329 switch (s.m_style) {
7331 opts |= StrikethroughText;
7332 break;
7333
7334 case Style::Italic1:
7335 case Style::Italic2:
7336 opts |= ItalicText;
7337 break;
7338
7339 case Style::Bold1:
7340 case Style::Bold2:
7341 opts |= BoldText;
7342 break;
7343
7344 default:
7345 break;
7346 }
7347 }
7348}
7349
7350template<class Trait>
7351inline int
7352Parser<Trait>::emphasisToInt(typename Delimiter::DelimiterType t)
7353{
7354 switch (t) {
7355 case Delimiter::Strikethrough:
7356 return 0;
7357
7358 case Delimiter::Emphasis1:
7359 return 1;
7360
7361 case Delimiter::Emphasis2:
7362 return 2;
7363
7364 default:
7365 return -1;
7366 }
7367}
7368
7369template<class Trait>
7370inline void
7371Parser<Trait>::createStyles(std::vector<std::pair<Style, long long int>> & styles,
7372 typename Delimiter::DelimiterType t,
7373 long long int style)
7374{
7375 if (t != Delimiter::Strikethrough) {
7376 if (style % 2 == 1) {
7377 styles.push_back({t == Delimiter::Emphasis1 ? Style::Italic1 : Style::Italic2, 1});
7378 }
7379
7380 if (style >= 2) {
7381 for (long long int i = 0; i < style / 2; ++i) {
7382 styles.push_back({t == Delimiter::Emphasis1 ? Style::Bold1 : Style::Bold2, 2});
7383 }
7384 }
7385 } else {
7386 styles.push_back({Style::Strikethrough, style});
7387 }
7388}
7389
7390template<class Trait>
7391inline std::vector<std::pair<Style, long long int>>
7392Parser<Trait>::createStyles(typename Delimiter::DelimiterType t,
7393 const std::vector<long long int> &styles,
7394 long long int lastStyle)
7395{
7396 std::vector<std::pair<Style, long long int>> ret;
7397
7398 createStyles(ret, t, lastStyle);
7399
7400 for (auto it = styles.crbegin(), last = styles.crend(); it != last; ++it) {
7401 createStyles(ret, t, *it);
7402 }
7403
7404 return ret;
7405}
7406
7407template<class Trait>
7408inline bool
7409Parser<Trait>::isSequence(typename Delims::iterator it,
7410 long long int itLine,
7411 long long int itPos,
7412 typename Delimiter::DelimiterType t)
7413{
7414 return (itLine == it->m_line && itPos + it->m_len == it->m_pos && it->m_type == t);
7415}
7416
7417template<class Trait>
7418inline typename Parser<Trait>::Delims::iterator
7419Parser<Trait>::readSequence(typename Delims::iterator it,
7420 typename Delims::iterator last,
7421 long long int &line,
7422 long long int &pos,
7423 long long int &len,
7424 long long int &itCount)
7425{
7426 line = it->m_line;
7427 pos = it->m_pos;
7428 len = it->m_len;
7429 const auto t = it->m_type;
7430 itCount = 1;
7431
7432 it = std::next(it);
7433
7434 while (it != last && isSequence(it, line, pos, t)) {
7435 pos += it->m_len;
7436 len += it->m_len;
7437
7438 ++it;
7439 ++itCount;
7440 }
7441
7442 return std::prev(it);
7443}
7444
//! \return True when the sum of \p i1 and \p i2 is a multiple of 3 while
//! \p i1 and \p i2 are not both multiples of 3 themselves.
inline constexpr bool
isMult3(long long int i1, long long int i2) noexcept
{
    return ((((i1 + i2) % 3) == 0) && !((i1 % 3 == 0) && (i2 % 3 == 0)));
}
7450
7451template<class Trait>
7452inline std::tuple<bool, std::vector<std::pair<Style, long long int>>, long long int, long long int>
7453Parser<Trait>::isStyleClosed(typename Delims::iterator first,
7454 typename Delims::iterator it,
7455 typename Delims::iterator last,
7456 typename Delims::iterator &stackBottom,
7457 TextParsingOpts<Trait> &po)
7458{
7459 const auto open = it;
7460 long long int openPos, openLength, itCount, lengthFromIt, tmp;
7461
7462 it = std::next(readSequence(first, open, last, openPos, openLength, tmp, lengthFromIt, itCount).second);
7463
7464 const auto length = lengthFromIt;
7465 long long int itLine, itPos, itLength;
7466
7467 struct RollbackValues {
7468 RollbackValues(TextParsingOpts<Trait> &po,
7469 long long int line,
7470 long long int pos,
7471 bool collectRefLinks,
7472 typename Delims::iterator &stackBottom,
7473 typename Delims::iterator last)
7474 : m_po(po)
7475 , m_line(line)
7476 , m_pos(pos)
7477 , m_collectRefLinks(collectRefLinks)
7478 , m_stackBottom(stackBottom)
7479 , m_last(last)
7480 , m_it(m_last)
7481 {
7482 }
7483
7484 void setIterator(typename Delims::iterator it)
7485 {
7486 m_it = it;
7487 }
7488
7489 ~RollbackValues()
7490 {
7491 m_po.m_line = m_line;
7492 m_po.m_pos = m_pos;
7493 m_po.m_collectRefLinks = m_collectRefLinks;
7494
7495 if (m_it != m_last && (m_it > m_stackBottom || m_stackBottom == m_last)) {
7496 m_stackBottom = m_it;
7497 }
7498 }
7499
7500 TextParsingOpts<Trait> &m_po;
7501 long long int m_line;
7502 long long int m_pos;
7503 bool m_collectRefLinks;
7504 typename Delims::iterator &m_stackBottom;
7505 typename Delims::iterator m_last;
7506 typename Delims::iterator m_it;
7507 };
7508
7509 RollbackValues rollback(po, po.m_line, po.m_pos, po.m_collectRefLinks, stackBottom, last);
7510
7511 po.m_collectRefLinks = true;
7512
7513 std::vector<long long int> styles;
7514
7515 struct Opener {
7516 std::vector<typename Delims::iterator> m_its;
7517 long long int m_length;
7518 };
7519
7520 std::vector<Opener> openers;
7521
7522 std::function<void(long long int, long long int)> dropOpeners;
7523
7524 dropOpeners = [&openers](long long int pos, long long int line) {
7525 while (!openers.empty()) {
7526 if (openers.back().m_its.front()->m_line > line || (openers.back().m_its.front()->m_line == line &&
7527 openers.back().m_its.front()->m_pos > pos)) {
7528 std::for_each( openers.back().m_its.begin(), openers.back().m_its.end(),
7529 [](auto &i) { i->m_skip = true; });
7530 openers.pop_back();
7531 } else {
7532 break;
7533 }
7534 }
7535 };
7536
7537 auto tryCloseEmphasis = [&dropOpeners, this, &openers, &open](typename Delims::iterator first,
7538 typename Delims::iterator it,
7539 typename Delims::iterator last) -> bool
7540 {
7541 const auto type = it->m_type;
7542 const auto both = it->m_leftFlanking && it->m_rightFlanking;
7543 long long int tmp1, tmp2, tmp3, tmp4;
7544 long long int closeLength;
7545
7546 it = this->readSequence(first, it, last, tmp1, closeLength, tmp2, tmp3, tmp4).first;
7547 it = std::prev(it);
7548
7549 long long int tmpLength = closeLength;
7550
7551 for (;; --it) {
7552 switch (it->m_type) {
7553 case Delimiter::Strikethrough: {
7554 if (it->m_leftFlanking && it->m_len == closeLength && type == it->m_type) {
7555 dropOpeners(it->m_pos, it->m_line);
7556 return true;
7557 }
7558 } break;
7559
7560 case Delimiter::Emphasis1:
7561 case Delimiter::Emphasis2:
7562 {
7563 if (it->m_leftFlanking && type == it->m_type) {
7564 long long int pos, len;
7565 this->readSequence(first, it, last, pos, len, tmp1, tmp2, tmp3);
7566
7567 if ((both || (it->m_leftFlanking && it->m_rightFlanking)) && isMult3(len, closeLength)) {
7568 continue;
7569 }
7570
7571 dropOpeners(pos - len, it->m_line);
7572
7573 if (tmpLength >= len) {
7574 tmpLength -= len;
7575
7576 if (open->m_type == it->m_type) {
7577 openers.pop_back();
7578 }
7579
7580 if (!tmpLength) {
7581 return true;
7582 }
7583 } else {
7584 if (open->m_type == it->m_type) {
7585 openers.back().m_length -= tmpLength;
7586 }
7587
7588 return true;
7589 }
7590 }
7591 } break;
7592
7593 default:
7594 break;
7595 }
7596
7597 if (it == first) {
7598 break;
7599 }
7600 }
7601
7602 return false;
7603 };
7604
7605 auto fillIterators = [](typename Delims::iterator first,
7606 typename Delims::iterator last) -> std::vector<typename Delims::iterator>
7607 {
7608 std::vector<typename Delims::iterator> res;
7609
7610 for (; first != last; ++first) {
7611 res.push_back(first);
7612 }
7613
7614 res.push_back(last);
7615
7616 return res;
7617 };
7618
7619 for (; it != last; ++it) {
7620 if (it > stackBottom) {
7621 return {false, {{Style::Unknown, 0}}, open->m_len, 1};
7622 }
7623
7624 if (it->m_line <= po.m_lastTextLine) {
7625 po.m_line = it->m_line;
7626 po.m_pos = it->m_pos;
7627
7628 switch (it->m_type) {
7629 case Delimiter::SquareBracketsOpen:
7630 it = checkForLink(it, last, po);
7631 break;
7632
7633 case Delimiter::ImageOpen:
7634 it = checkForImage(it, last, po);
7635 break;
7636
7637 case Delimiter::Less:
7638 it = checkForAutolinkHtml(it, last, po, false);
7639 break;
7640
7641 case Delimiter::Strikethrough: {
7642 if (open->m_type == it->m_type && open->m_len == it->m_len && it->m_rightFlanking) {
7643 rollback.setIterator(it);
7644 return {true, createStyles(open->m_type, styles, open->m_len), open->m_len, 1};
7645 } else if (it->m_rightFlanking && tryCloseEmphasis(open, it, last)) {
7646 } else if (it->m_leftFlanking && open->m_type == it->m_type) {
7647 openers.push_back({{it}, it->m_len});
7648 }
7649 } break;
7650
7651 case Delimiter::Emphasis1:
7652 case Delimiter::Emphasis2: {
7653 if (open->m_type == it->m_type) {
7654 const auto itBoth = (it->m_leftFlanking && it->m_rightFlanking);
7655
7656 if (it->m_rightFlanking) {
7657 bool notCheck = (open->m_leftFlanking && open->m_rightFlanking) || itBoth;
7658
7659 long long int count;
7660 auto firstIt = it;
7661 it = readSequence(it, last, itLine, itPos, itLength, count);
7662
7663 if (notCheck) {
7664 notCheck = isMult3(openLength, itLength);
7665 }
7666
7667 if (!openers.empty()) {
7668 long long int i = openers.size() - 1;
7669 auto &top = openers[i];
7670
7671 while (!openers.empty()) {
7672 if (i >= 0) {
7673 top = openers[i];
7674 } else {
7675 break;
7676 }
7677
7678 if ((itBoth || (top.m_its.front()->m_rightFlanking && top.m_its.front()->m_leftFlanking))
7679 && isMult3(itLength, top.m_length)) {
7680 --i;
7681 continue;
7682 }
7683
7684 if (top.m_length <= itLength) {
7685 itLength -= top.m_length;
7686 openers.erase(openers.begin() + i);
7687 } else {
7688 top.m_length -= itLength;
7689 itLength = 0;
7690 }
7691
7692 --i;
7693
7694 if (!itLength) {
7695 break;
7696 }
7697 }
7698 }
7699
7700 if (itLength) {
7701 if (!notCheck) {
7702 if (itLength >= lengthFromIt) {
7703 rollback.setIterator(it);
7704 return {true, createStyles(open->m_type, styles, lengthFromIt), length, itCount};
7705 } else {
7706 styles.push_back(itLength);
7707 lengthFromIt -= itLength;
7708 }
7709 } else if (firstIt->m_leftFlanking) {
7710 openers.push_back({fillIterators(firstIt, it), itLength});
7711 }
7712 }
7713 } else {
7714 long long int count;
7715 auto firstIt = it;
7716 it = readSequence(it, last, itLine, itPos, itLength, count);
7717 openers.push_back({fillIterators(firstIt, it), itLength});
7718 }
7719 } else if (it->m_rightFlanking) {
7720 tryCloseEmphasis(open, it, last);
7721 }
7722 } break;
7723
7724 case Delimiter::InlineCode:
7725 it = checkForInlineCode(it, last, po);
7726 break;
7727
7728 default:
7729 break;
7730 }
7731 } else {
7732 break;
7733 }
7734 }
7735
7736 return {false, {{Style::Unknown, 0}}, open->m_len, 1};
7737}
7738
7739template<class Trait>
7740inline typename Parser<Trait>::Delims::iterator
7741Parser<Trait>::incrementIterator(typename Delims::iterator it,
7742 typename Delims::iterator last,
7743 long long int count)
7744{
7745 const auto len = std::distance(it, last);
7746
7747 if (count < len) {
7748 return it + count;
7749 } else {
7750 return it + (len - 1);
7751 }
7752}
7753
7754//! Append close style.
7755template<class Trait>
7756inline void
7758 const StyleDelim &s)
7759{
7760 if (po.m_lastItemWithStyle) {
7761 po.m_lastItemWithStyle->closeStyles().push_back(s);
7762 }
7763}
7764
7765template<class Trait>
7766inline std::pair<typename Parser<Trait>::Delims::iterator, typename Parser<Trait>::Delims::iterator>
7767Parser<Trait>::readSequence(typename Delims::iterator first,
7768 typename Delims::iterator it,
7769 typename Delims::iterator last,
7770 long long int &pos,
7771 long long int &length,
7772 long long int &itCount,
7773 long long int &lengthFromIt,
7774 long long int &itCountFromIt)
7775{
7776 long long int line = it->m_line;
7777 pos = it->m_pos + it->m_len;
7778 long long int ppos = it->m_pos;
7779 const auto t = it->m_type;
7780 lengthFromIt = it->m_len;
7781 itCountFromIt = 1;
7782
7783 auto retItLast = std::next(it);
7784
7785 for (; retItLast != last; ++retItLast) {
7786 if (retItLast->m_line == line && pos == retItLast->m_pos && retItLast->m_type == t) {
7787 lengthFromIt += retItLast->m_len;
7788 pos = retItLast->m_pos + retItLast->m_len;
7789 ++itCountFromIt;
7790 } else {
7791 break;
7792 }
7793 }
7794
7795 length = lengthFromIt;
7796 itCount = itCountFromIt;
7797
7798 auto retItFirst = it;
7799 bool useNext = false;
7800
7801 if (retItFirst != first) {
7802 retItFirst = std::prev(retItFirst);
7803 useNext = true;
7804
7805 for (;; --retItFirst) {
7806 if (retItFirst->m_line == line && ppos - retItFirst->m_len == retItFirst->m_pos && retItFirst->m_type == t) {
7807 length += retItFirst->m_len;
7808 ppos = retItFirst->m_pos;
7809 ++itCount;
7810 useNext = false;
7811 } else {
7812 useNext = true;
7813 break;
7814 }
7815
7816 if (retItFirst == first) {
7817 break;
7818 }
7819 }
7820 }
7821
7822 return {useNext ? std::next(retItFirst) : retItFirst, std::prev(retItLast)};
7823}
7824
//! Process the emphasis / strikethrough delimiter \p it.
//!
//! A right-flanking delimiter is first tried as a closer of a style that is
//! currently open in po.m_styles. Otherwise a left-flanking delimiter is tried
//! as an opener: the style is opened only when isStyleClosed() finds its
//! closer, else the delimiter is turned into text via makeText().
//!
//! \return Iterator to the last delimiter consumed.
7825template<class Trait>
7826inline typename Parser<Trait>::Delims::iterator
7827Parser<Trait>::checkForStyle(typename Delims::iterator first,
7828 typename Delims::iterator it,
7829 typename Delims::iterator last,
7830 typename Delims::iterator &stackBottom,
7831 TextParsingOpts<Trait> &po)
7832{
7833 long long int count = 1;
7834
7835 po.m_wasRefLink = false;
7836 po.m_firstInParagraph = false;
7837
// Closing attempt.
7838 if (it->m_rightFlanking) {
7839 long long int pos, len, tmp1, tmp2;
// len and count describe the whole sequence around it.
7840 readSequence(first, it, last, pos, len, count, tmp1, tmp2);
7841 const auto t = it->m_type;
7842
7843 long long int opened = 0;
7844 bool bothFlanking = false;
7845
// Find the most recently opened style of the same kind as the delimiter.
// Note: it and last declared by this loop shadow the function parameters.
7846 for (auto it = po.m_styles.crbegin(), last = po.m_styles.crend(); it != last; ++it) {
7847 bool doBreak = false;
7848
7849 switch (t) {
7850 case Delimiter::Emphasis1: {
7851 if (it->m_style == Style::Italic1 || it->m_style == Style::Bold1) {
7852 opened = it->m_length;
7853 bothFlanking = it->m_bothFlanking;
7854 doBreak = true;
7855 }
7856 } break;
7857
7858 case Delimiter::Emphasis2: {
7859 if (it->m_style == Style::Italic2 || it->m_style == Style::Bold2) {
7860 opened = it->m_length;
7861 bothFlanking = it->m_bothFlanking;
7862 doBreak = true;
7863 }
7864 } break;
7865
7866 case Delimiter::Strikethrough: {
7867 if (it->m_style == Style::Strikethrough) {
7868 opened = it->m_length;
7869 bothFlanking = it->m_bothFlanking;
7870 doBreak = true;
7871 }
7872 } break;
7873
7874 default:
7875 break;
7876 }
7877
7878 if (doBreak)
7879 break;
7880 }
7881
// The isMult3() check applies only when the closer is also left-flanking
// or the opener was both-flanking.
7882 const bool sumMult3 = (it->m_leftFlanking || bothFlanking ? isMult3(opened, len) : false);
7883
7884 if (count && opened && !sumMult3) {
// Do not close more than the length recorded for the open style.
7885 if (count > opened) {
7886 count = opened;
7887 }
7888
// This pos shadows the one declared at the top of the branch.
7889 auto pos = po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos);
7890 const auto line = po.m_fr.m_data.at(it->m_line).second.m_lineNumber;
7891
7892 if (it->m_type == Delimiter::Strikethrough) {
7893 const auto len = it->m_len;
7894
7895 for (auto i = 0; i < count; ++i) {
7896 closeStyle<Trait>(po.m_styles, Style::Strikethrough);
7897 appendCloseStyle(po, {StrikethroughText, pos, line, pos + len - 1, line});
7898 pos += len;
7899 }
7900 } else {
// An odd count closes one italic (one character) first,
// then count / 2 bolds (two characters each).
7901 if (count % 2 == 1) {
7902 const auto st = (it->m_type == Delimiter::Emphasis1 ? Style::Italic1 : Style::Italic2);
7903
7904 closeStyle<Trait>(po.m_styles, st);
7905 appendCloseStyle(po, {ItalicText, pos, line, pos, line});
7906 ++pos;
7907 }
7908
7909 if (count >= 2) {
7910 const auto st = (it->m_type == Delimiter::Emphasis1 ? Style::Bold1 : Style::Bold2);
7911
7912 for (auto i = 0; i < count / 2; ++i) {
7913 closeStyle<Trait>(po.m_styles, st);
7914 appendCloseStyle(po, {BoldText, pos, line, pos + 1, line});
7915 pos += 2;
7916 }
7917 }
7918 }
7919
7920 applyStyles<Trait>(po.m_opts, po.m_styles);
7921
// Step over the delimiters that were consumed as closers.
7922 const auto j = incrementIterator(it, last, count - 1);
7923
7924 po.m_pos = j->m_pos + j->m_len;
7925 po.m_line = j->m_line;
7926
7927 return j;
7928 }
7929 }
7930
7931 count = 1;
7932
// Opening attempt.
7933 if (it->m_leftFlanking) {
7934 switch (it->m_type) {
7935 case Delimiter::Strikethrough:
7936 case Delimiter::Emphasis1:
7937 case Delimiter::Emphasis2: {
7938 bool closed = false;
7939 std::vector<std::pair<Style, long long int>> styles;
7940 long long int len = 0;
7941
7942 if (it > stackBottom) {
7943 stackBottom = last;
7944 }
7945
// A delimiter marked with m_skip is not checked again;
// only the length of its sequence is read.
7946 if (it->m_skip) {
7947 closed = false;
7948 long long int tmp1, tmp2, tmp3;
7949 readSequence(it, last, tmp1, tmp2, len, tmp3);
7950 } else {
7951 std::tie(closed, styles, len, count) = isStyleClosed(first, it, last, stackBottom, po);
7952 }
7953
7954 if (closed) {
7955 auto pos = po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos);
7956 const auto line = po.m_fr.m_data.at(it->m_line).second.m_lineNumber;
7957
// Register every returned style as open; each one covers p.second characters.
7958 for (const auto &p : styles) {
7959 po.m_styles.push_back({p.first, p.second, it->m_leftFlanking && it->m_rightFlanking});
7960
7961 if (!po.m_collectRefLinks) {
7962 po.m_openStyles.push_back({styleToTextOption(p.first), pos, line,
7963 pos + p.second - 1, line});
7964 }
7965
7966 pos += p.second;
7967 }
7968
7969 po.m_pos = it->m_pos + len;
7970 po.m_line = it->m_line;
7971
7972 applyStyles<Trait>(po.m_opts, po.m_styles);
7973 } else {
// Not closed: the delimiter characters become ordinary text.
7974 makeText(it->m_line, it->m_pos + len, po);
7975 }
7976 } break;
7977
7978 default: {
7979 makeText(it->m_line, it->m_pos + it->m_len, po);
7980 } break;
7981 }
7982 }
7983
7984 if (!count) {
7985 count = 1;
7986 }
7987
7988 resetHtmlTag(po.m_html, &po);
7989
7990 return incrementIterator(it, last, count - 1);
7991}
7992
7993//! Concatenate texts in block.
//!
//! Builds one Text item from the items in [it, last): the texts are joined,
//! opening and closing styles are gathered in order, options and start
//! position come from the first item, end position from the last one.
//! Every item in the range is cast to Text without a type check.
//!
//! NOTE(review): the declarator lines (function name and parameter list, listing
//! lines 7996-7997) are absent from this listing. The body uses the parameters
//! `it` and `last`; restore their declaration from the original header.
7994template<class Trait>
7995inline std::shared_ptr<Text<Trait>>
7998{
7999 std::shared_ptr<Text<Trait>> t(new Text<Trait>);
8000 t->setOpts(std::static_pointer_cast<Text<Trait>>(*it)->opts());
8001 t->setStartColumn((*it)->startColumn());
8002 t->setStartLine((*it)->startLine());
8003
8004 typename ItemWithOpts<Trait>::Styles close;
8005
8006 typename Trait::String data;
8007
8008 for (; it != last; ++it) {
8009 const auto tt = std::static_pointer_cast<Text<Trait>>(*it);
8010
8011 data.push_back(tt->text());
8012
8013 if (!tt->openStyles().empty()) {
8014 std::copy(tt->openStyles().cbegin(), tt->openStyles().cend(),
8015 std::back_inserter(t->openStyles()));
8016 }
8017
8018 if (!tt->closeStyles().empty()) {
8019 std::copy(tt->closeStyles().cbegin(), tt->closeStyles().cend(),
8020 std::back_inserter(close));
8021 }
8022 }
8023
// Step back to the last item of the range to take its end position.
8024 it = std::prev(it);
8025
8026 t->setText(data);
8027 t->setEndColumn((*it)->endColumn());
8028 t->setEndLine((*it)->endLine());
8029 t->closeStyles() = close;
8030
8031 return t;
8032}
8033
8034//! \return Is optimization type a semi one.
//!
//! NOTE(review): the declarator line (8036) and the case labels (8039-8040) are
//! absent from this listing, so the set of values that yields true cannot be
//! read from here; restore them from the original header.
8035inline bool
8037{
8038 switch (t) {
8041 return true;
8042
8043 default:
8044 return false;
8045 }
8046}
8047
8048//! \return Is optimization type without raw data optimization?
//!
//! NOTE(review): the declarator line (8050) and the case labels (8053-8054) are
//! absent from this listing, so the set of values that yields true cannot be
//! read from here; restore them from the original header.
8049inline bool
8051{
8052 switch (t) {
8055 return true;
8056
8057 default:
8058 return false;
8059 }
8060}
8061
8062//! Optimize Paragraph.
//!
//! Builds a new paragraph in which runs of neighbouring Text items are merged
//! by concatenateText(). Non-text items are copied as they are. \p p is
//! replaced by the new paragraph, which is also returned.
//!
//! NOTE(review): the declarator lines (8065-8067) and body line 8108 are absent
//! from this listing. The body uses `p`, `po` and `type`; restore the missing
//! lines from the original header.
8063template<class Trait>
8064inline std::shared_ptr<Paragraph<Trait>>
8068{
8069 std::shared_ptr<Paragraph<Trait>> np(new Paragraph<Trait>);
8070 np->setStartColumn(p->startColumn());
8071 np->setStartLine(p->startLine());
8072 np->setEndColumn(p->endColumn());
8073 np->setEndLine(p->endLine());
8074
8075 int opts = TextWithoutFormat;
// start == cend() means that no run of text items is currently open.
8076 auto start = p->items().cend();
8077 long long int line = -1;
// Index range handed to po.concatenateAuxText() for the current run.
8078 long long int auxStart = 0, auxIt = 0;
8079 bool finished = false;
8080
8081 for (auto it = p->items().cbegin(), last = p->items().cend(); it != last; ++it) {
8082 if ((*it)->type() == ItemType::Text) {
8083 const auto t = std::static_pointer_cast<Text<Trait>>(*it);
8084
8085 if (start == last) {
8086 start = it;
8087 opts = t->opts();
8088 line = t->endLine();
8089 finished = (isSemiOptimization(type) && !t->closeStyles().empty());
8090 } else {
// The run ends when the options change, a new line starts, the previous
// item finished the run, or (semi optimization) this item opens styles.
8091 if (opts != t->opts() || t->startLine() != line || finished ||
8092 (!t->openStyles().empty() && isSemiOptimization(type))) {
8093 if (!isWithoutRawDataOptimization(type)) {
8094 po.concatenateAuxText(auxStart, auxIt);
// Algebraically this is auxStart + 1.
8095 auxIt = auxIt - (auxIt - auxStart) + 1;
8096 auxStart = auxIt;
8097 }
8098
8099 np->appendItem(concatenateText<Trait>(start, it));
8100 start = it;
8101 opts = t->opts();
8102 line = t->endLine();
8103 }
8104
8105 finished = (isSemiOptimization(type) && !t->closeStyles().empty());
8106 }
8107
8109 ++auxIt;
8110 } else {
8111 finished = false;
8112
// A non-text item closes the open run, if any.
8113 if (start != last) {
8114 if (!isWithoutRawDataOptimization(type)) {
8115 po.concatenateAuxText(auxStart, auxIt);
8116 auxIt = auxIt - (auxIt - auxStart) + 1;
8117 auxStart = auxIt;
8118 }
8119
8120 np->appendItem(concatenateText<Trait>(start, it));
8121 start = last;
8122 opts = TextWithoutFormat;
8123 line = (*it)->endLine();
8124 }
8125
8126 np->appendItem((*it));
8127 }
8128 }
8129
// Flush the run that reaches the end of the paragraph.
8130 if (start != p->items().cend()) {
8131 np->appendItem(concatenateText<Trait>(start, p->items().cend()));
8132
8133 if (!isWithoutRawDataOptimization(type)) {
8134 po.concatenateAuxText(auxStart, po.m_rawTextData.size());
8135 }
8136 }
8137
8138 p = np;
8139
8140 return p;
8141}
8142
//! Normalize position.
//!
//! When \p pos is non-zero and equal to \p length, and \p line is less than
//! \p linesCount, the position is moved to column 0 of the next line.
//! Otherwise \p pos and \p line are left untouched.
inline void
normalizePos(long long int &pos,
             long long int &line,
             long long int length,
             long long int linesCount) noexcept
{
    if (pos != 0 && line < linesCount && pos == length) {
        pos = 0;
        ++line;
    }
}
8155
8156//! Make Paragraph.
//!
//! Creates a paragraph that holds the items of [first, last). The start
//! position comes from the first item, the end position from the last one.
//! *first is dereferenced before the loop, i.e. even when the range is empty.
//!
//! NOTE(review): the declarator lines (function name and parameter list, listing
//! lines 8159-8160) are absent from this listing. The body uses the parameters
//! `first` and `last`; restore their declaration from the original header.
8157template<class Trait>
8158inline std::shared_ptr<Paragraph<Trait>>
8161{
8162 auto p = std::make_shared<Paragraph<Trait>>();
8163
8164 p->setStartColumn((*first)->startColumn());
8165 p->setStartLine((*first)->startLine());
8166
8167 for (; first != last; ++first) {
8168 p->appendItem(*first);
// Overwritten on every pass, so the last item's end position wins.
8169 p->setEndColumn((*first)->endColumn());
8170 p->setEndLine((*first)->endLine());
8171 }
8172
8173 return p;
8174}
8175
8176//! Split Paragraph and free HTML.
//!
//! Walks the items of \p p. At every RawHtml item for which isFreeTag() is true
//! the items gathered so far are made into a paragraph. Unless \p collectRefLinks
//! is set, that paragraph (if not empty) and the HTML item are appended to
//! `parent`.
//!
//! \return \p p itself when no split happened, a paragraph with the items
//! after the last free HTML item, or an empty paragraph when nothing remains.
//!
//! NOTE(review): listing lines 8179, 8181 and 8202-8203 are absent here (the
//! function name with its first parameter, one more parameter, and the tail of
//! the optimizeParagraph() call). The body uses `parent` and `po`; restore the
//! missing lines from the original header.
8177template<class Trait>
8178inline std::shared_ptr<Paragraph<Trait>>
8180 std::shared_ptr<Paragraph<Trait>> p,
8182 bool collectRefLinks,
8183 bool fullyOptimizeParagraphs = true)
8184{
8185 auto first = p->items().cbegin();
8186 auto it = first;
8187 auto last = p->items().cend();
8188
8189 for (; it != last; ++it) {
// first == last marks that a new group starts at the current item.
8190 if (first == last) {
8191 first = it;
8192 }
8193
8194 if ((*it)->type() == ItemType::RawHtml &&
8195 UnprotectedDocsMethods<Trait>::isFreeTag(std::static_pointer_cast<RawHtml<Trait>>(*it))) {
// This p shadows the parameter of the same name.
8196 auto p = makeParagraph<Trait>(first, it);
8197
8198 if (!collectRefLinks) {
8199 if (!p->isEmpty()) {
8200 parent->appendItem(optimizeParagraph<Trait>(p, po,
8201 fullyOptimizeParagraphs ?
8204 }
8205
8206 parent->appendItem(*it);
8207 }
8208
8209 first = last;
8210 }
8211 }
8212
8213 if (first != last) {
8214 if (first != p->items().cbegin()) {
// Keep only as many trailing raw text entries as there are Text items left.
8215 const auto c = std::count_if(first, last, [](const auto &i) {
8216 return (i->type() == MD::ItemType::Text);
8217 });
8218 po.m_rawTextData.erase(po.m_rawTextData.cbegin(), po.m_rawTextData.cbegin() +
8219 (po.m_rawTextData.size() - c));
8220
8221 return makeParagraph<Trait>(first, last);
8222 } else {
8223 return p;
8224 }
8225 } else {
8226 po.m_rawTextData.clear();
8227
8228 return std::make_shared<Paragraph<Trait>>();
8229 }
8230}
8231
8232//! \return Last virgin position of the item.
//! For the handled item types the returned value is the end column of the last
//! closing style delimiter when the item has any; otherwise the end column of the
//! item itself (or of its end delimiter for Code/Math). Any other type gives -1.
// NOTE(review): the listing line with the function name and the `item` parameter
// is absent, as is one `case` label between Image and RawHtml.
8233template<class Trait>
8234inline long long int
8236{
8237 switch (item->type()) {
8238 case ItemType::Text:
8239 case ItemType::Link:
8240 case ItemType::Image:
8242 case ItemType::RawHtml:
8243 {
// These types are accessed through ItemWithOpts to reach closeStyles().
8244 auto i = static_cast<ItemWithOpts<Trait> *>(item);
8245
8246 if (!i->closeStyles().empty()) {
8247 return i->closeStyles().back().endColumn();
8248 } else {
8249 return i->endColumn();
8250 }
8251 }
8252 break;
8253
8254 case ItemType::Code:
8255 case ItemType::Math:
8256 {
// Math items are accessed through the Code interface here as well.
8257 auto c = static_cast<Code<Trait> *>(item);
8258
8259 if (!c->closeStyles().empty()) {
8260 return c->closeStyles().back().endColumn();
8261 } else {
// Without closing styles the end delimiter marks the last position.
8262 return c->endDelim().endColumn();
8263 }
8264 }
8265 break;
8266
8267 default:
8268 return -1;
8269 }
8270}
8271
8272//! Make heading.
//! Builds a Heading of the given \p level from paragraph \p p and appends it to
//! \p parent. The whole body is skipped when \p collectRefLinks is true.
//! The heading spans from the start of \p p to (\p lastColumn, \p lastLine), gets
//! \p delim as its only delimiter, and is registered in \p doc under its label
//! (and under the lower-cased label when that differs).
// NOTE(review): a few listing lines are absent in this block (the last parameter
// line that declares `po`, and single statements inside the body), so some values
// such as `label` are read below without a visible assignment.
8273template<class Trait>
8274inline void
8275makeHeading(std::shared_ptr<Block<Trait>> parent,
8276 std::shared_ptr<Document<Trait>> doc,
8277 std::shared_ptr<Paragraph<Trait>> p,
8278 long long int lastColumn,
8279 long long int lastLine,
8280 int level,
8281 const typename Trait::String &workingPath,
8282 const typename Trait::String &fileName,
8283 bool collectRefLinks,
8284 const WithPosition &delim,
8286{
8287 if (!collectRefLinks) {
// A trailing line break is not kept in the heading: the paragraph is rebuilt
// without its last item.
8288 if (p->items().back()->type() == ItemType::LineBreak) {
8289 auto lb = std::static_pointer_cast<LineBreak<Trait>>(p->items().back());
// True when the text of the line break consists of whitespace only.
8290 const auto lineBreakBySpaces = lb->text().simplified().isEmpty();
8291
8292 p = makeParagraph<Trait>(p->items().cbegin(), std::prev(p->items().cend()));
8293 const auto lineBreakPos = localPosFromVirgin(po.m_fr, lb->startColumn(), lb->startLine());
8294
8295 if (!p->isEmpty()) {
8296 if (p->items().back()->type() == ItemType::Text) {
8297 auto lt = std::static_pointer_cast<Text<Trait>>(p->items().back());
8298
// A non-blank line break becomes part of the last text item: the raw text is
// re-read from the source line starting at the text's start column.
8299 if (!lineBreakBySpaces) {
8300 auto text = po.m_fr.m_data.at(lineBreakPos.second).first.fullVirginString().sliced(
8301 lt->startColumn());
8302 po.m_rawTextData.back().m_str = text;
8303
8304 if (!lt->text()[0].isSpace()) {
8305 const auto notSpacePos = skipSpaces<Trait>(0, text);
8306
8307 text.remove(0, notSpacePos);
8308 }
8309
8311 }
8312
// The text item is extended over the characters of the removed line break.
8313 lt->setEndColumn(lt->endColumn() + lb->text().length());
8314 } else {
// The last item is not a text: a non-blank line break is turned into a new
// Text item holding the rest of the line after the last item.
8315 if (!lineBreakBySpaces) {
8316 const auto lastItemVirginPos = lastVirginPositionInParagraph<Trait>(p->items().back().get());
8317 const auto lastItemPos = localPosFromVirgin(po.m_fr, lastItemVirginPos, lineBreakPos.second);
8318 const auto endOfLine = po.m_fr.m_data.at(lineBreakPos.second).first.virginSubString(
8319 lastItemPos.first + 1);
8320 auto t = std::make_shared<Text<Trait>>();
8321 t->setText(endOfLine);
8322 t->setStartColumn(lastItemVirginPos + 1);
8323 t->setStartLine(lb->startLine());
8324 t->setEndColumn(lb->endColumn());
8325 t->setEndLine(lb->endLine());
8326
8327 p->appendItem(t);
8328
8329 const auto pos = localPosFromVirgin(po.m_fr, lb->startColumn(), lb->startLine());
8330
8331 po.m_rawTextData.push_back({lb->text(), pos.first, pos.second});
8332 }
8333 }
8334 }
8335 }
8336
// Label text and its position in the source.
8337 std::pair<typename Trait::String, WithPosition> label;
8338
// Only a trailing Text item without formatting is examined for a label.
8339 if (p->items().back()->type() == ItemType::Text) {
8340 auto t = std::static_pointer_cast<Text<Trait>>(p->items().back());
8341
8342 if (t->opts() == TextWithoutFormat) {
8343 auto text = po.m_rawTextData.back();
8344 typename Trait::InternalString tmp(text.m_str);
8346
8347 if (!label.first.isEmpty()) {
// Strip the first and the last character of the found label.
8348 label.first = label.first.sliced(1, label.first.length() - 2);
8349
// Nothing but whitespace is left in the text: the text item is dropped and the
// paragraph end is moved to the new last item.
8350 if (tmp.asString().simplified().isEmpty()) {
8351 p->removeItemAt(p->items().size() - 1);
8352 po.m_rawTextData.pop_back();
8353
8354 if (!p->items().empty()) {
8355 const auto last = std::static_pointer_cast<WithPosition>(p->items().back());
8356 p->setEndColumn(last->endColumn());
8357 p->setEndLine(last->endLine());
8358 }
8359 } else {
8360 const auto notSpacePos = tmp.virginPos(skipSpaces<Trait>(0, tmp.asString()));
8361 const auto virginLine = t->endLine();
8362
// There is text before the label: the current text item is cut right before it.
8363 if (label.second.startColumn() > notSpacePos) {
8364 auto text = tmp.fullVirginString().sliced(0, label.second.startColumn());
8365 po.m_rawTextData.back().m_str = text;
8366
8367 if (!t->text()[0].isSpace()) {
8368 const auto notSpacePos = skipSpaces<Trait>(0, text);
8369
8370 text.remove(0, notSpacePos);
8371 }
8372
8374 t->setEndColumn(label.second.startColumn() - 1);
8375
8376 const auto lastPos = t->endColumn();
8377 const auto pos = localPosFromVirgin(po.m_fr, label.second.endColumn() + 1, virginLine);
8378
// A valid position right after the label means there is text after it too,
// so a second text item is added for that part.
8379 if (pos.first != -1) {
8380 t = std::make_shared<Text<Trait>>();
8381 t->setStartColumn(label.second.endColumn() + 1);
8382 t->setStartLine(virginLine);
8383 t->setEndColumn(lastPos);
8384 t->setEndLine(virginLine);
8385 p->appendItem(t);
8386
8387 po.m_rawTextData.push_back({po.m_fr.m_data[pos.second].first.asString().sliced(pos.first),
8388 pos.first, pos.second});
8389 }
8390 }
8391
8392 const auto pos = localPosFromVirgin(po.m_fr, label.second.endColumn() + 1, virginLine);
8393
8394 if (pos.first != -1) {
8395 po.m_rawTextData.back() = {po.m_fr.m_data[pos.second].first.asString().sliced(pos.first),
8396 pos.first, pos.second};
8397
8398 auto text = po.m_rawTextData.back().m_str;
8399
8400 if (!text.simplified().isEmpty()) {
// Leading spaces are removed only when this text is the single item.
8401 if (p->items().size() == 1) {
8402 const auto ns = skipSpaces<Trait>(0, text);
8403
8404 text.remove(0, ns);
8405 }
8406
8407 t->setStartColumn(label.second.endColumn() + 1);
8409 } else {
// Only whitespace follows the label: the trailing item is removed.
8410 po.m_rawTextData.pop_back();
8411 p->removeItemAt(p->items().size() - 1);
8412 }
8413 }
8414
8415 p->setEndColumn(t->endColumn());
8416 }
8417 } else {
8418 label.first.clear();
8419 }
8420
8421 label.second.setStartLine(t->startLine());
8422 label.second.setEndLine(t->endLine());
8423 }
8424 }
8425
8426 std::shared_ptr<Heading<Trait>> h(new Heading<Trait>);
8427 h->setStartColumn(p->startColumn());
8428 h->setStartLine(p->startLine());
8429 h->setEndColumn(lastColumn);
8430 h->setEndLine(lastLine);
8431 h->setLevel(level);
8432
// A heading without items gets no text paragraph.
8433 if (!p->items().empty()) {
8434 h->setText(p);
8435 }
8436
8437 h->setDelims({delim});
8438
// Without an explicit label one is generated from the paragraph; the label
// position is stored only in the other case.
8439 if (label.first.isEmpty() && !p->items().empty()) {
8440 label.first = Trait::latin1ToString("#") + paragraphToLabel(p.get());
8441 } else {
8442 h->setLabelPos(label.second);
8443 }
8444
8445 if (!label.first.isEmpty()) {
// Full label is "<label>/<workingPath>/<fileName>", or "<label>/<fileName>"
// when workingPath is empty.
8446 const auto path = Trait::latin1ToString("/") + (!workingPath.isEmpty() ?
8447 workingPath + Trait::latin1ToString("/") : typename Trait::String()) + fileName;
8448
8449 h->setLabel(label.first + path);
8450
8451 doc->insertLabeledHeading(label.first + path, h);
8452 h->labelVariants().push_back(h->label());
8453
// The lower-cased label is registered as an additional variant.
8454 if (label.first != label.first.toLower()) {
8455 doc->insertLabeledHeading(label.first.toLower() + path, h);
8456 h->labelVariants().push_back(label.first.toLower() + path);
8457 }
8458 }
8459
8460 parent->appendItem(h);
8461 }
8462}
8463
8464//! \return Index of text item for the given index in raw text data.
8465template<class Trait>
8466inline long long int
8467textAtIdx(std::shared_ptr<Paragraph<Trait>> p,
8468 size_t idx)
8469{
8470 size_t i = 0;
8471
8472 for (auto it = p->items().cbegin(), last = p->items().cend(); it != last; ++it) {
8473 if ((*it)->type() == ItemType::Text) {
8474 if (i == idx) {
8475 return std::distance(p->items().cbegin(), it);
8476 }
8477
8478 ++i;
8479 }
8480 }
8481
8482 return -1;
8483}
8484
8485//! Process text plugins.
//! Calls the function of every plugin in \p textPlugins with `p`, `po` and the
//! string list stored with the plugin. When \p inLink is true, plugins whose
//! boolean flag is false are skipped.
// NOTE(review): the listing lines with the function name and the first two
// parameters (`p` and `po` as used in the body) are absent here.
8486template<class Trait>
8487inline void
8490 const TextPluginsMap<Trait> &textPlugins,
8491 bool inLink)
8492{
8493 for (const auto &plugin : textPlugins) {
// The bool element of the plugin data decides whether it runs inside links.
8494 if (inLink && !std::get<bool>(plugin.second)) {
8495 continue;
8496 }
8497
8498 std::get<TextPluginFunc<Trait>>(plugin.second)(p, po,
8499 std::get<typename Trait::StringList>(plugin.second));
8500 }
8501}
8502
8503//! Make horizontal line.
//! Creates a HorizontalLine item for `line` and appends it to \p parent.
//! The item starts at the first non-space character of the line and ends at its
//! last character (both translated with virginPos()); start and end line are the
//! line number stored with `line`.
// NOTE(review): the listing line with the function name and the `line` parameter
// is absent here.
8504template<class Trait>
8505inline void
8507 std::shared_ptr<Block<Trait>> parent)
8508{
8509 std::shared_ptr<Item<Trait>> hr(new HorizontalLine<Trait>);
8510 hr->setStartColumn(line.first.virginPos(skipSpaces<Trait>(0, line.first.asString())));
8511 hr->setStartLine(line.second.m_lineNumber);
8512 hr->setEndColumn(line.first.virginPos(line.first.length() - 1));
8513 hr->setEndLine(line.second.m_lineNumber);
8514 parent->appendItem(hr);
8515}
8516
//! Parse inline content of the block `fr` (text, links, images, styles, inline
//! code, math, raw HTML, setext headings, horizontal lines) and append the
//! resulting items to \p parent.
//! \return -1 when `fr` has no lines, when nothing special was detected, or when
//! the stop line is outside of `fr`; otherwise the line number (m_lineNumber) of
//! the line `po.m_line` where processing stopped.
// NOTE(review): the listing line with the function name and the first parameter
// (`fr`) is absent, as are the `case` labels of the `switch (type)` further below.
8517template<class Trait>
8518inline long long int
8520 std::shared_ptr<Block<Trait>> parent,
8521 std::shared_ptr<Document<Trait>> doc,
8522 typename Trait::StringList &linksToParse,
8523 const typename Trait::String &workingPath,
8524 const typename Trait::String &fileName,
8525 bool collectRefLinks,
8526 bool ignoreLineBreak,
8527 RawHtmlBlock<Trait> &html,
8528 bool inLink)
8529
8530{
8531 if (fr.m_data.empty()) {
8532 return -1;
8533 }
8534
// Current paragraph; it starts at the first character of the first line.
8535 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
8536 p->setStartColumn(fr.m_data.at(0).first.virginPos(0));
8537 p->setStartLine(fr.m_data.at(0).second.m_lineNumber);
8538
8539 auto delims = collectDelimiters(fr.m_data);
8540
8541 TextParsingOpts<Trait> po = {fr, p, nullptr, doc, linksToParse, workingPath, fileName,
8542 collectRefLinks, ignoreLineBreak, html, m_textPlugins};
8543 typename Delims::iterator styleStackBottom = delims.end();
8544
// An HTML tag left open by a previous call is finished first; in that case the
// delimiters are not walked at all.
8545 if (html.m_html.get() && html.m_continueHtml) {
8546 finishRawHtmlTag(delims.begin(), delims.end(), po, false);
8547 } else if (!delims.empty()) {
8548 for (auto it = delims.begin(), last = delims.end(); it != last; ++it) {
8549 if (po.m_line > po.m_lastTextLine) {
8550 checkForTableInParagraph(po, fr.m_data.size() - 1);
8551 }
8552
// Text before the delimiter is flushed, but never beyond the last text line.
8553 if (po.shouldStopParsing() && po.m_lastTextLine < it->m_line) {
8554 break;
8555 } else {
8556 makeText(po.m_lastTextLine < it->m_line ? po.m_lastTextLine : it->m_line,
8557 po.m_lastTextLine < it->m_line ? po.m_lastTextPos : it->m_pos, po);
8558 }
8559
// Each check*() helper returns the delimiter where it stopped; the paragraph
// end is then moved to the last character of that delimiter.
8560 switch (it->m_type) {
8561 case Delimiter::SquareBracketsOpen: {
8562 it = checkForLink(it, last, po);
8563 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8564 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8565 } break;
8566
8567 case Delimiter::ImageOpen: {
8568 it = checkForImage(it, last, po);
8569 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8570 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8571 } break;
8572
8573 case Delimiter::Less: {
8574 it = checkForAutolinkHtml(it, last, po, true);
8575
// The paragraph end is updated only when no HTML item is pending.
8576 if (!html.m_html.get()) {
8577 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8578 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8579 }
8580 } break;
8581
8582 case Delimiter::Strikethrough:
8583 case Delimiter::Emphasis1:
8584 case Delimiter::Emphasis2: {
// Styles are not processed while only reference links are collected.
8585 if (!collectRefLinks) {
8586 it = checkForStyle(delims.begin(), it, last, styleStackBottom, po);
8587 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8588 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8589 }
8590 } break;
8591
8592 case Delimiter::Math: {
8593 it = checkForMath(it, last, po);
8594 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8595 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8596 } break;
8597
8598 case Delimiter::InlineCode: {
// A backslashed delimiter does not open inline code.
8599 if (!it->m_backslashed) {
8600 it = checkForInlineCode(it, last, po);
8601 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8602 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8603 }
8604 } break;
8605
8606 case Delimiter::HorizontalLine: {
8607 po.m_wasRefLink = false;
8608 po.m_firstInParagraph = false;
8609
8610 const auto pos = skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString());
8611 const auto withoutSpaces = po.m_fr.m_data[it->m_line].first.asString().sliced(pos);
8612
// The same line may also be the underline of a level 2 heading.
8613 auto h2 = isH2<Trait>(withoutSpaces);
8614
8615 optimizeParagraph<Trait>(p, po, OptimizeParagraphType::Semi);
8616
8617 checkForTextPlugins<Trait>(p, po, m_textPlugins, inLink);
8618
// The paragraph collected so far ends at the end of the previous line.
8619 if (it->m_line - 1 >= 0) {
8620 p->setEndColumn(fr.m_data.at(it->m_line - 1).first.virginPos(
8621 fr.m_data.at(it->m_line - 1).first.length() - 1));
8622 p->setEndLine(fr.m_data.at(it->m_line - 1).second.m_lineNumber);
8623 }
8624
8625 p = splitParagraphsAndFreeHtml(parent, p, po, collectRefLinks, m_fullyOptimizeParagraphs);
8626
// Not a heading: the paragraph is emitted as is and a horizontal line follows
// (see the end of this case). Otherwise the paragraph becomes a level 2 heading.
8627 if (!h2 || !po.m_headingAllowed) {
8628 if (!collectRefLinks && !p->isEmpty()) {
8629 parent->appendItem(p);
8630 }
8631
8632 h2 = false;
8633 } else {
8634 makeHeading(parent,
8635 doc,
8636 optimizeParagraph<Trait>(p, po, defaultParagraphOptimization()),
8637 fr.m_data[it->m_line].first.virginPos(it->m_pos + it->m_len - 1),
8638 fr.m_data[it->m_line].second.m_lineNumber,
8639 2,
8640 workingPath,
8641 fileName,
8642 collectRefLinks,
8643 {po.m_fr.m_data[it->m_line].first.virginPos(pos),
8644 fr.m_data[it->m_line].second.m_lineNumber,
8645 po.m_fr.m_data[it->m_line].first.virginPos(
8646 lastNonSpacePos(po.m_fr.m_data[it->m_line].first.asString())),
8647 fr.m_data[it->m_line].second.m_lineNumber},
8648 po);
8649
8650 po.m_checkLineOnNewType = true;
8651 }
8652
// A fresh paragraph is started; it begins on the next line when there is one.
8653 p.reset(new Paragraph<Trait>);
8654 po.m_rawTextData.clear();
8655
8656 if (it->m_line + 1 < static_cast<long long int>(fr.m_data.size())) {
8657 p->setStartColumn(fr.m_data.at(it->m_line + 1).first.virginPos(0));
8658 p->setStartLine(fr.m_data.at(it->m_line + 1).second.m_lineNumber);
8659 }
8660
8661 po.m_parent = p;
8662 po.m_line = it->m_line;
8663 po.m_pos = it->m_pos + it->m_len;
8664
8665 if (!h2 && !collectRefLinks) {
8666 makeHorLine<Trait>(fr.m_data[it->m_line], parent);
8667 }
8668 } break;
8669
8670 case Delimiter::H1:
8671 case Delimiter::H2: {
8672 po.m_wasRefLink = false;
8673 po.m_firstInParagraph = false;
8674
8675 optimizeParagraph<Trait>(p, po, OptimizeParagraphType::Semi);
8676
8677 checkForTextPlugins<Trait>(p, po, m_textPlugins, inLink);
8678
8679 if (it->m_line - 1 >= 0) {
8680 p->setEndColumn(fr.m_data.at(it->m_line - 1).first.virginPos(
8681 fr.m_data.at(it->m_line - 1).first.length() - 1));
8682 p->setEndLine(fr.m_data.at(it->m_line - 1).second.m_lineNumber);
8683 }
8684
8685 p = splitParagraphsAndFreeHtml(parent, p, po, collectRefLinks,
8686 m_fullyOptimizeParagraphs);
8687
// With headings allowed the collected paragraph becomes the heading text and a
// new paragraph is started after the underline.
8688 if (po.m_headingAllowed) {
8689 makeHeading(parent,
8690 doc,
8691 optimizeParagraph<Trait>(p, po, defaultParagraphOptimization()),
8692 fr.m_data[it->m_line].first.virginPos(it->m_pos + it->m_len - 1),
8693 fr.m_data[it->m_line].second.m_lineNumber,
8694 it->m_type == Delimiter::H1 ? 1 : 2,
8695 workingPath,
8696 fileName,
8697 collectRefLinks,
8698 {po.m_fr.m_data[it->m_line].first.virginPos(skipSpaces<Trait>(
8699 0, po.m_fr.m_data[it->m_line].first.asString())),
8700 fr.m_data[it->m_line].second.m_lineNumber,
8701 po.m_fr.m_data[it->m_line].first.virginPos(lastNonSpacePos(
8702 po.m_fr.m_data[it->m_line].first.asString())),
8703 fr.m_data[it->m_line].second.m_lineNumber},
8704 po);
8705
8706 po.m_checkLineOnNewType = true;
8707
8708 p.reset(new Paragraph<Trait>);
8709 po.m_rawTextData.clear();
8710
8711 if (it->m_line + 1 < static_cast<long long int>(fr.m_data.size())) {
8712 p->setStartColumn(fr.m_data.at(it->m_line + 1).first.virginPos(0));
8713 p->setStartLine(fr.m_data.at(it->m_line + 1).second.m_lineNumber);
8714 }
8715
8716 po.m_line = it->m_line;
8717 po.m_pos = it->m_pos + it->m_len;
8718 } else if (p->startColumn() == -1) {
// Heading is not allowed and the paragraph has no start yet: it starts at
// this delimiter.
8719 p->setStartColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos));
8720 p->setStartLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8721 }
8722
8723 po.m_parent = p;
8724 } break;
8725
// Any other delimiter is consumed as plain text.
8726 default: {
8727 if (!po.shouldStopParsing()) {
8728 po.m_wasRefLink = false;
8729 po.m_firstInParagraph = false;
8730
8731 makeText(it->m_line, it->m_pos + it->m_len, po);
8732 }
8733 } break;
8734 }
8735
8736 if (po.shouldStopParsing()) {
8737 break;
8738 }
8739
// After a heading was made the type of the next line is examined; for some
// types parsing stops here and the detected kind is stored in po.m_detected.
8740 if (po.m_checkLineOnNewType) {
8741 if (po.m_line + 1 < static_cast<long long int>(po.m_fr.m_data.size())) {
8742 const auto type = Parser<Trait>::whatIsTheLine(po.m_fr.m_data[po.m_line + 1].first);
8743
8744 bool doBreak = false;
8745
8746 switch (type) {
8748 po.m_detected = TextParsingOpts<Trait>::Detected::Code;
8749 doBreak = true;
8750 break;
8751
8754 po.m_detected = TextParsingOpts<Trait>::Detected::List;
8755 doBreak = true;
8756 break;
8757
8759 po.m_detected = TextParsingOpts<Trait>::Detected::Blockquote;
8760 doBreak = true;
8761 break;
8762
8763 default:
8764 break;
8765 }
8766
8767 if (doBreak) {
8768 break;
8769 }
8770 }
8771
8772 po.m_checkLineOnNewType = false;
8773 }
8774 }
8775 }
8776
8777 if (po.m_lastTextLine == -1) {
8778 checkForTableInParagraph(po, po.m_fr.m_data.size() - 1);
8779 }
8780
// Remaining text is flushed: up to the last text position when a table was
// detected, up to the end of the block when nothing was detected.
8781 switch(po.m_detected) {
8782 case TextParsingOpts<Trait>::Detected::Table:
8783 makeText(po.m_lastTextLine, po.m_lastTextPos, po);
8784 break;
8785
8786 case TextParsingOpts<Trait>::Detected::Nothing:
8787 {
8788 if(po.m_line <= static_cast<long long int>(po.m_fr.m_data.size() - 1)) {
8789 makeText(po.m_fr.m_data.size() - 1, po.m_fr.m_data.back().first.length(), po);
8790 }
8791 }
8792 break;
8793
8794 default:
8795 break;
8796 }
8797
// The last paragraph goes through the same steps as above and is appended to
// the parent unless only reference links are collected.
8798 if (!p->isEmpty()) {
8799 optimizeParagraph<Trait>(p, po, OptimizeParagraphType::Semi);
8800
8801 checkForTextPlugins<Trait>(p, po, m_textPlugins, inLink);
8802
8803 p = splitParagraphsAndFreeHtml(parent, p, po, collectRefLinks, m_fullyOptimizeParagraphs);
8804
8805 if (!p->isEmpty() && !collectRefLinks) {
8806 parent->appendItem(optimizeParagraph<Trait>(p, po, defaultParagraphOptimization()));
8807 }
8808
8809 po.m_rawTextData.clear();
8810 }
8811
// A position at the very end of a line is moved to the start of the next one.
8812 normalizePos(po.m_pos, po.m_line, po.m_line < static_cast<long long int>(po.m_fr.m_data.size()) ?
8813 po.m_fr.m_data[po.m_line].first.length() : 0, po.m_fr.m_data.size());
8814
8815 if (po.m_detected != TextParsingOpts<Trait>::Detected::Nothing) {
8816 if (po.m_line < static_cast<long long int>(po.m_fr.m_data.size())) {
8817 return po.m_fr.m_data.at(po.m_line).second.m_lineNumber;
8818 }
8819 }
8820
8821 return -1;
8822}
8823
8824template<class Trait>
8825inline void
8826Parser<Trait>::parseFootnote(MdBlock<Trait> &fr,
8827 std::shared_ptr<Block<Trait>>,
8828 std::shared_ptr<Document<Trait>> doc,
8829 typename Trait::StringList &linksToParse,
8830 const typename Trait::String &workingPath,
8831 const typename Trait::String &fileName,
8832 bool collectRefLinks)
8833{
8834 {
8835 const auto it = (std::find_if(fr.m_data.rbegin(), fr.m_data.rend(), [](const auto &s) {
8836 return !s.first.isEmpty();
8837 })).base();
8838
8839 if (it != fr.m_data.end()) {
8840 fr.m_data.erase(it, fr.m_data.end());
8841 }
8842 }
8843
8844 if (!fr.m_data.empty()) {
8845 std::shared_ptr<Footnote<Trait>> f(new Footnote<Trait>);
8846 f->setStartColumn(fr.m_data.front().first.virginPos(0));
8847 f->setStartLine(fr.m_data.front().second.m_lineNumber);
8848 f->setEndColumn(fr.m_data.back().first.virginPos(fr.m_data.back().first.length() - 1));
8849 f->setEndLine(fr.m_data.back().second.m_lineNumber);
8850
8851 auto delims = collectDelimiters(fr.m_data);
8852
8853 RawHtmlBlock<Trait> html;
8854
8855 TextParsingOpts<Trait> po = {fr, f, nullptr, doc, linksToParse, workingPath, fileName,
8856 collectRefLinks, false, html, m_textPlugins};
8857 po.m_lastTextLine = fr.m_data.size();
8858 po.m_lastTextPos = fr.m_data.back().first.length();
8859
8860 if (!delims.empty() && delims.cbegin()->m_type == Delimiter::SquareBracketsOpen &&
8861 !delims.cbegin()->m_isWordBefore) {
8862 typename MdBlock<Trait>::Data id;
8863 typename Delims::iterator it = delims.end();
8864
8865 po.m_line = delims.cbegin()->m_line;
8866 po.m_pos = delims.cbegin()->m_pos;
8867
8868 std::tie(id, it) = checkForLinkText(delims.begin(), delims.end(), po);
8869
8870 if (!toSingleLine(id).isEmpty() &&
8871 id.front().first.asString().startsWith(Trait::latin1ToString("^")) &&
8872 it != delims.cend() &&
8873 fr.m_data.at(it->m_line).first.length() > it->m_pos + 2 &&
8874 fr.m_data.at(it->m_line).first[it->m_pos + 1] == Trait::latin1ToChar(':') &&
8875 fr.m_data.at(it->m_line).first[it->m_pos + 2].isSpace()) {
8876 f->setIdPos({fr.m_data[delims.cbegin()->m_line].first.virginPos(delims.cbegin()->m_pos),
8877 fr.m_data[delims.cbegin()->m_line].second.m_lineNumber,
8878 fr.m_data.at(it->m_line).first.virginPos(it->m_pos + 1),
8879 fr.m_data.at(it->m_line).second.m_lineNumber});
8880
8881 {
8882 typename MdBlock<Trait>::Data tmp;
8883 std::copy(fr.m_data.cbegin() + it->m_line, fr.m_data.cend(),
8884 std::back_inserter(tmp));
8885 fr.m_data = tmp;
8886 }
8887
8888 fr.m_data.front().first = fr.m_data.front().first.sliced(it->m_pos + 3);
8889
8890 for (auto it = fr.m_data.begin(), last = fr.m_data.end(); it != last; ++it) {
8891 if (it->first.asString().startsWith(Trait::latin1ToString(" "))) {
8892 it->first = it->first.sliced(4);
8893 }
8894 }
8895
8896 StringListStream<Trait> stream(fr.m_data);
8897
8898 parse(stream, f, doc, linksToParse, workingPath, fileName, collectRefLinks);
8899
8900 if (!f->isEmpty()) {
8901 doc->insertFootnote(Trait::latin1ToString("#") + toSingleLine(id) +
8902 Trait::latin1ToString("/") + (!workingPath.isEmpty() ?
8903 workingPath + Trait::latin1ToString("/") : typename Trait::String()) + fileName,
8904 f);
8905 }
8906 }
8907 }
8908 }
8909}
8910
8911template<class Trait>
8912inline long long int
8913Parser<Trait>::parseBlockquote(MdBlock<Trait> &fr,
8914 std::shared_ptr<Block<Trait>> parent,
8915 std::shared_ptr<Document<Trait>> doc,
8916 typename Trait::StringList &linksToParse,
8917 const typename Trait::String &workingPath,
8918 const typename Trait::String &fileName,
8919 bool collectRefLinks,
8920 RawHtmlBlock<Trait> &)
8921{
8922 const long long int pos = fr.m_data.front().first.asString().indexOf(Trait::latin1ToChar('>'));
8923 long long int extra = 0;
8924
8925 long long int line = -1;
8926
8927 if (pos > -1) {
8928 typename Blockquote<Trait>::Delims delims;
8929
8930 long long int i = 0, j = 0;
8931
8932 BlockType bt = BlockType::EmptyLine;
8933
8934 for (auto it = fr.m_data.begin(), last = fr.m_data.end(); it != last; ++it, ++i) {
8935 const auto ns = skipSpaces<Trait>(0, it->first.asString());
8936 const auto gt = (ns < it->first.length() ? (it->first[ns] == Trait::latin1ToChar('>') ? ns : -1) : -1);
8937
8938 if (gt > -1) {
8939 const auto dp = it->first.virginPos(gt);
8940 delims.push_back({dp, it->second.m_lineNumber, dp, it->second.m_lineNumber});
8941
8942 if (it == fr.m_data.begin()) {
8943 extra = gt + (it->first.length() > gt + 1 ?
8944 (it->first[gt + 1] == Trait::latin1ToChar(' ') ? 1 : 0) : 0) + 1;
8945 }
8946
8947 it->first = it->first.sliced(gt + (it->first.length() > gt + 1 ?
8948 (it->first[gt + 1] == Trait::latin1ToChar(' ') ? 1 : 0) : 0) + 1);
8949
8950 bt = whatIsTheLine(it->first);
8951 }
8952 // Process lazyness...
8953 else {
8954 if (ns < 4 && isHorizontalLine<Trait>(it->first.asString().sliced(ns))) {
8955 line = it->second.m_lineNumber;
8956 break;
8957 }
8958
8959 const auto tmpBt = whatIsTheLine(it->first);
8960
8961 if (isListType(tmpBt)) {
8962 line = it->second.m_lineNumber;
8963 break;
8964 }
8965
8966 if (bt == BlockType::Text) {
8967 if (isH1<Trait>(it->first.asString())) {
8968 const auto p = it->first.asString().indexOf(Trait::latin1ToChar('='));
8969
8970 it->first.insert(p, Trait::latin1ToChar('\\'));
8971
8972 continue;
8973 } else if (isH2<Trait>(it->first.asString())) {
8974 const auto p = it->first.asString().indexOf(Trait::latin1ToChar('-'));
8975
8976 it->first.insert(p, Trait::latin1ToChar('\\'));
8977
8978 continue;
8979 }
8980 } else if ((bt == BlockType::Code || bt == BlockType::CodeIndentedBySpaces) &&
8981 it->second.m_mayBreakList) {
8982 line = it->second.m_lineNumber;
8983 break;
8984 }
8985
8986 if ((bt == BlockType::Text || bt == BlockType::Blockquote || bt == BlockType::List)
8987 && (tmpBt == BlockType::Text || tmpBt == BlockType::CodeIndentedBySpaces)) {
8988 continue;
8989 } else {
8990 line = it->second.m_lineNumber;
8991 break;
8992 }
8993 }
8994 }
8995
8996 typename MdBlock<Trait>::Data tmp;
8997
8998 for (; j < i; ++j) {
8999 tmp.push_back(fr.m_data.at(j));
9000 }
9001
9002 StringListStream<Trait> stream(tmp);
9003
9004 std::shared_ptr<Blockquote<Trait>> bq(new Blockquote<Trait>);
9005 bq->setStartColumn(fr.m_data.at(0).first.virginPos(0) - extra);
9006 bq->setStartLine(fr.m_data.at(0).second.m_lineNumber);
9007 bq->setEndColumn(fr.m_data.at(j - 1).first.virginPos(fr.m_data.at(j - 1).first.length() - 1));
9008 bq->setEndLine(fr.m_data.at(j - 1).second.m_lineNumber);
9009 bq->delims() = delims;
9010
9011 parse(stream, bq, doc, linksToParse, workingPath, fileName, collectRefLinks);
9012
9013 if (!collectRefLinks) {
9014 parent->appendItem(bq);
9015 }
9016 }
9017
9018 return line;
9019}
9020
9021//! \return Is the given string a new list item.
9022template<class Trait>
9023inline bool
9024isListItemAndNotNested(const typename Trait::String &s,
9025 long long int indent)
9026{
9027 long long int p = skipSpaces<Trait>(0, s);
9028
9029 if (p >= indent || p == s.size()) {
9030 return false;
9031 }
9032
9033 bool space = false;
9034
9035 if (p + 1 >= s.size()) {
9036 space = true;
9037 } else {
9038 space = s[p + 1].isSpace();
9039 }
9040
9041 if (p < 4) {
9042 if (s[p] == Trait::latin1ToChar('*') && space) {
9043 return true;
9044 } else if (s[p] == Trait::latin1ToChar('-') && space) {
9045 return true;
9046 } else if (s[p] == Trait::latin1ToChar('+') && space) {
9047 return true;
9048 } else {
9049 return isOrderedList<Trait>(s);
9050 }
9051 } else
9052 return false;
9053}
9054
9055//! \return Indent.
9056template<class Trait>
9057inline std::pair<long long int, long long int>
9058calculateIndent(const typename Trait::String &s,
9059 long long int p)
9060{
9061 return {0, skipSpaces<Trait>(p, s)};
9062}
9063
9064//! \return List item data.
9065template<class Trait>
9066inline std::tuple<bool, long long int, typename Trait::Char, bool>
9067listItemData(const typename Trait::String &s,
9068 bool wasText)
9069{
9070 long long int p = skipSpaces<Trait>(0, s);
9071
9072 if (p == s.size()) {
9073 return {false, 0, typename Trait::Char(), false};
9074 }
9075
9076 bool space = false;
9077
9078 if (p + 1 >= s.size()) {
9079 space = true;
9080 } else {
9081 space = s[p + 1].isSpace();
9082 }
9083
9084 if (p < 4) {
9085 if (s[p] == Trait::latin1ToChar('*') && space) {
9086 return {true, p + 2, Trait::latin1ToChar('*'),
9087 p + 2 < s.size() ? !s.sliced(p + 2).isEmpty() : false};
9088 } else if (s[p] == Trait::latin1ToChar('-')) {
9089 if (isH2<Trait>(s) && wasText) {
9090 return {false, p + 2, Trait::latin1ToChar('-'), false};
9091 } else if (space) {
9092 return {true, p + 2, Trait::latin1ToChar('-'),
9093 p + 2 < s.size() ? !s.sliced(p + 2).isEmpty() : false};
9094 }
9095 } else if (s[p] == Trait::latin1ToChar('+') && space) {
9096 return {true, p + 2, Trait::latin1ToChar('+'),
9097 p + 2 < s.size() ? !s.sliced(p + 2).isEmpty() : false};
9098 } else {
9099 int d = 0, l = 0;
9100 typename Trait::Char c;
9101
9102 if (isOrderedList<Trait>(s, &d, &l, &c)) {
9103 return {true, p + l + 2, c,
9104 p + l + 2 < s.size() ? !s.sliced(p + l + 2).isEmpty() : false};
9105 } else {
9106 return {false, 0, typename Trait::Char(), false};
9107 }
9108 }
9109 }
9110
9111 return {false, 0, typename Trait::Char(), false};
9112}
9113
9114//! Set last position of the item.
9115template<class Trait>
9116inline void
9117setLastPos(std::shared_ptr<Item<Trait>> item,
9118 long long int pos,
9119 long long int line)
9120{
9121 item->setEndColumn(pos);
9122 item->setEndLine(line);
9123}
9124
9125//! Update last position of all parents.
//! When the current HTML parent (`html.m_parent`) differs from the top parent,
//! every block registered for that parent in `html.m_toAdjustLastPos` gets the
//! end column and end line of the HTML item `html.m_html`.
// NOTE(review): the listing line with the function name and the `html` parameter
// is absent here.
9126template<class Trait>
9127inline void
9129{
9130 if (html.m_parent != html.m_topParent) {
9131 const auto it = html.m_toAdjustLastPos.find(html.m_parent);
9132
// Nothing is adjusted when the parent has no registered blocks.
9133 if (it != html.m_toAdjustLastPos.end()) {
9134 for (auto &i : it->second) {
9135 i.first->setEndColumn(html.m_html->endColumn());
9136 i.first->setEndLine(html.m_html->endLine());
9137 }
9138 }
9139 }
9140}
9141
9142template<class Trait>
9143inline long long int
9144Parser<Trait>::parseList(MdBlock<Trait> &fr,
9145 std::shared_ptr<Block<Trait>> parent,
9146 std::shared_ptr<Document<Trait>> doc,
9147 typename Trait::StringList &linksToParse,
9148 const typename Trait::String &workingPath,
9149 const typename Trait::String &fileName,
9150 bool collectRefLinks,
9151 RawHtmlBlock<Trait> &html)
9152{
9153 bool resetTopParent = false;
9154 long long int line = -1;
9155
9156 if (!html.m_topParent) {
9157 html.m_topParent = parent;
9158 resetTopParent = true;
9159 }
9160
9161 const auto p = skipSpaces<Trait>(0, fr.m_data.front().first.asString());
9162
9163 if (p != fr.m_data.front().first.length()) {
9164 std::shared_ptr<List<Trait>> list(new List<Trait>);
9165
9166 typename MdBlock<Trait>::Data listItem;
9167 auto it = fr.m_data.begin();
9168 listItem.push_back(*it);
9169 list->setStartColumn(it->first.virginPos(p));
9170 list->setStartLine(it->second.m_lineNumber);
9171 ++it;
9172
9173 long long int indent = 0;
9174 typename Trait::Char marker;
9175
9176 std::tie(std::ignore, indent, marker, std::ignore) =
9177 listItemData<Trait>(listItem.front().first.asString(), false);
9178
9179 html.m_blocks.push_back({list, list->startColumn() + indent});
9180
9181 if (!collectRefLinks) {
9182 html.m_toAdjustLastPos.insert({list, html.m_blocks});
9183 }
9184
9185 bool updateIndent = false;
9186
9187 auto addListMakeNew = [&]() {
9188 if (!list->isEmpty() && !collectRefLinks) {
9189 parent->appendItem(list);
9190 }
9191
9192 html.m_blocks.pop_back();
9193
9194 list.reset(new List<Trait>);
9195
9196 html.m_blocks.push_back({list, indent});
9197
9198 if (!collectRefLinks) {
9199 html.m_toAdjustLastPos.insert({list, html.m_blocks});
9200 }
9201 };
9202
9203 auto processLastHtml = [&](std::shared_ptr<ListItem<Trait>> resItem) {
9204 if (html.m_html && resItem) {
9205 html.m_parent = (resItem->startLine() == html.m_html->startLine() ||
9206 html.m_html->startColumn() >= resItem->startColumn() + indent ?
9207 resItem : html.findParent(html.m_html->startColumn()));
9208
9209 if (!html.m_parent) {
9210 html.m_parent = html.m_topParent;
9211 }
9212
9213 if (html.m_parent != resItem) {
9214 addListMakeNew();
9215 }
9216
9217 const auto continueHtml = html.m_onLine && html.m_continueHtml && html.m_parent == html.m_topParent;
9218
9219 if (!collectRefLinks) {
9220 if (!continueHtml) {
9221 html.m_parent->appendItem(html.m_html);
9222 }
9223
9224 updateLastPosInList<Trait>(html);
9225 }
9226
9227 if (!continueHtml) {
9228 resetHtmlTag<Trait>(html);
9229 }
9230 }
9231 };
9232
9233 auto processListItem = [&]() {
9234 MdBlock<Trait> block = {listItem, 0};
9235
9236 std::shared_ptr<ListItem<Trait>> resItem;
9237
9238 line = parseListItem(block, list, doc, linksToParse, workingPath, fileName,
9239 collectRefLinks, html, &resItem);
9240 listItem.clear();
9241
9242 if (html.m_html) {
9243 processLastHtml(resItem);
9244 } else if (line >= 0) {
9245 addListMakeNew();
9246 }
9247 };
9248
9249 for (auto last = fr.m_data.end(); it != last; ++it) {
9250 if (updateIndent) {
9251 std::tie(std::ignore, indent, marker, std::ignore) =
9252 listItemData<Trait>(it->first.asString(), false);
9253
9254 if (!collectRefLinks) {
9255 html.m_blocks.back().second = indent;
9256 }
9257
9258 updateIndent = false;
9259 }
9260
9261 const auto ns = skipSpaces<Trait>(0, it->first.asString());
9262
9263 if (isH1<Trait>(it->first.asString().sliced(ns)) && ns < indent && !listItem.empty()) {
9264 const auto p = it->first.asString().indexOf(Trait::latin1ToChar('='));
9265
9266 it->first.insert(p, Trait::latin1ToChar('\\'));
9267 } else if (isHorizontalLine<Trait>(it->first.asString().sliced(ns)) &&
9268 ns < indent && !listItem.empty()) {
9269 updateIndent = true;
9270
9271 processListItem();
9272
9273 if (!list->isEmpty()) {
9274 addListMakeNew();
9275 }
9276
9277 if (!collectRefLinks) {
9278 makeHorLine<Trait>(*it, parent);
9279 }
9280
9281 continue;
9282 } else if (isListItemAndNotNested<Trait>(it->first.asString(), indent) &&
9283 !listItem.empty() && !it->second.m_mayBreakList) {
9284 typename Trait::Char tmpMarker;
9285 std::tie(std::ignore, indent, tmpMarker, std::ignore) =
9286 listItemData<Trait>(it->first.asString(), false);
9287
9288 processListItem();
9289
9290 if (tmpMarker != marker) {
9291 if (!list->isEmpty()) {
9292 addListMakeNew();
9293 }
9294
9295 marker = tmpMarker;
9296 }
9297 }
9298
9299 if (line > 0) {
9300 break;
9301 }
9302
9303 listItem.push_back(*it);
9304
9305 if (list->startColumn() == -1) {
9306 list->setStartColumn(
9307 it->first.virginPos(std::min(it->first.length() ?
9308 it->first.length() - 1 : 0, skipSpaces<Trait>(0, it->first.asString()))));
9309 list->setStartLine(it->second.m_lineNumber);
9310
9311 if (!collectRefLinks) {
9312 html.m_blocks.back().second += list->startColumn();
9313 }
9314 }
9315 }
9316
9317 if (!listItem.empty()) {
9318 MdBlock<Trait> block = {listItem, 0};
9319 line = parseListItem(block, list, doc, linksToParse, workingPath, fileName,
9320 collectRefLinks, html);
9321 }
9322
9323 if (!list->isEmpty() && !collectRefLinks) {
9324 parent->appendItem(list);
9325 }
9326
9327 html.m_blocks.pop_back();
9328 }
9329
9330 if (resetTopParent) {
9331 html.m_topParent.reset();
9332 }
9333
9334 return line;
9335}
9336
//! Parse one list item from the lines of \p fr and, unless \p collectRefLinks is set,
//! append the resulting ListItem to \p parent.
//!
//! The first line of \p fr is expected to hold the list marker. The remaining lines are
//! the item's body: they are de-indented and parsed as children of the item, and the first
//! body line that itself starts a list item opens a nested list handled by parseList().
//! Raw HTML left pending in \p html is either attached to the block it belongs to or handed
//! back to the caller through \p html.
//!
//! \param fr Lines of the list item; trailing empty lines are removed from it.
//! \param parent Block that receives the item.
//! \param collectRefLinks When true no items are appended (nested parsers are still run).
//! \param html State of raw HTML processing shared with the caller.
//! \param resItem Optional output: receives the created item (null if no item was produced).
//! \return Value produced by the nested parse calls, -1 if none of them set it
//!         (presumably the line number from which the caller has to continue - TODO confirm).
9337template<class Trait>
9338inline long long int
9339Parser<Trait>::parseListItem(MdBlock<Trait> &fr,
9340 std::shared_ptr<Block<Trait>> parent,
9341 std::shared_ptr<Document<Trait>> doc,
9342 typename Trait::StringList &linksToParse,
9343 const typename Trait::String &workingPath,
9344 const typename Trait::String &fileName,
9345 bool collectRefLinks,
9346 RawHtmlBlock<Trait> &html,
9347 std::shared_ptr<ListItem<Trait>> *resItem)
9348{
 // Drop trailing empty lines: search backwards for the last non-empty line,
 // base() then refers to the element right after it.
9349 {
9350 const auto it = (std::find_if(fr.m_data.rbegin(), fr.m_data.rend(), [](const auto &s) {
9351 return !s.first.isEmpty();
9352 })).base();
9353
9354 if (it != fr.m_data.end()) {
9355 fr.m_data.erase(it, fr.m_data.end());
9356 }
9357 }
9358
 // The item starts at the first non-space character of the first line.
9359 const auto p = skipSpaces<Trait>(0, fr.m_data.front().first.asString());
9360
9361 std::shared_ptr<ListItem<Trait>> item(new ListItem<Trait>);
9362
9363 item->setStartColumn(fr.m_data.front().first.virginPos(p));
9364 item->setStartLine(fr.m_data.front().second.m_lineNumber);
9365
9366 int i = 0, len = 0;
9367
 // Ordered items store their number and a delimiter spanning len columns,
 // unordered items get a delimiter whose start and end coincide.
9368 if (isOrderedList<Trait>(fr.m_data.front().first.asString(), &i, &len)) {
9369 item->setListType(ListItem<Trait>::Ordered);
9370 item->setStartNumber(i);
9371 item->setDelim({item->startColumn(), item->startLine(), item->startColumn() + len, item->startLine()});
9372 } else {
9373 item->setListType(ListItem<Trait>::Unordered);
9374 item->setDelim({item->startColumn(), item->startLine(), item->startColumn(), item->startLine()});
9375 }
9376
 // Number 1 is flagged as Start, any other number as Continue.
9377 if (item->listType() == ListItem<Trait>::Ordered) {
9378 item->setOrderedListPreState(i == 1 ? ListItem<Trait>::Start : ListItem<Trait>::Continue);
9379 }
9380
9381 typename MdBlock<Trait>::Data data;
9382
 // The first line is handled separately below, iteration starts from the second one.
9383 auto it = fr.m_data.begin();
9384 ++it;
9385
9386 int pos = 1;
9387
9388 long long int indent = 0;
9389 bool wasText = false;
9390
 // NOTE(review): second element of listItemData() is used as the indent of the
 // item's content - confirm against listItemData().
9391 std::tie(std::ignore, indent, std::ignore, wasText) =
9392 listItemData<Trait>(fr.m_data.front().first.asString(), wasText);
9393
 // Put the item (with its content column) on the stack of open blocks kept in html.
9394 html.m_blocks.push_back({item, item->startColumn() + indent});
9395
9396 if (!collectRefLinks) {
9397 html.m_toAdjustLastPos.insert({item, html.m_blocks});
9398 }
9399
9400 const auto firstNonSpacePos = calculateIndent<Trait>(
9401 fr.m_data.front().first.asString(), indent).second;
9402
 // Move the indent to the first non-space of the content unless that lies
 // 4 or more columns behind the current indent.
9403 if (firstNonSpacePos - indent < 4) {
9404 indent = firstNonSpacePos;
9405 }
9406
 // What remains of the first line after the indent is the first body line.
9407 if (indent < fr.m_data.front().first.length()) {
9408 data.push_back({fr.m_data.front().first.right(fr.m_data.front().first.length() - indent),
9409 fr.m_data.front().second});
9410 }
9411
9412 bool taskList = false;
9413 bool checked = false;
9414
 // Task list marker at the beginning of the body: '[', then a space or 'x'/'X', then ']'.
9415 if (!data.empty()) {
9416 auto p = skipSpaces<Trait>(0, data.front().first.asString());
9417
9418 if (p < data.front().first.length()) {
9419 if (data.front().first[p] == Trait::latin1ToChar('[')) {
9420 const auto startTaskDelimPos = data.front().first.virginPos(p);
9421
9422 ++p;
9423
9424 if (p < data.front().first.length()) {
9425 if (data.front().first[p] == Trait::latin1ToChar(' ') ||
9426 data.front().first[p].toLower() == Trait::latin1ToChar('x')) {
9427 if (data.front().first[p].toLower() == Trait::latin1ToChar('x')) {
9428 checked = true;
9429 }
9430
9431 ++p;
9432
9433 if (p < data.front().first.length()) {
9434 if (data.front().first[p] == Trait::latin1ToChar(']')) {
9435 item->setTaskDelim({startTaskDelimPos, item->startLine(), data.front().first.virginPos(p), item->startLine()});
9436
9437 taskList = true;
9438
 // The marker itself is not part of the text to parse.
9439 data[0].first = data[0].first.sliced(p + 1);
9440 }
9441 }
9442 }
9443 }
9444 }
9445 }
9446 }
9447
9448 if (taskList) {
9449 item->setTaskList();
9450 item->setChecked(checked);
9451 }
9452
9453 bool fensedCode = false;
9454 typename Trait::String startOfCode;
9455 bool wasEmptyLine = false;
9456
 // Raw HTML blocks whose insertion is postponed, paired with the index to insert at.
9457 std::vector<std::pair<RawHtmlBlock<Trait>, long long int>> htmlToAdd;
9458 long long int line = -1;
9459
 // Parse gathered lines as children of the item. parse() returns a fresh HTML state,
 // so the top parent, the stack of blocks and the positions to adjust are restored
 // from the state saved before the call.
9460 auto parseStream = [&](StringListStream<Trait> &stream) -> long long int
9461 {
9462 const auto tmpHtml = html;
9463 long long int line = -1;
9464 std::tie(html, line) = parse(stream, item, doc, linksToParse, workingPath, fileName,
9465 collectRefLinks, false, true, true);
9466 html.m_topParent = tmpHtml.m_topParent;
9467 html.m_blocks = tmpHtml.m_blocks;
9468 html.m_toAdjustLastPos = tmpHtml.m_toAdjustLastPos;
9469
9470 return line;
9471 };
9472
 // Handle raw HTML pending in html, it is the line to continue from.
 // Returns -2 if no continuation of HTML was parsed, otherwise the result
 // of parseText() run on the lines from it to the end of the block.
9473 auto processHtml = [&](auto it) -> long long int
9474 {
 // Remember finished HTML together with the current count of children of its
 // parent, the real insertion happens at the end of parseListItem().
9475 auto finishHtml = [&]()
9476 {
9477 if (html.m_html) {
9478 htmlToAdd.push_back({html, html.m_parent->items().size()});
9479 updateLastPosInList<Trait>(html);
9480 resetHtmlTag<Trait>(html);
9481 }
9482 };
9483
9484 if (html.m_html.get()) {
 // Owner of the HTML is looked up by its start column, the top parent is the fallback.
9485 html.m_parent = html.findParent(html.m_html->startColumn());
9486
9487 if (!html.m_parent) {
9488 html.m_parent = html.m_topParent;
9489 }
9490
9491 data.clear();
9492
9493 if (html.m_continueHtml) {
9494 MdBlock<Trait> tmp;
9495 tmp.m_emptyLineAfter = fr.m_emptyLineAfter;
9496 tmp.m_emptyLinesBefore = emptyLinesBeforeCount<Trait>(fr.m_data.begin(), it);
9497 std::copy(it, fr.m_data.end(), std::back_inserter(tmp.m_data));
9498
9499 const auto line = parseText(tmp, html.m_parent, doc, linksToParse, workingPath, fileName,
9500 collectRefLinks, html);
9501
9502 if (!html.m_continueHtml) {
9503 finishHtml();
9504 }
9505
9506 return line;
9507 }
9508
9509 finishHtml();
9510 }
9511
9512 return -2;
9513 };
9514
 // -2: pending HTML did not consume the block, the body is processed as usual.
9515 if (processHtml(std::prev(it)) == -2) {
9516 for (auto last = fr.m_data.end(); it != last; ++it, ++pos) {
 // Track fenced code, the item's indent is removed before checking for fences.
9517 if (!fensedCode) {
9518 fensedCode = isCodeFences<Trait>(it->first.asString().startsWith(
9519 typename Trait::String(indent, Trait::latin1ToChar(' '))) ?
9520 it->first.asString().sliced(indent) : it->first.asString());
9521
9522 if (fensedCode) {
9523 startOfCode = startSequence<Trait>(it->first.asString());
9524 }
 // A closing fence has to contain the start sequence of the opening one.
9525 } else if (fensedCode &&
9526 isCodeFences<Trait>(it->first.asString().startsWith(
9527 typename Trait::String(indent, Trait::latin1ToChar(' '))) ?
9528 it->first.asString().sliced(indent) : it->first.asString(),
9529 true) && startSequence<Trait>(it->first.asString()).contains(startOfCode)) {
9530 fensedCode = false;
9531 }
9532
9533 if (!fensedCode) {
9534 long long int newIndent = 0;
9535 bool ok = false;
9536
 // Does this line (without the item's indent) start a list item?
9537 std::tie(ok, newIndent, std::ignore, wasText) = listItemData<Trait>(
9538 it->first.asString().startsWith(typename Trait::String(indent, Trait::latin1ToChar(' '))) ?
9539 it->first.asString().sliced(indent) : it->first.asString(),
9540 wasText);
9541
9542 if (ok && !it->second.m_mayBreakList) {
 // A nested list begins: lines gathered so far are parsed as content of this item.
9543 StringListStream<Trait> stream(data);
9544
9545 line = parseStream(stream);
9546
9547 data.clear();
9548
9549 const auto lineAfterHtml = processHtml(it);
9550
9551 if (lineAfterHtml != -2) {
9552 if (lineAfterHtml == -1) {
9553 break;
9554 } else {
9555 if (html.m_parent == html.m_topParent) {
9556 line = lineAfterHtml;
9557 } else {
 // Advance by the distance between the returned line number and the current one.
9558 it += (lineAfterHtml - it->second.m_lineNumber);
9559 }
9560 }
9561 }
9562
9563 if (line != -1) {
9564 break;
9565 }
9566
 // Postponed HTML owned by the top parent ends this item, the current line
 // number is returned to the caller.
9567 if (!htmlToAdd.empty() && htmlToAdd.back().first.m_parent == html.m_topParent) {
9568 line = it->second.m_lineNumber;
9569
9570 break;
9571 } else {
9572 typename MdBlock<Trait>::Data nestedList;
9573 nestedList.push_back(*it);
9574 const auto emptyLinesBefore = emptyLinesBeforeCount<Trait>(fr.m_data.begin(), it);
9575 ++it;
9576
9577 wasEmptyLine = false;
9578
 // Gather lines of the nested list: a line is taken if it is a list item,
 // is indented by at least indent + newIndent, consists of spaces only,
 // or does not follow an empty line.
9579 for (; it != last; ++it) {
9580 const auto ns = skipSpaces<Trait>(0, it->first.asString());
9581 std::tie(ok, std::ignore, std::ignore, wasText) =
9582 listItemData<Trait>((ns >= indent ? it->first.asString().sliced(indent) :
9583 it->first.asString()), wasText);
9584
9585 if (ok) {
9586 wasEmptyLine = false;
9587 }
9588
9589 if (ok || ns >= indent + newIndent || ns == it->first.length() || !wasEmptyLine) {
9590 nestedList.push_back(*it);
9591 } else {
9592 break;
9593 }
9594
9595 wasEmptyLine = (ns == it->first.length());
9596
9597 wasText = (wasEmptyLine ? false : wasText);
9598 }
9599
 // Non-empty lines indented less than this item's content are marked as able
 // to break the list, all the others lose up to indent leading characters.
9600 for (auto it = nestedList.begin(), last = nestedList.end(); it != last; ++it) {
9601 const auto ns = skipSpaces<Trait>(0, it->first.asString());
9602
9603 if (ns < indent && ns != it->first.length()) {
9604 it->second.m_mayBreakList = true;
9605 } else {
9606 it->first = it->first.sliced(std::min(ns, indent));
9607 }
9608 }
9609
9610 while (!nestedList.empty() &&
9611 nestedList.back().first.asString().isEmpty()) {
9612 nestedList.pop_back();
9613 }
9614
9615 MdBlock<Trait> block = {nestedList, emptyLinesBefore, wasEmptyLine};
9616
9617 line = parseList(block, item, doc, linksToParse, workingPath, fileName,
9618 collectRefLinks, html);
9619
9620 if (line >= 0) {
9621 break;
9622 }
9623
 // Lines after the nested list are de-indented and left in data
 // for the final parse after the loop.
9624 for (; it != last; ++it) {
9625 if (it->first.asString().startsWith(typename Trait::String(
9626 indent, Trait::latin1ToChar(' ')))) {
9627 it->first = it->first.sliced(indent);
9628 }
9629
9630 data.push_back(*it);
9631 }
9632
9633 break;
9634 }
9635 } else {
 // Ordinary body line: remove the item's indent and keep it for parsing.
9636 if (!it->second.m_mayBreakList &&
9637 it->first.asString().startsWith(typename Trait::String(
9638 indent, Trait::latin1ToChar(' ')))) {
9639 it->first = it->first.sliced(indent);
9640 }
9641
9642 data.push_back(*it);
9643
9644 wasEmptyLine = (skipSpaces<Trait>(0, it->first.asString()) == it->first.length());
9645
9646 wasText = !wasEmptyLine;
9647 }
9648 } else {
 // Inside of fenced code lines are only de-indented and gathered.
9649 if (!it->second.m_mayBreakList &&
9650 it->first.asString().startsWith(typename Trait::String(
9651 indent, Trait::latin1ToChar(' ')))) {
9652 it->first = it->first.sliced(indent);
9653 }
9654
9655 data.push_back(*it);
9656 }
9657 }
9658
 // Parse what is left of the body and look up the owner of HTML that may remain pending.
9659 if (!data.empty()) {
9660 StringListStream<Trait> stream(data);
9661
9662 line = parseStream(stream);
9663
9664 if (html.m_html) {
9665 html.m_parent = html.findParent(html.m_html->startColumn());
9666
9667 if (!html.m_parent) {
9668 html.m_parent = html.m_topParent;
9669 }
9670 }
9671 }
9672 } else {
 // Continuation of pending HTML was parsed instead of the body: no item is produced.
9673 item.reset();
9674 }
9675
9676 if (!collectRefLinks) {
9677 if (item) {
9678 parent->appendItem(item);
9679 }
9680
9681 long long int i = 0;
9682
 // Insert postponed HTML, i shifts the stored index by the count of blocks inserted
 // so far. HTML owned by the top parent is handed back to the caller through html.
9683 for (auto &h : htmlToAdd) {
9684 if (h.first.m_parent != h.first.m_topParent) {
9685 h.first.m_parent->insertItem(h.second + i, h.first.m_html);
9686
9687 ++i;
9688
9689 updateLastPosInList(h.first);
9690 } else {
9691 html = h.first;
9692
9693 break;
9694 }
9695 }
9696
 // End position of the item: the last line of the block or, with HTML still pending,
 // the line where that HTML starts (the line before it if the HTML begins at or
 // before the first non-space character of its line).
9697 if (item) {
9698 long long int htmlStartColumn = -1;
9699 long long int htmlStartLine = -1;
9700
9701 if (html.m_html) {
9702 std::tie(htmlStartColumn, htmlStartLine) =
9703 localPosFromVirgin<Trait>(fr, html.m_html->startColumn(), html.m_html->startLine());
9704 }
9705
9706 long long int localLine = (html.m_html ? htmlStartLine : fr.m_data.size() - 1);
9707
9708 if (html.m_html) {
9709 if (skipSpaces<Trait>(0, fr.m_data[localLine].first.asString()) >= htmlStartColumn) {
9710 --localLine;
9711 }
9712 }
9713
9714 const auto lastLine = fr.m_data[localLine].second.m_lineNumber;
9715
9716 const auto lastColumn = fr.m_data[localLine].first.virginPos(
9717 fr.m_data[localLine].first.length() ? fr.m_data[localLine].first.length() - 1 : 0);
9718
9719 item->setEndColumn(lastColumn);
9720 item->setEndLine(lastLine);
9721 parent->setEndColumn(lastColumn);
9722 parent->setEndLine(lastLine);
9723 }
9724 }
9725
9726 if (resItem) {
9727 *resItem = item;
9728 }
9729
 // Balance the push_back() made for this item above.
9730 html.m_blocks.pop_back();
9731
9732 return line;
9733}
9734
9735template<class Trait>
9736inline long long int
9737Parser<Trait>::parseCode(MdBlock<Trait> &fr,
9738 std::shared_ptr<Block<Trait>> parent,
9739 bool collectRefLinks)
9740{
9741 const auto indent = skipSpaces<Trait>(0, fr.m_data.front().first.asString());
9742
9743 if (indent != fr.m_data.front().first.length()) {
9744 WithPosition startDelim, endDelim, syntaxPos;
9745 typename Trait::String syntax;
9746 isStartOfCode<Trait>(fr.m_data.front().first.asString(), &syntax, &startDelim, &syntaxPos);
9747 syntax = replaceEntity<Trait>(syntax);
9748 startDelim.setStartLine(fr.m_data.front().second.m_lineNumber);
9749 startDelim.setEndLine(startDelim.startLine());
9750 startDelim.setStartColumn(fr.m_data.front().first.virginPos(startDelim.startColumn()));
9751 startDelim.setEndColumn(fr.m_data.front().first.virginPos(startDelim.endColumn()));
9752
9753 if (syntaxPos.startColumn() != -1) {
9754 syntaxPos.setStartLine(startDelim.startLine());
9755 syntaxPos.setEndLine(startDelim.startLine());
9756 syntaxPos.setStartColumn(fr.m_data.front().first.virginPos(syntaxPos.startColumn()));
9757 syntaxPos.setEndColumn(fr.m_data.front().first.virginPos(syntaxPos.endColumn()));
9758 }
9759
9760 const long long int startPos = fr.m_data.front().first.virginPos(indent);
9761 const long long int emptyColumn = fr.m_data.front().first.virginPos(fr.m_data.front().first.length());
9762 const long long int startLine = fr.m_data.front().second.m_lineNumber;
9763 const long long int endPos = fr.m_data.back().first.virginPos(fr.m_data.back().first.length() - 1);
9764 const long long int endLine = fr.m_data.back().second.m_lineNumber;
9765
9766 fr.m_data.erase(fr.m_data.cbegin());
9767
9768 {
9769 const auto it = std::prev(fr.m_data.cend());
9770
9771 if (it->second.m_lineNumber > -1) {
9772 endDelim.setStartColumn(it->first.virginPos(skipSpaces<Trait>(0, it->first.asString())));
9773 endDelim.setStartLine(it->second.m_lineNumber);
9774 endDelim.setEndLine(endDelim.startLine());
9775 endDelim.setEndColumn(it->first.virginPos(it->first.length() - 1));
9776 }
9777
9778 fr.m_data.erase(it);
9779 }
9780
9781 if (syntax.toLower() == Trait::latin1ToString("math")) {
9782 typename Trait::String math;
9783 bool first = true;
9784
9785 for (const auto &l : std::as_const(fr.m_data)) {
9786 if (!first) {
9787 math.push_back(Trait::latin1ToChar('\n'));
9788 }
9789
9790 math.push_back(l.first.virginSubString());
9791
9792 first = false;
9793 }
9794
9795 if (!collectRefLinks) {
9796 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
9797 p->setStartColumn(startPos);
9798 p->setStartLine(startLine);
9799 p->setEndColumn(endPos);
9800 p->setEndLine(endLine);
9801
9802 std::shared_ptr<Math<Trait>> m(new Math<Trait>);
9803
9804 if (!fr.m_data.empty()) {
9805 m->setStartColumn(fr.m_data.front().first.virginPos(0));
9806 m->setStartLine(fr.m_data.front().second.m_lineNumber);
9807 m->setEndColumn(fr.m_data.back().first.virginPos(fr.m_data.back().first.length() - 1));
9808 m->setEndLine(fr.m_data.back().second.m_lineNumber);
9809 } else {
9810 m->setStartColumn(emptyColumn);
9811 m->setStartLine(startLine);
9812 m->setEndColumn(emptyColumn);
9813 m->setEndLine(startLine);
9814 }
9815
9816 m->setInline(false);
9817 m->setExpr(math);
9818 m->setStartDelim(startDelim);
9819 m->setEndDelim(endDelim);
9820 m->setSyntaxPos(syntaxPos);
9821 m->setFensedCode(true);
9822 p->appendItem(m);
9823
9824 parent->appendItem(p);
9825 }
9826 } else {
9827 return parseCodeIndentedBySpaces(fr, parent, collectRefLinks, indent, syntax, emptyColumn,
9828 startLine, true, startDelim, endDelim, syntaxPos);
9829 }
9830 }
9831
9832 return -1;
9833}
9834
9835template<class Trait>
9836inline long long int
9838 std::shared_ptr<Block<Trait>> parent,
9839 bool collectRefLinks,
9840 int indent,
9841 const typename Trait::String &syntax,
9842 long long int emptyColumn,
9843 long long int startLine,
9844 bool fensedCode,
9845 const WithPosition &startDelim,
9846 const WithPosition &endDelim,
9847 const WithPosition &syntaxPos)
9848{
9849 typename Trait::String code;
9850 long long int startPos = 0;
9851 bool first = true;
9852
9853 auto it = fr.m_data.begin(), lastIt = fr.m_data.end();
9854
9855 for (; it != lastIt; ++it) {
9856 if (it->second.m_mayBreakList) {
9857 lastIt = it;
9858 break;
9859 }
9860
9861 if (!collectRefLinks) {
9862 const auto ns = skipSpaces<Trait>(0, it->first.asString());
9863 if (first) {
9864 startPos = ns;
9865 }
9866 first = false;
9867
9868 code.push_back((indent > 0 ? it->first.virginSubString(ns < indent ? ns : indent) +
9869 typename Trait::String(Trait::latin1ToChar('\n')) :
9870 typename Trait::String(it->first.virginSubString()) +
9871 typename Trait::String(Trait::latin1ToChar('\n'))));
9872 }
9873 }
9874
9875 if (!collectRefLinks) {
9876 if (!code.isEmpty()) {
9877 code.remove(code.length() - 1, 1);
9878 }
9879
9880 std::shared_ptr<Code<Trait>> codeItem(new Code<Trait>(code, fensedCode, false));
9881 codeItem->setSyntax(syntax);
9882 codeItem->setStartDelim(startDelim);
9883 codeItem->setEndDelim(endDelim);
9884 codeItem->setSyntaxPos(syntaxPos);
9885
9886 if (lastIt != fr.m_data.end() || (it == fr.m_data.end() && !fr.m_data.empty())) {
9887 codeItem->setStartColumn(fr.m_data.front().first.virginPos(startPos));
9888 codeItem->setStartLine(fr.m_data.front().second.m_lineNumber);
9889 auto tmp = std::prev(lastIt);
9890 codeItem->setEndColumn(tmp->first.virginPos(tmp->first.length() - 1));
9891 codeItem->setEndLine(tmp->second.m_lineNumber);
9892 } else {
9893 codeItem->setStartColumn(emptyColumn);
9894 codeItem->setStartLine(startLine);
9895 codeItem->setEndColumn(emptyColumn);
9896 codeItem->setEndLine(startLine);
9897 }
9898
9899 if (fensedCode) {
9900 parent->appendItem(codeItem);
9901 } else if (!parent->items().empty() && parent->items().back()->type() == ItemType::Code) {
9902 auto c = std::static_pointer_cast<Code<Trait>>(parent->items().back());
9903
9904 if (!c->isFensedCode()) {
9905 auto line = c->endLine();
9906 auto text = c->text();
9907
9908 for (; line < codeItem->startLine(); ++line) {
9909 text.push_back(Trait::latin1ToString("\n"));
9910 }
9911
9912 text.push_back(codeItem->text());
9913 c->setText(text);
9914 c->setEndColumn(codeItem->endColumn());
9915 c->setEndLine(codeItem->endLine());
9916 } else {
9917 parent->appendItem(codeItem);
9918 }
9919 } else {
9920 parent->appendItem(codeItem);
9921 }
9922 }
9923
9924 if (lastIt != fr.m_data.end()) {
9925 return lastIt->second.m_lineNumber;
9926 }
9927
9928 return -1;
9929}
9930
9931} /* namespace MD */
9932
9933#endif // MD4QT_MD_PARSER_HPP_INCLUDED
Abstract block (storage of child items).
Definition doc.h:604
const Items & items() const
Definition doc.h:630
Blockquote.
Definition doc.h:863
Code.
Definition doc.h:1296
Document.
Definition doc.h:1801
Footnote.
Definition doc.h:1754
Heading.
Definition doc.h:711
typename Trait::template Vector< WithPosition > Delims
Type of list of service characters.
Definition doc.h:721
Horizontal line.
Definition doc.h:365
Image.
Definition doc.h:1210
Base class for items that can have style options.
Definition doc.h:260
void setOpts(int o)
Set style options.
Definition doc.h:288
const Styles & closeStyles() const
Definition doc.h:306
const Styles & openStyles() const
Definition doc.h:294
int opts() const
Definition doc.h:282
typename Trait::template Vector< StyleDelim > Styles
Type of list of emphasis.
Definition doc.h:279
Base class for item in Markdown document.
Definition doc.h:178
virtual ItemType type() const =0
Line break.
Definition doc.h:571
List.
Definition doc.h:1066
Page break.
Definition doc.h:335
Paragraph.
Definition doc.h:680
void removeTextPlugin(int id)
Remove text plugin.
Definition parser.h:1494
friend struct PrivateAccess
Used in tests.
Definition parser.h:2109
~Parser()=default
void addTextPlugin(int id, TextPluginFunc< Trait > plugin, bool processInLinks, const typename Trait::StringList &userData)
Add text plugin.
Definition parser.h:1479
std::shared_ptr< Document< Trait > > parse(const typename Trait::String &fileName, bool recursive=true, const typename Trait::StringList &ext={Trait::latin1ToString("md"), Trait::latin1ToString("markdown")}, bool fullyOptimizeParagraphs=true)
Definition parser.h:2125
Raw HTML.
Definition doc.h:441
Wrapper that makes typename Trait::StringList behave like a stream.
Definition parser.h:278
Trait::InternalString lineAt(long long int pos)
Definition parser.h:312
std::pair< typename Trait::InternalString, bool > readLine()
Definition parser.h:291
bool atEnd() const
Definition parser.h:286
void setLineNumber(long long int lineNumber)
Definition parser.h:322
long long int size() const
Definition parser.h:317
StringListStream(typename MdBlock< Trait >::Data &stream)
Definition parser.h:280
long long int currentStreamPos() const
Definition parser.h:307
long long int currentLineNumber() const
Definition parser.h:301
Emphasis in the Markdown document.
Definition doc.h:217
Table cell.
Definition doc.h:1499
Table row.
Definition doc.h:1530
Alignment
Alignment.
Definition doc.h:1637
@ AlignCenter
Center.
Definition doc.h:1643
@ AlignLeft
Left.
Definition doc.h:1639
@ AlignRight
Right.
Definition doc.h:1641
TextStream(QTextStream &stream)
Definition parser.h:2169
TextStream(std::istream &stream)
Definition parser.h:2256
Text item in Paragraph.
Definition doc.h:514
Wrapper for UChar32 to be used with MD::Parser.
Definition traits.h:465
Wrapper for icu::UnicodeString to be used with MD::Parser.
Definition traits.h:600
void push_back(const UnicodeChar &ch)
Definition traits.h:648
std::vector< UnicodeString > split(const UnicodeChar &ch) const
Definition traits.h:729
bool isRelative() const
Definition traits.h:866
UnicodeString scheme() const
Definition traits.h:871
UnicodeString host() const
Definition traits.h:876
bool isValid() const
Definition traits.h:861
Base for any thing with start and end position.
Definition doc.h:77
void setEndColumn(long long int c)
Set end column.
Definition doc.h:138
long long int startColumn() const
Definition doc.h:102
void setStartColumn(long long int c)
Set start column.
Definition doc.h:126
long long int startLine() const
Definition doc.h:108
long long int endColumn() const
Definition doc.h:114
long long int endLine() const
Definition doc.h:120
Q_SCRIPTABLE QString start(QString train="")
Q_SCRIPTABLE Q_NOREPLY void start()
Type type(const QSqlDatabase &db)
QAction * quit(const QObject *recvr, const char *slot, QObject *parent)
QAction * end(const QObject *recvr, const char *slot, QObject *parent)
KIOCORE_EXPORT CopyJob * link(const QList< QUrl > &src, const QUrl &destDir, JobFlags flags=DefaultFlags)
QList< QVariant > parse(const QString &message, const QDateTime &externalIssueDateTime=QDateTime())
QString path(const QString &relativePath)
KIOCORE_EXPORT QStringList list(const QString &fileClass)
KGuiItem open()
KGuiItem back(BidiMode useBidi=IgnoreRTL)
QString label(StandardShortcut id)
Definition algo.h:17
TextOption
Text option.
Definition doc.h:200
@ ItalicText
Italic text.
Definition doc.h:206
@ StrikethroughText
Strikethrough.
Definition doc.h:208
@ TextWithoutFormat
No format.
Definition doc.h:202
@ BoldText
Bold text.
Definition doc.h:204
bool isOrderedList(const typename Trait::String &s, int *num=nullptr, int *len=nullptr, typename Trait::Char *delim=nullptr, bool *isFirstLineEmpty=nullptr)
Definition parser.h:134
Trait::String paragraphToLabel(Paragraph< Trait > *p)
Convert Paragraph to label.
Definition parser.h:3715
std::pair< long long int, long long int > prevPosition(const MdBlock< Trait > &fr, long long int pos, long long int line)
Definition parser.h:4187
bool isValidUrl< UnicodeStringTrait >(const UnicodeString &url)
Definition parser.h:1255
bool isMult3(long long int i1, long long int i2)
Definition parser.h:7446
std::pair< long long int, long long int > nextPosition(const MdBlock< Trait > &fr, long long int pos, long long int line)
Definition parser.h:4210
bool checkForEndHtmlComments(const typename Trait::String &line, long long int pos)
Definition parser.h:2352
bool isH1(const typename Trait::String &s)
Definition parser.h:4171
bool isEmail(const typename Trait::String &url)
Definition parser.h:1130
bool isLineBreak(const typename Trait::String &s)
Definition parser.h:4477
TextOption styleToTextOption(Style s)
Definition parser.h:878
std::shared_ptr< Text< Trait > > concatenateText(typename Block< Trait >::Items::const_iterator it, typename Block< Trait >::Items::const_iterator last)
Concatenate texts in block.
Definition parser.h:7996
bool isH(const typename Trait::String &s, const typename Trait::Char &c)
Definition parser.h:4138
std::tuple< bool, long long int, typename Trait::Char, bool > listItemData(const typename Trait::String &s, bool wasText)
Definition parser.h:9067
bool isGitHubAutolink< QStringTrait >(const QString &url)
Definition parser.h:1239
bool isSemiOptimization(OptimizeParagraphType t)
Definition parser.h:8036
long long int skipSpaces(long long int i, const typename Trait::String &line)
Skip spaces in line from position i.
Definition parser.h:71
Trait::InternalString prepareTableData(typename Trait::InternalString s)
Prepare data in table cell for parsing.
Definition parser.h:3930
void makeTextObject(const typename Trait::String &text, TextParsingOpts< Trait > &po, long long int startPos, long long int startLine, long long int endPos, long long int endLine, bool doRemoveSpacesAtEnd=false)
Make text item.
Definition parser.h:4524
static const Trait::String s_canBeEscaped
Characters that can be escaped.
Definition parser.h:469
int isTableHeader(const typename Trait::String &s)
Definition parser.h:3599
void initLastItemWithOpts(TextParsingOpts< Trait > &po, std::shared_ptr< ItemWithOpts< Trait > > item)
Initialize item with style information and set it as last item.
Definition parser.h:4513
std::tuple< long long int, long long int, bool, typename Trait::String, long long int > readLinkDestination(long long int line, long long int pos, const TextParsingOpts< Trait > &po, WithPosition *urlPos=nullptr)
Read link's destination.
Definition parser.h:6705
Trait::StringList splitString(const typename Trait::String &str, const typename Trait::Char &ch)
Split string.
void removeSpacesAtEnd(String &s)
Remove spaces at the end of string s.
Definition parser.h:99
bool isListItemAndNotNested(const typename Trait::String &s, long long int indent)
Definition parser.h:9024
std::pair< bool, bool > readUnquotedHtmlAttrValue(long long int &l, long long int &p, const typename MdBlock< Trait >::Data &fr)
Read HTML attribute value.
Definition parser.h:4789
static const char * s_startComment
Starting HTML comment string.
Definition parser.h:47
long long int processGitHubAutolinkExtension(std::shared_ptr< Paragraph< Trait > > p, TextParsingOpts< Trait > &po, long long int idx)
Process GitHub autolinks for the text with index idx.
Definition parser.h:1279
void setLastPos(std::shared_ptr< Item< Trait > > item, long long int pos, long long int line)
Set last position of the item.
Definition parser.h:9117
long long int lastNonSpacePos(const String &line)
Definition parser.h:85
std::shared_ptr< Paragraph< Trait > > optimizeParagraph(std::shared_ptr< Paragraph< Trait > > &p, TextParsingOpts< Trait > &po, OptimizeParagraphType type=OptimizeParagraphType::Full)
Optimize Paragraph.
Definition parser.h:8065
WithPosition findAndRemoveClosingSequence(typename Trait::InternalString &s)
Find and remove closing sequence of "#" in heading.
Definition parser.h:3770
std::shared_ptr< Paragraph< Trait > > splitParagraphsAndFreeHtml(std::shared_ptr< Block< Trait > > parent, std::shared_ptr< Paragraph< Trait > > p, TextParsingOpts< Trait > &po, bool collectRefLinks, bool fullyOptimizeParagraphs=true)
Split Paragraph and free HTML.
Definition parser.h:8179
bool isFootnote(const typename Trait::String &s)
Definition parser.h:337
void githubAutolinkPlugin(std::shared_ptr< Paragraph< Trait > > p, TextParsingOpts< Trait > &po, const typename Trait::StringList &)
GitHub autolinks plugin.
Definition parser.h:1411
void replaceTabs(typename Trait::InternalString &s)
Replace tabs with spaces (just for internal simpler use).
Definition parser.h:2499
bool isCodeFences(const typename Trait::String &s, bool closing=false)
Definition parser.h:377
long long int lineBreakLength(const typename Trait::String &s)
Definition parser.h:4493
bool indentInList(const std::vector< long long int > *indents, long long int indent, bool codeIndentedBySpaces)
Definition parser.h:51
std::pair< long long int, long long int > localPosFromVirgin(const MdBlock< Trait > &fr, long long int virginColumn, long long int virginLine)
Definition parser.h:1093
OptimizeParagraphType
Type of the paragraph's optimization.
Definition parser.h:830
@ Semi
Semi optimization, optimization won't concatenate text items if style delimiters will be in the middl...
Definition parser.h:835
@ SemiWithoutRawData
Semi optimization, but raw text data won't be concatenated (will be untouched).
Definition parser.h:839
@ Full
Full optimization.
Definition parser.h:832
@ FullWithoutRawData
Full optimization, but raw text data won't be concatenated (will be untouched).
Definition parser.h:837
std::shared_ptr< Paragraph< Trait > > makeParagraph(typename Block< Trait >::Items::const_iterator first, typename Block< Trait >::Items::const_iterator last)
Make Paragraph.
Definition parser.h:8159
bool isH2(const typename Trait::String &s)
Definition parser.h:4179
Trait::String readEscapedSequence(long long int i, const typename Trait::String &str, long long int *endPos=nullptr)
Skip escaped sequence of characters till first space.
Definition parser.h:432
std::function< void(std::shared_ptr< Paragraph< Trait > >, TextParsingOpts< Trait > &, const typename Trait::StringList &)> TextPluginFunc
Functor type for text plugin.
Definition parser.h:906
bool isGitHubAutolink< UnicodeStringTrait >(const UnicodeString &url)
Definition parser.h:1264
void normalizePos(long long int &pos, long long int &line, long long int length, long long int linesCount)
Normalize position.
Definition parser.h:8145
std::pair< bool, bool > readHtmlAttrValue(long long int &l, long long int &p, const typename MdBlock< Trait >::Data &fr)
Read HTML attribute value.
Definition parser.h:4813
std::pair< long long int, long long int > calculateIndent(const typename Trait::String &s, long long int p)
Definition parser.h:9058
void makeHorLine(const typename MdBlock< Trait >::Line &line, std::shared_ptr< Block< Trait > > parent)
Make horizontal line.
Definition parser.h:8506
long long int emptyLinesBeforeCount(typename MdBlock< Trait >::Data::iterator begin, typename MdBlock< Trait >::Data::iterator it)
Definition parser.h:251
void makeText(long long int lastLine, long long int lastPos, TextParsingOpts< Trait > &po)
Make text item.
Definition parser.h:4657
std::tuple< bool, long long int, long long int, bool, typename Trait::String > isHtmlTag(long long int line, long long int pos, TextParsingOpts< Trait > &po, int rule)
Definition parser.h:5042
void resetHtmlTag(RawHtmlBlock< Trait > &html, TextParsingOpts< Trait > *po=nullptr)
Reset pre-stored HTML.
Definition parser.h:1006
bool isStartOfCode(const typename Trait::String &str, typename Trait::String *syntax=nullptr, WithPosition *delim=nullptr, WithPosition *syntaxPos=nullptr)
Definition parser.h:503
Trait::String stringToLabel(const typename Trait::String &s)
Convert string to label.
Definition parser.h:3694
UnicodeStringTrait::StringList splitString< UnicodeStringTrait >(const UnicodeString &str, const UnicodeChar &ch)
Definition parser.h:665
int isTableAlignment(const typename Trait::String &s)
Definition parser.h:686
bool isColumnAlignment(const typename Trait::String &s)
Definition parser.h:613
void makeHeading(std::shared_ptr< Block< Trait > > parent, std::shared_ptr< Document< Trait > > doc, std::shared_ptr< Paragraph< Trait > > p, long long int lastColumn, long long int lastLine, int level, const typename Trait::String &workingPath, const typename Trait::String &fileName, bool collectRefLinks, const WithPosition &delim, TextParsingOpts< Trait > &po)
Make heading.
Definition parser.h:8275
void checkForTableInParagraph(TextParsingOpts< Trait > &po, long long int lastLine)
Check for table in paragraph.
Definition parser.h:4627
std::tuple< long long int, long long int, bool, typename Trait::String, long long int > readLinkTitle(long long int line, long long int pos, const TextParsingOpts< Trait > &po)
Read link's title.
Definition parser.h:6822
bool isOnlyHtmlTagsAfterOrClosedRule1(long long int line, long long int pos, TextParsingOpts< Trait > &po, int rule)
Definition parser.h:4970
void checkForTextPlugins(std::shared_ptr< Paragraph< Trait > > p, TextParsingOpts< Trait > &po, const TextPluginsMap< Trait > &textPlugins, bool inLink)
Process text plugins.
Definition parser.h:8488
Style
Emphasis type.
Definition parser.h:861
@ Bold2
"__"
Definition parser.h:869
@ Unknown
Unknown.
Definition parser.h:873
@ Strikethrough
"~"
Definition parser.h:871
@ Bold1
"**"
Definition parser.h:867
@ Italic1
"*"
Definition parser.h:863
@ Italic2
"_"
Definition parser.h:865
Trait::String virginSubstr(const MdBlock< Trait > &fr, const WithPosition &virginPos)
Definition parser.h:1026
std::map< int, std::tuple< TextPluginFunc< Trait >, bool, typename Trait::StringList > > TextPluginsMap
Type of the map of text plugins.
Definition parser.h:916
void eatRawHtml(long long int line, long long int pos, long long int toLine, long long int toPos, TextParsingOpts< Trait > &po, bool finish, int htmlRule, bool onLine, bool continueEating=false)
Read HTML data.
Definition parser.h:5248
void skipSpacesInHtml(long long int &l, long long int &p, const typename MdBlock< Trait >::Data &fr)
Skip spaces.
Definition parser.h:4770
@ Document
Document.
Definition doc.h:56
@ FootnoteRef
Footnote ref.
Definition doc.h:52
@ Table
Table.
Definition doc.h:50
@ PageBreak
Page break.
Definition doc.h:58
@ Link
Link.
Definition doc.h:40
@ Text
Text.
Definition doc.h:28
@ Anchor
Anchor.
Definition doc.h:60
@ Math
Math expression.
Definition doc.h:66
@ ListItem
List item.
Definition doc.h:36
@ Image
Image.
Definition doc.h:42
@ Code
Code.
Definition doc.h:44
@ RawHtml
Raw HTML.
Definition doc.h:64
@ LineBreak
Line break.
Definition doc.h:32
@ Paragraph
Paragraph.
Definition doc.h:30
void appendCloseStyle(TextParsingOpts< Trait > &po, const StyleDelim &s)
Append close style.
Definition parser.h:7757
void makeTextObjectWithLineBreak(const typename Trait::String &text, TextParsingOpts< Trait > &po, long long int startPos, long long int startLine, long long int endPos, long long int endLine)
Make text item with line break.
Definition parser.h:4594
std::pair< typename Trait::InternalStringList, std::vector< long long int > > splitTableRow(const typename Trait::InternalString &s)
Split table's row on cells.
Definition parser.h:3940
bool isSetextHeadingBetween(const TextParsingOpts< Trait > &po, long long int startLine, long long int endLine)
Definition parser.h:5023
long long int textAtIdx(std::shared_ptr< Paragraph< Trait > > p, size_t idx)
Definition parser.h:8467
long long int listLevel(const std::vector< long long int > &indents, long long int pos)
Definition parser.h:3373
long long int posOfListItem(const typename Trait::String &s, bool ordered)
Definition parser.h:3331
bool isGitHubAutolink(const typename Trait::String &url)
bool isHorizontalLine(const typename Trait::String &s)
Definition parser.h:570
bool isValidUrl(const typename Trait::String &url)
Trait::String startSequence(const typename Trait::String &line)
Definition parser.h:111
bool isValidUrl< QStringTrait >(const QString &url)
Definition parser.h:1230
TextPlugin
ID of text plugin.
Definition parser.h:847
@ UnknownPluginID
Unknown plugin.
Definition parser.h:849
@ UserDefinedPluginID
First user defined plugin ID.
Definition parser.h:853
@ GitHubAutoLinkPluginID
GitHub's autolinks plugin.
Definition parser.h:851
void applyStyles(int &opts, std::vector< typename TextParsingOpts< Trait >::StyleInfo > &styles)
Apply styles.
Definition parser.h:7323
long long int lastVirginPositionInParagraph(Item< Trait > *item)
Definition parser.h:8235
void skipSpacesUpTo1Line(long long int &line, long long int &pos, const typename MdBlock< Trait >::Data &fr)
Skip spaces in the block up to 1 new line.
Definition parser.h:6690
bool isHtmlComment(const typename Trait::String &s)
Definition parser.h:702
void resolveLinks(typename Trait::StringList &linksToParse, std::shared_ptr< Document< Trait > > doc)
Resolve links in the document.
Definition parser.h:3231
QStringTrait::StringList splitString< QStringTrait >(const QString &str, const QChar &ch)
Definition parser.h:676
bool isWithoutRawDataOptimization(OptimizeParagraphType t)
Definition parser.h:8050
Trait::String replaceEntity(const typename Trait::String &s)
Replace entities in the string with corresponding character.
Definition parser.h:735
static const std::map< typename Trait::String, const char16_t * > s_entityMap
String removeBackslashes(const String &s)
Remove backslashes from the string.
Definition parser.h:475
Trait::String removeLineBreak(const typename Trait::String &s)
Remove line break from the end of string.
Definition parser.h:4501
void updateLastPosInList(const RawHtmlBlock< Trait > &html)
Update last position of all parents.
Definition parser.h:9128
void closeStyle(std::vector< typename TextParsingOpts< Trait >::StyleInfo > &styles, Style s)
Close style.
Definition parser.h:7308
std::pair< typename Trait::String, WithPosition > findAndRemoveHeaderLabel(typename Trait::InternalString &s)
Find and remove heading label.
Definition parser.h:3664
std::pair< bool, bool > readHtmlAttr(long long int &l, long long int &p, const typename MdBlock< Trait >::Data &fr, bool checkForSpace)
Read HTML attribute.
Definition parser.h:4870
void checkForHtmlComments(const typename Trait::InternalString &line, StringListStream< Trait > &stream, MdLineData::CommentDataMap &res)
Collect information about HTML comments.
Definition parser.h:2367
bool isEmpty() const
void push_back(parameter_type value)
void clear()
QString first(qsizetype n) const
qsizetype indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
bool isEmpty() const
qsizetype length() const
void push_back(QChar ch)
QString & remove(QChar ch, Qt::CaseSensitivity cs)
QString sliced(qsizetype pos) const
QStringList split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const
QString toCaseFolded() const
QString toLower() const
QString toUpper() const
const QChar * unicode() const
bool contains(QLatin1StringView str, Qt::CaseSensitivity cs) const
qsizetype size() const
SkipEmptyParts
QString host(ComponentFormattingOptions options) const
bool isRelative() const
bool isValid() const
QString scheme() const
Internal structure for block of text in Markdown.
Definition parser.h:240
Data m_data
Definition parser.h:244
typename Trait::template Vector< Line > Data
Definition parser.h:242
std::pair< typename Trait::InternalString, MdLineData > Line
Definition parser.h:241
long long int m_emptyLinesBefore
Definition parser.h:245
bool m_emptyLineAfter
Definition parser.h:246
Internal structure for auxiliary information about a line in Markdown.
Definition parser.h:224
long long int m_lineNumber
Definition parser.h:225
std::pair< char, bool > CommentData
Definition parser.h:226
bool m_mayBreakList
Definition parser.h:231
std::map< long long int, CommentData > CommentDataMap
Definition parser.h:227
CommentDataMap m_htmlCommentData
Definition parser.h:229
Trait to use this library with QString.
Definition traits.h:1017
QStringList StringList
Definition traits.h:1034
Internal structure for pre-storing HTML.
Definition parser.h:195
int m_htmlBlockType
Definition parser.h:202
std::unordered_map< std::shared_ptr< Block< Trait > >, SequenceOfBlock > m_toAdjustLastPos
Definition parser.h:201
SequenceOfBlock m_blocks
Definition parser.h:200
std::vector< std::pair< std::shared_ptr< Block< Trait > >, long long int > > SequenceOfBlock
Definition parser.h:199
std::shared_ptr< RawHtml< Trait > > m_html
Definition parser.h:196
std::shared_ptr< Block< Trait > > findParent(long long int indent) const
Definition parser.h:207
bool m_continueHtml
Definition parser.h:203
std::shared_ptr< Block< Trait > > m_topParent
Definition parser.h:198
std::shared_ptr< Block< Trait > > m_parent
Definition parser.h:197
Internal structure for auxiliary options for parser.
Definition parser.h:926
bool shouldStopParsing() const
Definition parser.h:970
RawHtmlBlock< Trait > & m_html
Definition parser.h:936
std::shared_ptr< Document< Trait > > m_doc
Definition parser.h:930
long long int m_pos
Definition parser.h:986
bool m_checkLineOnNewType
Definition parser.h:940
ItemWithOpts< Trait >::Styles m_openStyles
Definition parser.h:1000
void concatenateAuxText(long long int start, long long int end)
Definition parser.h:954
Trait::StringList & m_linksToParse
Definition parser.h:931
bool m_firstInParagraph
Definition parser.h:942
long long int m_lastTextPos
Definition parser.h:989
long long int m_line
Definition parser.h:985
Trait::String m_fileName
Definition parser.h:933
long long int m_startTableLine
Definition parser.h:987
std::shared_ptr< ItemWithOpts< Trait > > m_lastItemWithStyle
Definition parser.h:1001
std::shared_ptr< Block< Trait > > m_parent
Definition parser.h:928
std::shared_ptr< RawHtml< Trait > > m_tmpHtml
Definition parser.h:929
std::shared_ptr< Text< Trait > > m_lastText
Definition parser.h:938
const TextPluginsMap< Trait > & m_textPlugins
Definition parser.h:937
MdBlock< Trait > & m_fr
Definition parser.h:927
Trait::String m_workingPath
Definition parser.h:932
Detected m_detected
Definition parser.h:967
std::vector< StyleInfo > m_styles
Definition parser.h:999
std::vector< TextData > m_rawTextData
Definition parser.h:951
long long int m_lastTextLine
Definition parser.h:988
Trait to use this library with std::string.
Definition traits.h:893
std::vector< String > StringList
Definition traits.h:908
static bool fileExists(const String &fileName, const String &workingPath)
Definition traits.h:953
static bool isFreeTag(std::shared_ptr< RawHtml< Trait > > html)
Definition parser.h:4247
static void setFreeTag(std::shared_ptr< RawHtml< Trait > > html, bool on)
Definition parser.h:4253
#define MD_DISABLE_COPY(Class)
Macro for disabling copy.
Definition utils.h:17
#define MD_UNUSED(x)
Avoid "unused parameter" warnings.
Definition utils.h:26
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Mar 7 2025 11:46:51 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.