6#ifndef MD4QT_MD_PARSER_HPP_INCLUDED
7#define MD4QT_MD_PARSER_HPP_INCLUDED
15#ifdef MD4QT_QT_SUPPORT
24#ifdef MD4QT_ICU_STL_SUPPORT
40#include <unordered_map>
53 bool codeIndentedBySpaces)
55 if (indents && !indents->empty()) {
56 return (std::find_if(indents->cbegin(),
58 [indent, codeIndentedBySpaces](
const auto &v) {
59 return (indent >= v && (codeIndentedBySpaces ?
60 true : indent <= v + 3));
71skipSpaces(
long long int i,
const typename Trait::String &line)
73 const auto length = line.length();
75 while (i < length && line[i].isSpace()) {
88 long long int i = s.length() - 1;
91 if (!s[i].isSpace()) {
96 if (i != s.length() - 1) {
97 s.remove(i + 1, s.length() - i - 1);
106 long long int i = line.length() - 1;
108 while (i > 0 && line[i].isSpace()) {
117inline typename Trait::String
122 if (pos >= line.length()) {
126 const auto sch = line[pos];
127 const auto start = pos;
131 while (pos < line.length() && line[pos] == sch) {
144 typename Trait::Char *delim =
nullptr,
145 bool *isFirstLineEmpty =
nullptr)
149 long long int dp = p;
151 for (; p < s.size(); ++p) {
152 if (!s[p].isDigit()) {
157 if (dp != p && p < s.size()) {
158 const auto digits = s.sliced(dp, p - dp);
160 if (digits.size() > 9) {
164 const auto i = digits.toInt();
174 if (s[p] == Trait::latin1ToChar(
'.') || s[p] == Trait::latin1ToChar(
')')) {
183 if (isFirstLineEmpty) {
184 *isFirstLineEmpty = (tmp == s.size());
187 if ((p < s.size() && s[p] == Trait::latin1ToChar(
' ')) || p == s.size()) {
203 std::shared_ptr<RawHtml<Trait>>
m_html = {};
206 using SequenceOfBlock = std::vector<std::pair<std::shared_ptr<Block<Trait>>,
long long int>>;
213 std::shared_ptr<Block<Trait>>
216 for (
auto it =
m_blocks.crbegin(), last =
m_blocks.crend(); it != last; ++it) {
217 if (indent >= it->second) {
257 using Line = std::pair<typename Trait::InternalString, MdLineData>;
258 using Data =
typename Trait::template Vector<Line>;
282 return (m_pos >= (
long long int)m_stream.size());
286 return m_stream.at(m_pos++).first;
290 return (m_pos <
size() ? m_stream.at(m_pos).second.m_lineNumber :
size());
292 typename Trait::InternalString
lineAt(
long long int pos)
294 return m_stream.at(pos).first;
298 return m_stream.size();
307checkStack(std::vector<std::pair<std::pair<long long int, bool>,
int>> &s,
308 const std::pair<std::pair<long long int, bool>,
int> &v,
311 int value = -v.first.first;
313 for (
long long int i = s.size() - 1; i >= 0; --i) {
314 if (s[i].second == v.second && s[i].first.first > 0) {
316 if (!((s[i].first.second || v.first.second) &&
317 (s[i].first.first + value) % 3 == 0 &&
318 !(s[i].first.first % 3 == 0 && value % 3 == 0))) {
319 if (s[i].first.first - value <= 0) {
320 if (i == (
long long int)idx) {
324 value -= s[i].first.first;
326 s.erase(s.cbegin() + i, s.cend());
332 s[i].first.first -= value;
334 s.erase(s.cbegin() + i + 1, s.cend());
356 if (s.size() - p < 5) {
360 if (s[p++] != Trait::latin1ToChar(
'[')) {
364 if (s[p++] != Trait::latin1ToChar(
'^')) {
368 if (s[p] == Trait::latin1ToChar(
']') || s[p].isSpace()) {
372 for (; p < s.size(); ++p) {
373 if (s[p] == Trait::latin1ToChar(
']')) {
375 }
else if (s[p].isSpace()) {
382 if (p < s.size() && s[p] == Trait::latin1ToChar(
':')) {
396 if (p > 3 || p == s.length()) {
400 const auto ch = s[p];
402 if (ch != Trait::latin1ToChar(
'~') && ch != Trait::latin1ToChar(
'`')) {
411 for (; p < s.length(); ++p) {
412 if (s[p].isSpace()) {
414 }
else if (s[p] == ch) {
415 if (space && (closing ?
true : ch == Trait::latin1ToChar(
'`'))) {
422 }
else if (closing) {
433 if (ch == Trait::latin1ToChar(
'`')) {
434 for (; p < s.length(); ++p) {
435 if (s[p] == Trait::latin1ToChar(
'`')) {
446inline typename Trait::String
448 const typename Trait::String &str,
449 long long int *endPos =
nullptr)
451 bool backslash =
false;
452 const auto start = i;
454 if (
start >= str.length()) {
458 while (i < str.length()) {
461 if (str[i] == Trait::latin1ToChar(
'\\') && !backslash) {
464 }
else if (str[i].isSpace() && !backslash) {
485 Trait::latin1ToString(
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");
488template<
class String,
class Trait>
493 bool backslash =
false;
494 long long int extra = 0;
496 for (
long long int i = 0; i < s.length(); ++i) {
499 if (s[i] == Trait::latin1ToChar(
'\\') && !backslash && i != s.length() - 1) {
503 r.remove(i - extra - 1, 1);
519 typename Trait::String *syntax =
nullptr,
526 delim->setStartColumn(p);
533 if (str.size() - p < 3) {
537 const bool c96 = str[p] == Trait::latin1ToChar(
'`');
538 const bool c126 = str[p] == Trait::latin1ToChar(
'~');
544 while (p < str.length()) {
545 if (str[p] != (c96 ? Trait::latin1ToChar(
'`') : Trait::latin1ToChar(
'~'))) {
554 delim->setEndColumn(p - 1);
563 long long int endSyntaxPos = p;
565 if (p < str.size()) {
570 syntaxPos->setStartColumn(p);
571 syntaxPos->setEndColumn(endSyntaxPos);
591 typename Trait::Char c;
593 if (s[0] == Trait::latin1ToChar(
'*')) {
594 c = Trait::latin1ToChar(
'*');
595 }
else if (s[0] == Trait::latin1ToChar(
'-')) {
596 c = Trait::latin1ToChar(
'-');
597 }
else if (s[0] == Trait::latin1ToChar(
'_')) {
598 c = Trait::latin1ToChar(
'_');
604 long long int count = 1;
606 for (; p < s.size(); ++p) {
607 if (s[p] != c && !s[p].isSpace()) {
609 }
else if (s[p] == c) {
632 static const typename Trait::String s_legitime = Trait::latin1ToString(
":-");
634 if (p >= s.length()) {
638 if (!s_legitime.contains(s[p])) {
642 if (s[p] == Trait::latin1ToChar(
':')) {
646 for (; p < s.size(); ++p) {
647 if (s[p] != Trait::latin1ToChar(
'-')) {
656 if (s[p] != Trait::latin1ToChar(
':') && !s[p].isSpace()) {
662 for (; p < s.size(); ++p) {
663 if (!s[p].isSpace()) {
673typename Trait::StringList
674splitString(
const typename Trait::String &str,
const typename Trait::Char &ch);
676#ifdef MD4QT_ICU_STL_SUPPORT
682 return str.
split(ch);
687#ifdef MD4QT_QT_SUPPORT
705 for (
const auto &c : columns) {
711 return columns.size();
727 long long int p = -1;
728 bool endFound =
false;
730 while ((p = c.indexOf(Trait::latin1ToString(
"--"), p + 1)) > -1) {
731 if (c.size() > p + 2 && c[p + 2] == Trait::latin1ToChar(
'>')) {
737 }
else if (p - 2 >= 0 && c.sliced(p - 2, 4) == Trait::latin1ToString(
"<!--")) {
739 }
else if (c.size() > p + 3 && c.sliced(p, 4) == Trait::latin1ToString(
"--!>")) {
749inline typename Trait::String
752 long long int p1 = 0;
754 typename Trait::String res;
757 while ((p1 = s.indexOf(Trait::latin1ToChar(
'&'), p1)) != -1) {
758 if (p1 > 0 && s[p1 - 1] == Trait::latin1ToChar(
'\\')) {
764 const auto p2 = s.indexOf(Trait::latin1ToChar(
';'), p1);
767 const auto en = s.sliced(p1, p2 - p1 + 1);
769 if (en.size() > 2 && en[1] == Trait::latin1ToChar(
'#')) {
770 if (en.size() > 3 && en[2].toLower() == Trait::latin1ToChar(
'x')) {
771 const auto hex = en.sliced(3, en.size() - 4);
773 if (hex.size() <= 6 && hex.size() > 0) {
776 const char32_t c = hex.toInt(&ok, 16);
779 res.push_back(s.sliced(i, p1 - i));
783 Trait::appendUcs4(res, c);
785 res.push_back(
typename Trait::Char(0xFFFD));
790 const auto dec = en.sliced(2, en.size() - 3);
792 if (dec.size() <= 7 && dec.size() > 0) {
795 const char32_t c = dec.toInt(&ok, 10);
798 res.push_back(s.sliced(i, p1 - i));
802 Trait::appendUcs4(res, c);
804 res.push_back(
typename Trait::Char(0xFFFD));
813 res.push_back(s.sliced(i, p1 - i));
815 res.push_back(Trait::utf16ToString(it->second));
825 res.push_back(s.sliced(i, s.size() - i));
837 for (
auto &line : tmp) {
917struct TextParsingOpts;
923 const typename Trait::StringList &)>;
933 typename Trait::StringList>>;
945 std::shared_ptr<Document<Trait>>
m_doc;
970 for (
auto i =
start + 1; i < end; ++i) {
1003 std::vector<std::pair<Style, long long int>>
m_styles = {};
1013template<
class Trait>
1014inline typename Trait::String
1021 long long int startLine = virginPos.
startLine() < fr.
m_data.at(0).second.m_lineNumber ?
1022 (virginPos.
endLine() < fr.
m_data.at(0).second.m_lineNumber ? -1 : 0) :
1025 if (startLine >=
static_cast<long long int>(fr.
m_data.size()) || startLine < 0) {
1029 auto spos = virginPos.
startColumn() - fr.
m_data.at(startLine).first.virginPos(0);
1035 long long int epos = 0;
1040 if (startLine + linesCount >
static_cast<long long int>(fr.
m_data.size())) {
1041 linesCount = fr.
m_data.size() - startLine - 1;
1042 epos = fr.
m_data.back().first.length();
1044 epos = virginPos.
endColumn() - fr.
m_data.at(linesCount + startLine).first.virginPos(0) + 1;
1051 if (epos > fr.
m_data.at(linesCount + startLine).first.length()) {
1052 epos = fr.
m_data.at(linesCount + startLine).first.length();
1055 typename Trait::String str =
1056 (linesCount ? fr.
m_data.at(startLine).first.sliced(spos).asString() :
1057 fr.
m_data.at(startLine).first.sliced(spos, epos - spos).asString());
1059 long long int i = startLine + 1;
1061 for (; i < startLine + linesCount; ++i) {
1062 str.push_back(Trait::latin1ToString(
"\n"));
1063 str.push_back(fr.
m_data.at(i).first.asString());
1067 str.push_back(Trait::latin1ToString(
"\n"));
1068 str.push_back(fr.
m_data.at(i).first.sliced(0, epos).asString());
1080template<
class Trait>
1081inline std::pair<long long int, long long int>
1088 if (fr.
m_data.front().second.m_lineNumber > virginLine ||
1089 fr.
m_data.back().second.m_lineNumber < virginLine) {
1093 auto line = virginLine - fr.
m_data.front().second.m_lineNumber;
1095 if (fr.
m_data.at(line).first.isEmpty()) {
1099 const auto vzpos = fr.
m_data.at(line).first.virginPos(0);
1101 if (vzpos > virginColumn || virginColumn > vzpos + fr.
m_data.at(line).first.length() - 1) {
1105 return {virginColumn - vzpos, line};
1117template<
class Trait>
1121 auto isAllowed = [](
const typename Trait::Char &ch) ->
bool {
1122 const auto unicode = ch.unicode();
1123 return ((unicode >= 48 && unicode <= 57) || (unicode >= 97 && unicode <= 122) ||
1124 (unicode >= 65 && unicode <= 90));
1127 auto isAdditional = [](
const typename Trait::Char &ch) ->
bool {
1128 const auto unicode = ch.unicode();
1129 return (unicode == 33 || (unicode >= 35 && unicode <= 39) ||
1130 unicode == 42 || unicode == 43 || (unicode >= 45 && unicode <= 47) ||
1131 unicode == 61 || unicode == 63 || (unicode >= 94 && unicode <= 96) ||
1132 (unicode >= 123 && unicode <= 126));
1135 static const auto s_delim = Trait::latin1ToChar(
'-');
1136 static const auto s_dog = Trait::latin1ToChar(
'@');
1137 static const auto s_dot = Trait::latin1ToChar(
'.');
1139 long long int i = (url.startsWith(Trait::latin1ToString(
"mailto:")) ? 7 : 0);
1140 const auto dogPos = url.indexOf(s_dog, i);
1147 for (; i < dogPos; ++i) {
1148 if (!isAllowed(url[i]) && !isAdditional(url[i])) {
1153 auto checkToDot = [&](
long long int start,
long long int dotPos) ->
bool {
1154 static const long long int maxlen = 63;
1156 if (dotPos -
start > maxlen ||
1157 start + 1 > dotPos ||
1158 start >= url.length() ||
1159 dotPos > url.length()) {
1163 if (url[
start] == s_delim) {
1167 if (url[dotPos - 1] == s_delim) {
1172 if (!isAllowed(url[
start]) && url[
start] != s_delim) {
1180 long long int dotPos = url.indexOf(s_dot, dogPos + 1);
1185 while (dotPos != -1) {
1186 if (!checkToDot(i, dotPos)) {
1191 dotPos = url.indexOf(s_dot, i);
1194 if (!checkToDot(i, url.length())) {
1206template<
class Trait>
1211template<
class Trait>
1215#ifdef MD4QT_QT_SUPPORT
1240#ifdef MD4QT_ICU_STL_SUPPORT
1258 && ((!u.
scheme().isEmpty() && !u.
host().isEmpty())
1259 || (url.startsWith(
UnicodeString(
"www.")) && url.length() >= 7 &&
1266template<
class Trait>
1272 if (idx < 0 || idx >= (
long long int)po.
m_rawTextData.size()) {
1276 static const auto s_delims = Trait::latin1ToString(
"*_~()<>");
1279 long long int j = 0;
1280 auto end =
typename Trait::Char(0x00);
1281 bool skipSpace =
true;
1282 long long int ret = idx;
1284 while (s.m_str.length()) {
1285 long long int i = 0;
1286 end =
typename Trait::Char(0x00);
1288 for (; i < s.m_str.length(); ++i) {
1290 if (s.m_str[i] == Trait::latin1ToChar(
'(')) {
1291 end = Trait::latin1ToChar(
')');
1294 if (s_delims.indexOf(s.m_str[i]) == -1 && !s.m_str[i].isSpace()) {
1299 if (s.m_str[i].isSpace() || i == s.m_str.length() - 1 || s.m_str[i] == end) {
1300 auto tmp = s.m_str.sliced(j, i - j +
1301 (i == s.m_str.length() - 1 && s.m_str[i] != end && !s.m_str[i].isSpace() ?
1303 skipSpace = s.m_str[i].isSpace();
1310 if (ti >= 0 && ti <
static_cast<long long int>(p->items().size())) {
1312 const auto opts = std::static_pointer_cast<Text<Trait>>(p->items().at(ti))->opts();
1314 if (j == 0 || s.m_str.sliced(0, j).isEmpty()) {
1315 openStyles = std::static_pointer_cast<ItemWithOpts<Trait>>(p->items().at(ti))->
openStyles();
1316 closeStyles = std::static_pointer_cast<ItemWithOpts<Trait>>(p->items().at(ti))->
closeStyles();
1317 p->removeItemAt(ti);
1321 const auto tmp = s.m_str.sliced(0, j);
1323 auto t = std::static_pointer_cast<Text<Trait>>(p->items().at(ti));
1324 t->setEndColumn(po.
m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + j - 1));
1326 t->closeStyles() = {};
1333 std::shared_ptr<Link<Trait>> lnk(
new Link<Trait>);
1334 lnk->setStartColumn(po.
m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + j));
1335 lnk->setStartLine(po.
m_fr.m_data.at(s.m_line).second.m_lineNumber);
1337 po.
m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + i -
1338 (i == s.m_str.length() - 1 && s.m_str[i] != end && !s.m_str[i].isSpace() ?
1340 lnk->setEndLine(po.
m_fr.m_data.at(s.m_line).second.m_lineNumber);
1341 lnk->openStyles() = openStyles;
1342 lnk->setTextPos({lnk->startColumn(), lnk->startLine(), lnk->endColumn(), lnk->endLine()});
1343 lnk->setUrlPos(lnk->textPos());
1345 if (email && !tmp.toLower().startsWith(Trait::latin1ToString(
"mailto:"))) {
1346 tmp = Trait::latin1ToString(
"mailto:") + tmp;
1349 if (!email && tmp.toLower().startsWith(Trait::latin1ToString(
"www."))) {
1350 tmp = Trait::latin1ToString(
"http://") + tmp;
1355 p->insertItem(ti, lnk);
1357 s.m_pos += i + (s.m_str[i] == end || s.m_str[i].isSpace() ? 0 : 1);
1358 s.m_str.remove(0, i + (s.m_str[i] == end || s.m_str[i].isSpace() ? 0 : 1));
1362 if (!s.m_str.isEmpty()) {
1366 auto t = std::make_shared<Text<Trait>>();
1367 t->setStartColumn(po.
m_fr.m_data[s.m_line].first.virginPos(s.m_pos));
1368 t->setStartLine(po.
m_fr.m_data.at(s.m_line).second.m_lineNumber);
1369 t->setEndLine(po.
m_fr.m_data.at(s.m_line).second.m_lineNumber);
1370 t->setEndColumn(po.
m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + s.m_str.length() - 1));
1372 t->closeStyles() = closeStyles;
1373 p->insertItem(ti + 1, t);
1375 lnk->closeStyles() = closeStyles;
1382 j = i + (skipSpace ? 1 : 0);
1389 if (i == s.m_str.length()) {
1398template<
class Trait>
1402 const typename Trait::StringList &)
1405 long long int i = 0;
1407 while (i >= 0 && i < (
long long int)po.
m_rawTextData.size()) {
1420template<
class Trait>
1432 std::shared_ptr<Document<Trait>>
1435 const typename Trait::String &fileName,
1438 bool recursive =
true,
1441 const typename Trait::StringList &ext = {Trait::latin1ToString(
"md"), Trait::latin1ToString(
"markdown")},
1447 bool fullyOptimizeParagraphs =
true);
1450 std::shared_ptr<Document<Trait>>
1453 typename Trait::TextStream &stream,
1456 const typename Trait::String &path,
1458 const typename Trait::String &fileName,
1464 bool fullyOptimizeParagraphs =
true);
1474 bool processInLinks,
1476 const typename Trait::StringList &userData)
1478 m_textPlugins.insert({id, {plugin, processInLinks, userData}});
1487 m_textPlugins.erase(
id);
1492 parseFile(
const typename Trait::String &fileName,
1495 const typename Trait::StringList &ext,
1496 typename Trait::StringList *parentLinks =
nullptr);
1499 parseStream(
typename Trait::TextStream &stream,
1500 const typename Trait::String &workingPath,
1501 const typename Trait::String &fileName,
1504 const typename Trait::StringList &ext,
1505 typename Trait::StringList *parentLinks =
nullptr);
1510 enum class BlockType {
1515 ListWithFirstEmptyLine,
1516 CodeIndentedBySpaces,
1526 long long int m_level = -1;
1527 long long int m_indent = -1;
1531 whatIsTheLine(
typename Trait::InternalString &str,
1532 bool inList =
false,
1533 bool inListWithFirstEmptyLine =
false,
1534 bool fensedCodeInList =
false,
1535 typename Trait::String *startOfCode =
nullptr,
1536 ListIndent *indent =
nullptr,
1537 bool emptyLinePreceded =
false,
1538 bool calcIndent =
false,
1539 const std::vector<long long int> *indents =
nullptr);
1542 parseFragment(MdBlock<Trait> &fr,
1543 std::shared_ptr<Block<Trait>> parent,
1545 typename Trait::StringList &linksToParse,
1546 const typename Trait::String &workingPath,
1547 const typename Trait::String &fileName,
1548 bool collectRefLinks,
1549 RawHtmlBlock<Trait> &html);
1552 parseText(MdBlock<Trait> &fr,
1553 std::shared_ptr<Block<Trait>> parent,
1555 typename Trait::StringList &linksToParse,
1556 const typename Trait::String &workingPath,
1557 const typename Trait::String &fileName,
1558 bool collectRefLinks,
1559 RawHtmlBlock<Trait> &html);
1562 parseBlockquote(MdBlock<Trait> &fr,
1563 std::shared_ptr<Block<Trait>> parent,
1565 typename Trait::StringList &linksToParse,
1566 const typename Trait::String &workingPath,
1567 const typename Trait::String &fileName,
1568 bool collectRefLinks,
1569 RawHtmlBlock<Trait> &html);
1572 parseList(MdBlock<Trait> &fr,
1573 std::shared_ptr<Block<Trait>> parent,
1575 typename Trait::StringList &linksToParse,
1576 const typename Trait::String &workingPath,
1577 const typename Trait::String &fileName,
1578 bool collectRefLinks,
1579 RawHtmlBlock<Trait> &html);
1582 parseCode(MdBlock<Trait> &fr, std::shared_ptr<Block<Trait>> parent,
bool collectRefLinks);
1585 parseCodeIndentedBySpaces(MdBlock<Trait> &fr,
1586 std::shared_ptr<Block<Trait>> parent,
1587 bool collectRefLinks,
1589 const typename Trait::String &syntax,
1590 long long int emptyColumn,
1591 long long int startLine,
1593 const WithPosition &startDelim = {},
1594 const WithPosition &endDelim = {},
1595 const WithPosition &syntaxPos = {});
1598 parseListItem(MdBlock<Trait> &fr,
1599 std::shared_ptr<Block<Trait>> parent,
1601 typename Trait::StringList &linksToParse,
1602 const typename Trait::String &workingPath,
1603 const typename Trait::String &fileName,
1604 bool collectRefLinks,
1605 RawHtmlBlock<Trait> &html,
1609 parseHeading(MdBlock<Trait> &fr,
1610 std::shared_ptr<Block<Trait>> parent,
1612 typename Trait::StringList &linksToParse,
1613 const typename Trait::String &workingPath,
1614 const typename Trait::String &fileName,
1615 bool collectRefLinks);
1618 parseFootnote(MdBlock<Trait> &fr,
1619 std::shared_ptr<Block<Trait>> parent,
1621 typename Trait::StringList &linksToParse,
1622 const typename Trait::String &workingPath,
1623 const typename Trait::String &fileName,
1624 bool collectRefLinks);
1627 parseTable(MdBlock<Trait> &fr,
1628 std::shared_ptr<Block<Trait>> parent,
1630 typename Trait::StringList &linksToParse,
1631 const typename Trait::String &workingPath,
1632 const typename Trait::String &fileName,
1633 bool collectRefLinks,
1637 parseParagraph(MdBlock<Trait> &fr,
1638 std::shared_ptr<Block<Trait>> parent,
1640 typename Trait::StringList &linksToParse,
1641 const typename Trait::String &workingPath,
1642 const typename Trait::String &fileName,
1643 bool collectRefLinks,
1644 RawHtmlBlock<Trait> &html);
1647 parseFormattedTextLinksImages(MdBlock<Trait> &fr,
1648 std::shared_ptr<Block<Trait>> parent,
1650 typename Trait::StringList &linksToParse,
1651 const typename Trait::String &workingPath,
1652 const typename Trait::String &fileName,
1653 bool collectRefLinks,
1654 bool ignoreLineBreak,
1655 RawHtmlBlock<Trait> &html,
1659 parse(StringListStream<Trait> &stream,
1660 std::shared_ptr<Block<Trait>> parent,
1662 typename Trait::StringList &linksToParse,
1663 const typename Trait::String &workingPath,
1664 const typename Trait::String &fileName,
1665 bool collectRefLinks,
1667 bool dontProcessLastFreeHtml =
false);
1669 struct ParserContext {
1670 typename Trait::template Vector<MdBlock<Trait>> m_splitted;
1672 bool m_emptyLineInList =
false;
1673 bool m_fensedCodeInList =
false;
1674 long long int m_emptyLinesCount = 0;
1675 long long int m_lineCounter = 0;
1676 std::vector<long long int> m_indents;
1677 ListIndent m_indent;
1678 RawHtmlBlock<Trait> m_html;
1679 long long int m_emptyLinesBefore = 0;
1681 typename Trait::String m_startOfCode;
1682 typename Trait::String m_startOfCodeInList;
1683 BlockType m_type = BlockType::EmptyLine;
1684 BlockType m_lineType = BlockType::Unknown;
1685 BlockType m_prevLineType = BlockType::Unknown;
1689 parseFragment(ParserContext &ctx,
1690 std::shared_ptr<Block<Trait>> parent,
1692 typename Trait::StringList &linksToParse,
1693 const typename Trait::String &workingPath,
1694 const typename Trait::String &fileName,
1695 bool collectRefLinks);
1698 eatFootnote(ParserContext &ctx,
1699 StringListStream<Trait> &stream,
1700 std::shared_ptr<Block<Trait>> parent,
1702 typename Trait::StringList &linksToParse,
1703 const typename Trait::String &workingPath,
1704 const typename Trait::String &fileName,
1705 bool collectRefLinks);
1708 finishHtml(ParserContext &ctx,
1709 std::shared_ptr<Block<Trait>> parent,
1711 bool collectRefLinks,
1713 bool dontProcessLastFreeHtml);
1716 makeLineMain(ParserContext &ctx,
1717 const typename Trait::InternalString &line,
1718 long long int emptyLinesCount,
1719 const ListIndent ¤tIndent,
1721 long long int currentLineNumber);
1724 parseFragmentAndMakeNextLineMain(ParserContext &ctx,
1725 std::shared_ptr<Block<Trait>> parent,
1727 typename Trait::StringList &linksToParse,
1728 const typename Trait::String &workingPath,
1729 const typename Trait::String &fileName,
1730 bool collectRefLinks,
1731 const typename Trait::InternalString &line,
1732 const ListIndent ¤tIndent,
1734 long long int currentLineNumber);
1737 isListType(BlockType t);
1739 typename Trait::InternalString
1740 readLine(ParserContext &ctx, StringListStream<Trait> &stream);
1742 std::shared_ptr<Image<Trait>>
1743 makeImage(
const typename Trait::String &url,
1745 TextParsingOpts<Trait> &po,
1746 bool doNotCreateTextOnFail,
1747 long long int startLine,
1748 long long int startPos,
1749 long long int lastLine,
1750 long long int lastPos,
1751 const WithPosition &textPos,
1752 const WithPosition &urlPos);
1754 std::shared_ptr<Link<Trait>>
1755 makeLink(
const typename Trait::String &url,
1757 TextParsingOpts<Trait> &po,
1758 bool doNotCreateTextOnFail,
1759 long long int startLine,
1760 long long int startPos,
1761 long long int lastLine,
1762 long long int lastPos,
1763 const WithPosition &textPos,
1764 const WithPosition &urlPos);
1767 enum DelimiterType {
1775 SquareBracketsClose,
1798 DelimiterType m_type = Unknown;
1799 long long int m_line = -1;
1800 long long int m_pos = -1;
1801 long long int m_len = 0;
1802 bool m_isWordBefore =
false;
1803 bool m_backslashed =
false;
1804 bool m_leftFlanking =
false;
1805 bool m_rightFlanking =
false;
1808 using Delims =
typename Trait::template Vector<Delimiter>;
1812 TextParsingOpts<Trait> &po,
1813 long long int startLine,
1814 long long int startPos,
1815 long long int lastLineForText,
1816 long long int lastPosForText,
1817 typename Delims::const_iterator lastIt,
1819 bool doNotCreateTextOnFail,
1820 const WithPosition &textPos,
1821 const WithPosition &linkTextPos);
1823 typename Delims::const_iterator
1824 checkForImage(
typename Delims::const_iterator it,
1825 typename Delims::const_iterator last,
1826 TextParsingOpts<Trait> &po);
1830 TextParsingOpts<Trait> &po,
1831 long long int startLine,
1832 long long int startPos,
1833 long long int lastLineForText,
1834 long long int lastPosForText,
1835 typename Delims::const_iterator lastIt,
1837 bool doNotCreateTextOnFail,
1838 const WithPosition &textPos,
1839 const WithPosition &linkTextPos);
1841 typename Delims::const_iterator
1842 checkForLink(
typename Delims::const_iterator it,
1843 typename Delims::const_iterator last,
1844 TextParsingOpts<Trait> &po);
1849 std::pair<typename Trait::String, bool>
1850 readHtmlTag(
typename Delims::const_iterator it, TextParsingOpts<Trait> &po);
1852 typename Delims::const_iterator
1853 findIt(
typename Delims::const_iterator it,
1854 typename Delims::const_iterator last,
1855 TextParsingOpts<Trait> &po);
1858 finishRule1HtmlTag(
typename Delims::const_iterator it,
1859 typename Delims::const_iterator last,
1860 TextParsingOpts<Trait> &po,
1864 finishRule2HtmlTag(
typename Delims::const_iterator it,
1865 typename Delims::const_iterator last,
1866 TextParsingOpts<Trait> &po);
1869 finishRule3HtmlTag(
typename Delims::const_iterator it,
1870 typename Delims::const_iterator last,
1871 TextParsingOpts<Trait> &po);
1874 finishRule4HtmlTag(
typename Delims::const_iterator it,
1875 typename Delims::const_iterator last,
1876 TextParsingOpts<Trait> &po);
1879 finishRule5HtmlTag(
typename Delims::const_iterator it,
1880 typename Delims::const_iterator last,
1881 TextParsingOpts<Trait> &po);
1884 finishRule6HtmlTag(
typename Delims::const_iterator it,
1885 typename Delims::const_iterator last,
1886 TextParsingOpts<Trait> &po);
1888 typename Parser<Trait>::Delims::const_iterator
1889 finishRule7HtmlTag(
typename Delims::const_iterator it,
1890 typename Delims::const_iterator last,
1891 TextParsingOpts<Trait> &po);
1893 typename Delims::const_iterator
1894 finishRawHtmlTag(
typename Delims::const_iterator it,
1895 typename Delims::const_iterator last,
1896 TextParsingOpts<Trait> &po,
1900 htmlTagRule(
typename Delims::const_iterator it,
1901 typename Delims::const_iterator last,
1902 TextParsingOpts<Trait> &po);
1904 typename Delims::const_iterator
1905 checkForRawHtml(
typename Delims::const_iterator it,
1906 typename Delims::const_iterator last,
1907 TextParsingOpts<Trait> &po);
1909 typename Delims::const_iterator
1910 checkForMath(
typename Delims::const_iterator it,
1911 typename Delims::const_iterator last,
1912 TextParsingOpts<Trait> &po);
1914 typename Delims::const_iterator
1915 checkForAutolinkHtml(
typename Delims::const_iterator it,
1916 typename Delims::const_iterator last,
1917 TextParsingOpts<Trait> &po,
1920 typename Delims::const_iterator
1921 checkForInlineCode(
typename Delims::const_iterator it,
1922 typename Delims::const_iterator last,
1923 TextParsingOpts<Trait> &po);
1925 std::pair<typename MdBlock<Trait>::Data,
typename Delims::const_iterator>
1926 readTextBetweenSquareBrackets(
typename Delims::const_iterator
start,
1927 typename Delims::const_iterator it,
1928 typename Delims::const_iterator last,
1929 TextParsingOpts<Trait> &po,
1930 bool doNotCreateTextOnFail,
1931 WithPosition *pos =
nullptr);
1933 std::pair<typename MdBlock<Trait>::Data,
typename Delims::const_iterator>
1934 checkForLinkText(
typename Delims::const_iterator it,
1935 typename Delims::const_iterator last,
1936 TextParsingOpts<Trait> &po,
1937 WithPosition *pos =
nullptr);
1939 std::pair<typename MdBlock<Trait>::Data,
typename Delims::const_iterator>
1940 checkForLinkLabel(
typename Delims::const_iterator it,
1941 typename Delims::const_iterator last,
1942 TextParsingOpts<Trait> &po,
1943 WithPosition *pos =
nullptr);
1945 std::tuple<typename Trait::String, typename Trait::String, typename Delims::const_iterator, bool>
1946 checkForInlineLink(
typename Delims::const_iterator it,
1947 typename Delims::const_iterator last,
1948 TextParsingOpts<Trait> &po,
1949 WithPosition *urlPos =
nullptr);
1951 inline std::tuple<typename Trait::String, typename Trait::String, typename Delims::const_iterator, bool>
1952 checkForRefLink(
typename Delims::const_iterator it,
1953 typename Delims::const_iterator last,
1954 TextParsingOpts<Trait> &po,
1955 WithPosition *urlPos =
nullptr);
1957 typename Trait::String
1960 template<
class Func>
1961 typename Delims::const_iterator
1962 checkShortcut(
typename Delims::const_iterator it,
1963 typename Delims::const_iterator last,
1964 TextParsingOpts<Trait> &po,
1967 const auto start = it;
1971 WithPosition labelPos;
1972 std::tie(text, it) = checkForLinkLabel(
start, last, po, &labelPos);
1974 if (it !=
start && !toSingleLine(text).simplified().isEmpty()) {
1975 if ((this->*functor)(text, po,
start->m_line,
start->m_pos,
start->m_line,
1976 start->m_pos +
start->m_len, it, {},
false, labelPos, {})) {
1985 createStyles(std::vector<std::pair<Style, long long int>> &s,
1987 typename Delimiter::DelimiterType t,
1988 long long int &count);
1991 isSequence(
typename Delims::const_iterator it,
1992 long long int itLine,
1993 long long int itPos,
1994 typename Delimiter::DelimiterType t);
1996 typename Delims::const_iterator
1997 readSequence(
typename Delims::const_iterator it,
1998 typename Delims::const_iterator last,
1999 long long int &line,
2002 typename Delims::const_iterator ¤t);
2005 emphasisToInt(
typename Delimiter::DelimiterType t);
2007 std::pair<bool, size_t>
2008 checkEmphasisSequence(
const std::vector<std::pair<std::pair<long long int, bool>,
int>> &s,
2011 std::vector<std::pair<std::pair<long long int, bool>,
int>>
2012 fixSequence(
const std::vector<std::pair<std::pair<long long int, bool>,
int>> &s);
2014 std::vector<std::vector<std::pair<std::pair<long long int, bool>,
int>>>
2015 closedSequences(
const std::vector<std::vector<std::pair<std::pair<long long int, bool>,
int>>> &vars,
2018 std::vector<std::pair<Style, long long int>>
2019 createStyles(
const std::vector<std::pair<std::pair<long long int, bool>,
int>> &s,
2021 typename Delimiter::DelimiterType t,
2022 long long int &count);
2024 std::tuple<bool, std::vector<std::pair<Style, long long int>>,
long long int,
long long int>
2025 isStyleClosed(
typename Delims::const_iterator it,
2026 typename Delims::const_iterator last,
2027 TextParsingOpts<Trait> &po);
2029 typename Delims::const_iterator
2030 incrementIterator(
typename Delims::const_iterator it,
2031 typename Delims::const_iterator last,
2032 long long int count);
2034 typename Delims::const_iterator
2035 checkForStyle(
typename Delims::const_iterator first,
2036 typename Delims::const_iterator it,
2037 typename Delims::const_iterator last,
2038 TextParsingOpts<Trait> &po);
2041 isListOrQuoteAfterHtml(TextParsingOpts<Trait> &po);
2044 parseTableInParagraph(TextParsingOpts<Trait> &po,
2047 typename Trait::StringList &linksToParse,
2048 const typename Trait::String &workingPath,
2049 const typename Trait::String &fileName,
2050 bool collectRefLinks);
2053 isNewBlockIn(MdBlock<Trait> &fr,
2054 long long int startLine,
2055 long long int endLine);
2058 makeInlineCode(
long long int startLine,
2059 long long int startPos,
2060 long long int lastLine,
2061 long long int lastPos,
2062 TextParsingOpts<Trait> &po,
2063 typename Delims::const_iterator startDelimIt,
2064 typename Delims::const_iterator endDelimIt);
2067 defaultParagraphOptimization()
const
2078 typename Trait::StringList m_parsedFiles;
2080 bool m_fullyOptimizeParagraphs =
true;
2089template<
class Trait>
2090inline std::shared_ptr<Document<Trait>>
2093 const typename Trait::StringList &ext,
2094 bool fullyOptimizeParagraphs)
2096 m_fullyOptimizeParagraphs = fullyOptimizeParagraphs;
2100 parseFile(fileName, recursive, doc, ext);
2107template<
class Trait>
2108inline std::shared_ptr<Document<Trait>>
2110 const typename Trait::String &path,
2111 const typename Trait::String &fileName,
2112 bool fullyOptimizeParagraphs)
2114 m_fullyOptimizeParagraphs = fullyOptimizeParagraphs;
2118 parseStream(stream, path, fileName,
false, doc,
typename Trait::StringList());
2125template<
class Trait>
2128#ifdef MD4QT_QT_SUPPORT
2145 return (m_lastBuf && m_pos == m_buf.size());
2152 bool rFound =
false;
2155 const auto c = getChar();
2183 m_buf = m_stream.read(512);
2185 if (m_stream.atEnd()) {
2195 if (m_pos < m_buf.size()) {
2196 return m_buf.at(m_pos++);
2197 }
else if (!atEnd()) {
2210 long long int m_pos;
// ICU/STL-only part of the listing (guarded by MD4QT_ICU_STL_SUPPORT). The
// class header is not visible here. The visible code loads the whole input
// stream into memory, converts it from UTF-8 into m_str and then serves
// characters from m_str by index.
2215#ifdef MD4QT_ICU_STL_SUPPORT
2225 std::vector<unsigned char> content;
// Determine the stream size by seeking to the end, then read everything into
// `content`, reserving one extra byte for a terminating zero.
// NOTE(review): tellg() reports failure as -1; no check of `ssize` is visible
// in this listing - confirm against the full source.
2227 stream.seekg(0, std::ios::end);
2228 const auto ssize = stream.tellg();
2229 content.resize((
size_t)ssize + 1);
2230 stream.seekg(0, std::ios::beg);
2231 stream.read((
char *)&content[0], ssize);
2232 content[(size_t)ssize] = 0;
// Count zero bytes; the count includes the terminator written just above.
2234 const auto z = std::count(content.cbegin(), content.cend(), 0);
// Build a copy that has two extra bytes for every zero byte other than the
// terminator. NOTE(review): presumably each embedded zero is replaced by a
// three-byte sequence on the lines not visible here - confirm.
2237 std::vector<unsigned char> tmp;
2238 tmp.resize(content.size() + (z - 1) * 2);
2240 for (
size_t i = 0, j = 0; i < content.size() - 1; ++i, ++j) {
2241 if (content[i] == 0) {
2247 tmp[j] = content[i];
2251 tmp[tmp.size() - 1] = 0;
2253 std::swap(content, tmp);
// `content` is zero-terminated, so it can be passed as a C string.
2256 m_str = UnicodeString::fromUTF8((
char *)&content[0]);
// End of input: the read position reached the end of the decoded string.
2262 return m_pos == m_str.size();
2270 bool rFound =
false;
2273 const auto c = getChar();
// Return the current character and advance; the other visible return yields
// a default-constructed UnicodeChar.
2302 return m_str[m_pos++];
2304 return UnicodeChar();
2309 UnicodeString m_str;
2310 long long int m_pos;
// Fragment (function name line not visible in this listing): looks for the
// HTML comment terminator "-->" in `line`, starting the search at `pos`.
2316template<
class Trait>
2321 const long long int e = line.indexOf(Trait::latin1ToString(
"-->"), pos);
// Fragment (function name line not visible in this listing): walks over every
// occurrence of s_startComment in `line` and stores in `res`, keyed by the
// occurrence's original column (virginPos), a pair of a small integer and a
// boolean describing that comment start.
2331template<
class Trait>
2339 const auto &str = line.asString();
2341 while ((p = str.indexOf(Trait::latin1ToString(
s_startComment), p)) != -1) {
2342 bool addNegative =
false;
2344 auto c = str.sliced(p);
// The degenerate forms "<!-->" and "<!--->" are recorded with 0 and 1.
2346 if (c.startsWith(Trait::latin1ToString(
"<!-->"))) {
2347 res.insert({line.virginPos(p), {0,
true}});
2352 }
else if (c.startsWith(Trait::latin1ToString(
"<!--->"))) {
2353 res.insert({line.virginPos(p), {1,
true}});
2361 res.insert({line.virginPos(p), {2,
true}});
// Extend the candidate text with following lines of the stream, joined by a
// single space. NOTE(review): presumably to find a terminator on a later line;
// the check itself is not visible here.
2365 for (; l < stream.
size(); ++l) {
2366 c.push_back(Trait::latin1ToChar(
' '));
2367 c.push_back(stream.
lineAt(l).asString());
2370 res.insert({line.virginPos(p), {2,
true}});
2372 addNegative =
false;
// NOTE(review): {-1, false} is presumably stored when `addNegative` is still
// set, i.e. no terminator was found - the guarding condition is not visible.
2380 res.insert({line.virginPos(p), {-1,
false}});
// Flushes the lines accumulated in ctx.m_fragment: wraps them in an MdBlock,
// parses that block through the MdBlock overload of parseFragment() and then
// resets the per-fragment state kept in `ctx`. Nothing is done when the
// fragment is empty.
2387template<
class Trait>
2389Parser<Trait>::parseFragment(
typename Parser<Trait>::ParserContext &ctx,
2390 std::shared_ptr<Block<Trait>> parent,
2392 typename Trait::StringList &linksToParse,
2393 const typename Trait::String &workingPath,
2394 const typename Trait::String &fileName,
2395 bool collectRefLinks)
2397 if (!ctx.m_fragment.empty()) {
// The block carries the number of empty lines seen before it and whether
// empty lines followed it.
2398 MdBlock<Trait> block = {ctx.m_fragment, ctx.m_emptyLinesBefore, ctx.m_emptyLinesCount > 0};
2400 ctx.m_emptyLinesBefore = ctx.m_emptyLinesCount;
// A copy of the block is kept in ctx.m_splitted as well.
2402 ctx.m_splitted.push_back(block);
2404 long long int line = 0;
2407 line = parseFragment(block, parent, doc, linksToParse, workingPath,
2408 fileName, collectRefLinks, ctx.m_html);
// Pending raw HTML is attached to its recorded parent unless only reference
// links are being collected.
2413 if (ctx.m_html.m_html) {
2414 if (!collectRefLinks) {
2415 ctx.m_html.m_parent->appendItem(ctx.m_html.m_html);
// Find the fragment line whose number equals the value returned by the parse
// call above and rebuild the block from that line onwards.
2421 const auto it = std::find_if(ctx.m_fragment.cbegin(), ctx.m_fragment.cend(), [line](
const auto &d) {
2422 return (d.second.m_lineNumber == line);
2425 block.m_data.clear();
2426 std::copy(it, ctx.m_fragment.cend(), std::back_inserter(block.m_data));
2427 block.m_emptyLinesBefore = 0;
2431 ctx.m_fragment.clear();
// Reset the per-fragment state of the context.
2434 ctx.m_type = BlockType::EmptyLine;
2435 ctx.m_emptyLineInList =
false;
2436 ctx.m_fensedCodeInList =
false;
2437 ctx.m_emptyLinesCount = 0;
2438 ctx.m_lineCounter = 0;
2439 ctx.m_indents.clear();
2440 ctx.m_indent = {-1, -1};
2441 ctx.m_startOfCode.clear();
2442 ctx.m_startOfCodeInList.clear();
// Fragment (function name line not visible in this listing): replaces tab
// characters in `s` with runs of spaces. `size` starts at 4 and is decremented
// on every loop step, so a tab is replaced by `size` spaces.
// NOTE(review): the lines that reset `size` and adjust `i`/`len` after a
// replacement are not visible here - presumably this implements 4-column tab
// stops; confirm against the full source.
2446template<
class Trait>
2450 unsigned char size = 4;
2451 long long int len = s.length();
2453 for (
long long int i = 0; i < len; ++i, --size) {
2454 if (s[i] == Trait::latin1ToChar(
'\t')) {
2455 s.replaceOne(i, 1,
typename Trait::String(size, Trait::latin1ToChar(
' ')));
// Consumes the lines that belong to a footnote from `stream`, appending them
// to ctx.m_fragment, and flushes the fragment with parseFragment() when the
// footnote ends or the stream is exhausted.
2468template<
class Trait>
2470Parser<Trait>::eatFootnote(
typename Parser<Trait>::ParserContext &ctx,
2471 StringListStream<Trait> &stream,
2472 std::shared_ptr<Block<Trait>> parent,
2474 typename Trait::StringList &linksToParse,
2475 const typename Trait::String &workingPath,
2476 const typename Trait::String &fileName,
2477 bool collectRefLinks)
2479 long long int emptyLinesCount = 0;
2480 bool wasEmptyLine =
false;
2482 while (!stream.atEnd()) {
2483 const auto currentLineNumber = stream.currentLineNumber();
2485 auto line = readLine(ctx, stream);
// A line that is empty (ns == line.length(); `ns` is computed on a line not
// visible here) or that starts with the given run of spaces is kept in the
// fragment.
2491 if (ns == line.length() || line.asString().startsWith(Trait::latin1ToString(
" "))) {
2492 if (ns == line.length()) {
2494 wasEmptyLine =
true;
2496 emptyLinesCount = 0;
2499 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2500 }
else if (!wasEmptyLine) {
// Flush what was collected so far; the current line becomes the main line of
// a footnote fragment.
2502 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2504 ctx.m_lineType = BlockType::Footnote;
2506 makeLineMain(ctx, line, emptyLinesCount, ctx.m_indent, ns, currentLineNumber);
2510 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
// Flush the fragment, classify the current line and make it the main line of
// the next fragment.
2513 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2516 whatIsTheLine(line,
false,
false,
false, &ctx.m_startOfCodeInList, &ctx.m_indent,
2517 ctx.m_lineType == BlockType::EmptyLine,
true, &ctx.m_indents);
2519 makeLineMain(ctx, line, emptyLinesCount, ctx.m_indent, ns, currentLineNumber);
2521 if (ctx.m_type == BlockType::Footnote) {
2522 wasEmptyLine =
false;
// Flush whatever is left when the stream ended.
2531 if (stream.atEnd() && !ctx.m_fragment.empty()) {
2532 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
// Finalizes the raw HTML item pending in ctx.m_html and attaches it to the
// document tree. Nothing is attached when only reference links are being
// collected, unless `top` is set.
2536template<
class Trait>
2538Parser<Trait>::finishHtml(ParserContext &ctx,
2539 std::shared_ptr<Block<Trait>> parent,
2541 bool collectRefLinks,
2543 bool dontProcessLastFreeHtml)
2545 if (!collectRefLinks || top) {
// A free tag goes to the parent recorded with the HTML when there is one; the
// other visible call appends it to `parent`.
2546 if (ctx.m_html.m_html->isFreeTag()) {
2547 if (!dontProcessLastFreeHtml) {
2548 if (ctx.m_html.m_parent) {
2549 ctx.m_html.m_parent->appendItem(ctx.m_html.m_html);
2553 parent->appendItem(ctx.m_html.m_html);
// Otherwise the HTML is placed into `p` (created on a line not visible here),
// which takes over the HTML item's start/end position.
2558 p->appendItem(ctx.m_html.m_html);
2559 p->setStartColumn(ctx.m_html.m_html->startColumn());
2560 p->setStartLine(ctx.m_html.m_html->startLine());
2561 p->setEndColumn(ctx.m_html.m_html->endColumn());
2562 p->setEndLine(ctx.m_html.m_html->endLine());
2567 if (!dontProcessLastFreeHtml) {
2571 ctx.m_html.m_toAdjustLastPos.clear();
// Starts a new fragment with `line` as its first ("main") line: the context's
// block type becomes the type of the current line and the counters are reset.
// NOTE(review): the parameter spelled `¤tIndent` below looks like an
// HTML-entity mangling of `&currentIndent` introduced by the listing.
2574template<
class Trait>
2576Parser<Trait>::makeLineMain(ParserContext &ctx,
2577 const typename Trait::InternalString &line,
2578 long long int emptyLinesCount,
2579 const ListIndent ¤tIndent,
2581 long long int currentLineNumber)
// For HTML block types >= 6 the HTML continues only if no empty line preceded.
2583 if (ctx.m_html.m_htmlBlockType >= 6) {
2584 ctx.m_html.m_continueHtml = (emptyLinesCount <= 0);
2587 ctx.m_type = ctx.m_lineType;
2589 switch (ctx.m_type) {
// A list start records its indent (only when no indent is tracked yet) and
// becomes the current indent.
2590 case BlockType::List:
2591 case BlockType::ListWithFirstEmptyLine: {
2592 if (ctx.m_indents.empty())
2593 ctx.m_indents.push_back(currentIndent.m_indent);
2595 ctx.m_indent = currentIndent;
2598 case BlockType::Code:
// Only a line with content after position `ns` is stored in the fragment.
2606 if (!line.isEmpty() && ns < line.length()) {
2607 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2610 ctx.m_lineCounter = 1;
2611 ctx.m_emptyLinesCount = 0;
// Convenience wrapper: flushes the current fragment and then makes `line` the
// main line of the next one.
// NOTE(review): the parameter spelled `¤tIndent` below looks like an
// HTML-entity mangling of `&currentIndent` introduced by the listing.
2614template<
class Trait>
2616Parser<Trait>::parseFragmentAndMakeNextLineMain(ParserContext &ctx,
2617 std::shared_ptr<Block<Trait>> parent,
2619 typename Trait::StringList &linksToParse,
2620 const typename Trait::String &workingPath,
2621 const typename Trait::String &fileName,
2622 bool collectRefLinks,
2623 const typename Trait::InternalString &line,
2624 const ListIndent ¤tIndent,
2626 long long int currentLineNumber)
// The empty-line count is saved first because the context overload of
// parseFragment() resets ctx.m_emptyLinesCount to 0.
2628 const auto empty = ctx.m_emptyLinesCount;
2630 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2632 makeLineMain(ctx, line, empty, currentIndent, ns, currentLineNumber);
// Predicate over BlockType; the visible cases are the two list kinds, List
// and ListWithFirstEmptyLine (the return statements are not visible in this
// listing).
2635template<
class Trait>
2637Parser<Trait>::isListType(BlockType t)
2640 case BlockType::List:
2641 case BlockType::ListWithFirstEmptyLine:
// Reads the next line from `stream` for the parser: clears the HTML comment
// data kept in `ctx`, fetches the line and replaces every NUL character in it
// with U+FFFD.
2649template<
class Trait>
2650typename Trait::InternalString
2651Parser<Trait>::readLine(
typename Parser<Trait>::ParserContext &ctx,
2652 StringListStream<Trait> &stream)
2654 ctx.m_htmlCommentData.clear();
2656 auto line = stream.readLine();
// Zero-terminated UTF-16 string holding the single code unit 0xFFFD.
2658 static const char16_t c_zeroReplaceWith[2] = {0xFFFD, 0};
2660 line.replace(
typename Trait::Char(0), Trait::utf16ToString(&c_zeroReplaceWith[0]));
// Main splitting loop (function name line not visible in this listing; the
// return type is RawHtmlBlock<Trait>). Reads the stream line by line,
// classifies each line with whatIsTheLine(), groups lines into fragments in
// `ctx` and flushes fragments with parseFragment(). After the loop the
// collected blocks in ctx.m_splitted are parsed again and pending HTML is
// finished.
2667template<
class Trait>
2668inline RawHtmlBlock<Trait>
2670 std::shared_ptr<Block<Trait>> parent,
2672 typename Trait::StringList &linksToParse,
2673 const typename Trait::String &workingPath,
2674 const typename Trait::String &fileName,
2675 bool collectRefLinks,
2677 bool dontProcessLastFreeHtml)
2681 while (!stream.atEnd()) {
2682 const auto currentLineNumber = stream.currentLineNumber();
2684 auto line = readLine(ctx, stream);
// Remember the previous known line type before classifying the new line.
2686 if (ctx.m_lineType != BlockType::Unknown) {
2687 ctx.m_prevLineType = ctx.m_lineType;
2690 ctx.m_lineType = whatIsTheLine(line,
2691 (ctx.m_emptyLineInList || isListType(ctx.m_type)),
2692 ctx.m_prevLineType == BlockType::ListWithFirstEmptyLine,
2693 ctx.m_fensedCodeInList,
2694 &ctx.m_startOfCodeInList,
2696 ctx.m_lineType == BlockType::EmptyLine,
// A fenced-code marker inside a list toggles the "inside fenced code" state.
2700 if (isListType(ctx.m_type) && ctx.m_lineType == BlockType::FensedCodeInList) {
2701 ctx.m_fensedCodeInList = !ctx.m_fensedCodeInList;
2704 const auto currentIndent = ctx.m_indent;
2708 const auto indentInListValue =
indentInList(&ctx.m_indents, ns,
true);
// A list item outside fenced code drops indents from its own level onwards
// and records its indent.
2710 if (isListType(ctx.m_lineType) && !ctx.m_fensedCodeInList && ctx.m_indent.m_level > -1) {
2711 if (ctx.m_indent.m_level < (
long long int)ctx.m_indents.size()) {
2712 ctx.m_indents.erase(ctx.m_indents.cbegin() + ctx.m_indent.m_level, ctx.m_indents.cend());
2715 ctx.m_indents.push_back(ctx.m_indent.m_indent);
// Inside indented code, a line indented by more than 3 stays indented code.
2718 if (ctx.m_type == BlockType::CodeIndentedBySpaces && ns > 3) {
2719 ctx.m_lineType = BlockType::CodeIndentedBySpaces;
2722 if (ctx.m_type == BlockType::ListWithFirstEmptyLine && ctx.m_lineCounter == 2 &&
2723 !isListType(ctx.m_lineType)) {
2724 if (ctx.m_emptyLinesCount > 0) {
2725 parseFragmentAndMakeNextLineMain(ctx,
2739 ctx.m_emptyLineInList =
false;
2740 ctx.m_emptyLinesCount = 0;
// From its second line on, a list that started with an empty first line is
// handled as a regular list.
2744 if (ctx.m_type == BlockType::ListWithFirstEmptyLine && ctx.m_lineCounter == 2) {
2745 ctx.m_type = BlockType::List;
// A footnote start flushes the current fragment; eatFootnote() then consumes
// the lines that follow.
2749 if (ctx.m_lineType == BlockType::Footnote) {
2750 parseFragmentAndMakeNextLineMain(ctx,
2762 eatFootnote(ctx, stream, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
// First non-empty line after empty state starts a new fragment.
2768 if (ns != line.length() && ctx.m_type == BlockType::EmptyLine) {
2769 makeLineMain(ctx, line, ctx.m_emptyLinesCount, currentIndent, ns, currentLineNumber);
2772 }
else if (ns == line.length() && ctx.m_type == BlockType::EmptyLine) {
2776 ++ctx.m_lineCounter;
// Empty line: what happens depends on the type of the current fragment.
2779 if (ns == line.length()) {
2780 ++ctx.m_emptyLinesCount;
2782 switch (ctx.m_type) {
2783 case BlockType::Blockquote: {
2784 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2789 case BlockType::Text:
2790 case BlockType::CodeIndentedBySpaces:
2794 case BlockType::Code: {
2795 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2796 ctx.m_emptyLinesCount = 0;
2801 case BlockType::List:
2802 case BlockType::ListWithFirstEmptyLine: {
2803 ctx.m_emptyLineInList =
true;
// Non-empty line after empty line(s) inside a list: if it still belongs to
// the list, the skipped empty lines are re-inserted with their original line
// numbers before the line itself.
2813 else if (ctx.m_emptyLineInList) {
2814 if (indentInListValue || isListType(ctx.m_lineType) || ctx.m_lineType == BlockType::SomethingInList) {
2815 for (
long long int i = 0; i < ctx.m_emptyLinesCount; ++i) {
2816 ctx.m_fragment.push_back({
typename Trait::String(),
2817 {currentLineNumber - ctx.m_emptyLinesCount + i, {}}});
2820 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2822 ctx.m_emptyLineInList =
false;
2823 ctx.m_emptyLinesCount = 0;
// Otherwise the list fragment is flushed and the line is re-classified
// without list context before it starts a new fragment.
2827 const auto empty = ctx.m_emptyLinesCount;
2829 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2831 ctx.m_lineType = whatIsTheLine(line,
false,
false,
false,
nullptr,
nullptr,
2832 true,
false, &ctx.m_indents);
2834 makeLineMain(ctx, line, empty, currentIndent, ns, currentLineNumber);
2838 }
else if (ctx.m_emptyLinesCount > 0) {
// Indented code continues across empty lines: the gaps are filled with lines
// of spaces as wide as the indent of the fragment's first line.
2839 if (ctx.m_type == BlockType::CodeIndentedBySpaces &&
2840 ctx.m_lineType == BlockType::CodeIndentedBySpaces) {
2841 const auto indent =
skipSpaces<Trait>(0, ctx.m_fragment.front().first.asString());
2843 for (
long long int i = 0; i < ctx.m_emptyLinesCount; ++i) {
2844 ctx.m_fragment.push_back({
typename Trait::String(indent, Trait::latin1ToChar(
' ')),
2845 {currentLineNumber - ctx.m_emptyLinesCount + i, {}}});
2848 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2849 ctx.m_emptyLinesCount = 0;
2851 parseFragmentAndMakeNextLineMain(ctx,
// Line type differs from the fragment type (fragment is not code, list or
// blockquote).
2868 if (ctx.m_type != ctx.m_lineType && ctx.m_type != BlockType::Code &&
2869 !isListType(ctx.m_type) && ctx.m_type != BlockType::Blockquote) {
2870 if (ctx.m_type == BlockType::Text && ctx.m_lineType == BlockType::CodeIndentedBySpaces) {
2871 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2874 if (ctx.m_type == BlockType::Text && isListType(ctx.m_lineType)) {
2875 if (ctx.m_lineType != BlockType::ListWithFirstEmptyLine) {
2880 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2886 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2892 parseFragmentAndMakeNextLineMain(ctx,
// A code line met inside a code fragment with a known start sequence: the
// line is stored and the fragment flushed (the rest of the condition is not
// visible in this listing).
2906 else if (ctx.m_type == BlockType::Code && ctx.m_type == ctx.m_lineType &&
2907 !ctx.m_startOfCode.isEmpty() &&
2910 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2912 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
// A line that is neither a list item nor list content ends a list fragment.
2915 else if (ctx.m_type != ctx.m_lineType && isListType(ctx.m_type) &&
2916 ctx.m_lineType != BlockType::SomethingInList &&
2917 ctx.m_lineType != BlockType::FensedCodeInList && !isListType(ctx.m_lineType)) {
2918 parseFragmentAndMakeNextLineMain(ctx,
2929 }
else if (ctx.m_type == BlockType::Heading) {
2930 parseFragmentAndMakeNextLineMain(ctx,
2942 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2945 ctx.m_emptyLinesCount = 0;
// End of stream: an unfinished code fragment gets its start sequence appended
// with line number -1 before the final flush.
2948 if (!ctx.m_fragment.empty()) {
2949 if (ctx.m_type == BlockType::Code) {
2950 ctx.m_fragment.push_back({ctx.m_startOfCode, {-1, {}}});
2953 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
// Pass over the blocks stored in ctx.m_splitted, parsing each with
// collectRefLinks == false.
2959 for (
long long int i = 0; i < (
long long int)ctx.m_splitted.size(); ++i) {
2960 long long int line = 0;
2962 auto &data = ctx.m_splitted[i];
2965 line = parseFragment(data, parent, doc, linksToParse, workingPath, fileName,
false, ctx.m_html);
2970 if (ctx.m_html.m_html) {
2971 ctx.m_html.m_parent->appendItem(ctx.m_html.m_html);
// Drop the lines that precede the line number returned by the parse call.
2976 const auto it = std::find_if(data.m_data.cbegin(), data.m_data.cend(), [line](
const auto &d) {
2977 return (d.second.m_lineNumber == line);
2980 data.m_data.erase(data.m_data.cbegin(), it);
// For HTML block types >= 6 an empty line after the block ends the HTML.
2984 if (ctx.m_html.m_htmlBlockType >= 6) {
2985 ctx.m_html.m_continueHtml = (!ctx.m_splitted[i].m_emptyLineAfter);
2988 if (ctx.m_html.m_html && !ctx.m_html.m_continueHtml) {
2989 finishHtml(ctx, parent, doc, collectRefLinks, top, dontProcessLastFreeHtml);
2990 }
else if (!ctx.m_html.m_html) {
2991 ctx.m_html.m_toAdjustLastPos.clear();
// HTML still pending after the last block is finished here.
2996 if (ctx.m_html.m_html) {
2997 finishHtml(ctx, parent, doc, collectRefLinks, top, dontProcessLastFreeHtml);
// Qt specialization of parseFile(): the file is parsed only when it exists
// and its lower-cased suffix is in the extension list `ext`; the content is
// then passed to parseStream() with the file's absolute directory and name.
3003#ifdef MD4QT_QT_SUPPORT
3007Parser<QStringTrait>::parseFile(
const QString &fileName,
3015 if (fi.exists() && ext.
contains(fi.suffix().toLower())) {
3022 parseStream(s, fi.absolutePath(), fi.fileName(), recursive, doc, ext, parentLinks);
// ICU/STL specialization of parseFile(): works on UTF-8 paths through
// std::filesystem and forwards the opened file to parseStream().
3029#ifdef MD4QT_ICU_STL_SUPPORT
3033Parser<UnicodeStringTrait>::parseFile(
const UnicodeString &fileName,
3036 const std::vector<UnicodeString> &ext,
3037 std::vector<UnicodeString> *parentLinks)
3041 fileName.toUTF8String(fn);
3044 auto e = UnicodeString::fromUTF8(std::filesystem::u8path(fn).extension().u8string());
// Only files whose lower-cased extension is listed in `ext` are parsed.
3050 if (std::find(ext.cbegin(), ext.cend(), e.toLower()) != ext.cend()) {
3051 auto path = std::filesystem::canonical(std::filesystem::u8path(fn));
3052 std::ifstream file(
path.c_str(), std::ios::in | std::ios::binary);
// Split the canonical path into file name and directory; the last character
// of the non-empty directory string is erased and backslashes are replaced
// with forward slashes.
3055 const auto fileNameS =
path.filename().u8string();
3056 auto workingDirectory =
path.remove_filename().u8string();
3058 if (!workingDirectory.empty()) {
3059 workingDirectory.erase(workingDirectory.size() - 1, 1);
3062 std::replace(workingDirectory.begin(), workingDirectory.end(),
'\\',
'/');
3064 parseStream(file, UnicodeString::fromUTF8(workingDirectory),
3065 UnicodeString::fromUTF8(fileNameS), recursive, doc, ext, parentLinks);
// std::exception thrown by the code above is caught here; the handler body is
// not visible in this listing.
3070 }
catch (
const std::exception &) {
// Fragment (function name line not visible in this listing): rewrites the
// entries of `linksToParse` in place. An entry starting with "#" is looked up
// among the document's labeled links and replaced by that link's URL; an
// entry naming an existing file is replaced by its absolute path.
3078template<
class Trait>
3083 for (
auto it = linksToParse.begin(), last = linksToParse.end(); it != last; ++it) {
3084 auto nextFileName = *it;
3086 if (nextFileName.startsWith(Trait::latin1ToString(
"#"))) {
3087 const auto lit = doc->labeledLinks().find(nextFileName);
3089 if (lit != doc->labeledLinks().cend()) {
3090 nextFileName = lit->second->url();
3096 if (Trait::fileExists(nextFileName)) {
3097 *it = Trait::absoluteFilePath(nextFileName);
// Parses one Markdown stream into `doc`: reads all lines, runs the block
// parser over them, records the file as parsed and, in recursive mode,
// follows the links collected during parsing.
3102template<
class Trait>
3104Parser<Trait>::parseStream(
typename Trait::TextStream &s,
3105 const typename Trait::String &workingPath,
3106 const typename Trait::String &fileName,
3109 const typename Trait::StringList &ext,
3110 typename Trait::StringList *parentLinks)
3112 typename Trait::StringList linksToParse;
// Full name of the file: "<workingPath>/<fileName>", or just the file name
// when no working path is given.
3114 const auto path = workingPath.
isEmpty() ?
typename Trait::String(fileName) :
3115 typename Trait::String(workingPath + Trait::latin1ToString(
"/") + fileName);
// Read every line of the input into `data`.
3122 TextStream<Trait> stream(s);
3124 long long int i = 0;
3126 while (!stream.atEnd()) {
3127 data.push_back(std::pair<typename Trait::InternalString, MdLineData>(stream.readLine(), {i}));
3132 StringListStream<Trait> stream(data);
3134 parse(stream, doc, doc, linksToParse, workingPath, fileName,
true,
true);
3136 m_parsedFiles.push_back(path);
3141 if (recursive && !linksToParse.empty()) {
// Snapshot of the links found in this file; it is appended to *parentLinks
// further below.
3142 const auto tmpLinks = linksToParse;
3144 while (!linksToParse.empty()) {
3145 auto nextFileName = linksToParse.front();
3146 linksToParse.erase(linksToParse.cbegin());
// NOTE(review): `parentLinks` is dereferenced here; presumably a null check
// sits on a line not visible in this listing - confirm.
3149 const auto pit = std::find(parentLinks->cbegin(), parentLinks->cend(), nextFileName);
3151 if (pit != parentLinks->cend()) {
3156 if (nextFileName.startsWith(Trait::latin1ToString(
"#"))) {
// Only files not yet present in m_parsedFiles are parsed.
3160 const auto pit = std::find(m_parsedFiles.cbegin(), m_parsedFiles.cend(), nextFileName);
3162 if (pit == m_parsedFiles.cend()) {
3167 parseFile(nextFileName, recursive, doc, ext, &linksToParse);
3172 std::copy(tmpLinks.cbegin(), tmpLinks.cend(), std::back_inserter(*parentLinks));
// Fragment (function name line not visible in this listing): scans `s` in
// three steps - leading whitespace, a run of digits, then whitespace again -
// and decides on a result from where the scan stopped and from `sc`.
// NOTE(review): `sc` is presumably the number of spaces counted in the third
// step; the increment and the returned values are not visible here.
3178template<
class Trait>
3183 long long int p = 0;
3185 for (; p < s.size(); ++p) {
3186 if (!s[p].isSpace()) {
3192 for (; p < s.size(); ++p) {
3193 if (!s[p].isDigit()) {
3201 long long int sc = 0;
3203 for (; p < s.size(); ++p) {
3204 if (!s[p].isSpace()) {
3211 if (p == s.length() || sc > 4) {
3213 }
else if (sc == 0) {
// Fragment (signature not visible in this listing): starts from the number of
// known indents and walks them from the last one to the first.
// NOTE(review): presumably computes a list nesting level; the loop body is
// not visible here.
3225 long long int level = indents.
size();
3227 for (
auto it = indents.crbegin(), last = indents.crend(); it != last; ++it) {
// Classifies a single line and returns its BlockType. The visible results are
// Footnote, FensedCodeInList, SomethingInList, CodeIndentedBySpaces,
// ListWithFirstEmptyLine, List, Code, Blockquote, Heading, EmptyLine and Text.
// Several parameters and conditions are on lines not visible in this listing.
3238template<
class Trait>
3239inline typename Parser<Trait>::BlockType
3240Parser<Trait>::whatIsTheLine(
typename Trait::InternalString &str,
3242 bool inListWithFirstEmptyLine,
3243 bool fensedCodeInList,
3244 typename Trait::String *startOfCode,
3246 bool emptyLinePreceded,
3248 const std::vector<long long int> *indents)
// `first` is computed on a line not visible here; `s` is the line from that
// position onwards.
3254 if (first < str.length()) {
3255 auto s = str.sliced(first);
3257 const bool isBlockquote = s.asString().startsWith(Trait::latin1ToString(
">"));
3258 const bool indentIn =
indentInList(indents, first,
false);
3259 bool isHeading =
false;
3262 return BlockType::Footnote;
// Heading candidate: starts with '#', is indented by less than 4 (relative to
// the list indent when one is given), has at most 6 '#' and the run of '#' is
// followed by whitespace or the end of the line.
3265 if (s.asString().startsWith(Trait::latin1ToString(
"#")) &&
3266 (indent ? first - indent->m_indent < 4 : first < 4)) {
3267 long long int c = 0;
3269 while (c < s.length() && s[c] == Trait::latin1ToChar(
'#')) {
3273 if (c <= 6 && ((c < s.length() && s[c].isSpace()) || c == s.length())) {
3279 bool isFirstLineEmpty =
false;
3283 const auto codeIndentedBySpaces = emptyLinePreceded && first >= 4 &&
3286 if (fensedCodeInList) {
3290 return BlockType::FensedCodeInList;
3294 return BlockType::SomethingInList;
3298 if (fensedCode && indentIn) {
3303 return BlockType::FensedCodeInList;
// List item marker: '-', '+' or '*' followed by a space or standing alone, or
// an ordered list marker; accepted when indented by less than 4 or matching
// a known list indent.
3304 }
else if ((((s.asString().startsWith(Trait::latin1ToString(
"-")) ||
3305 s.asString().startsWith(Trait::latin1ToString(
"+")) ||
3306 s.asString().startsWith(Trait::latin1ToString(
"*"))) &&
3307 ((s.length() > 1 && s[1] == Trait::latin1ToChar(
' ')) || s.length() == 1)) ||
3308 orderedList) && (first < 4 || indentIn)) {
3309 if (codeIndentedBySpaces) {
3310 return BlockType::CodeIndentedBySpaces;
3313 if (indent && calcIndent) {
3315 indent->m_level = (indents ?
listLevel(*indents, first) : -1);
// A marker with nothing after it starts a list whose first line is empty.
3318 if (s.simplified().length() == 1 || isFirstLineEmpty) {
3319 return BlockType::ListWithFirstEmptyLine;
3321 return BlockType::List;
3324 return BlockType::SomethingInList;
3327 if (!isHeading && !isBlockquote &&
3328 !(fensedCode && first < 4) && !emptyLinePreceded && !inListWithFirstEmptyLine) {
3329 return BlockType::SomethingInList;
// Second list-marker check: here only an indent below 4 is accepted.
3333 bool isFirstLineEmpty =
false;
3337 const bool isHLine = first < 4 && isHorizontalLine<Trait>(s.asString());
3340 (((s.asString().startsWith(Trait::latin1ToString(
"-")) || s.asString().startsWith(Trait::latin1ToString(
"+")) ||
3341 s.asString().startsWith(Trait::latin1ToString(
"*"))) &&
3342 ((s.length() > 1 && s[1] == Trait::latin1ToChar(
' ')) || s.length() == 1)) ||
3343 orderedList) && first < 4) {
3344 if (indent && calcIndent) {
3346 indent->m_level = (indents ?
listLevel(*indents, first) : -1);
3349 if (s.simplified().length() == 1 || isFirstLineEmpty) {
3350 return BlockType::ListWithFirstEmptyLine;
3352 return BlockType::List;
// A line starting with four spaces is code indented by spaces.
3357 if (str.asString().startsWith(
typename Trait::String(4, Trait::latin1ToChar(
' ')))) {
3358 return BlockType::CodeIndentedBySpaces;
3360 return BlockType::Code;
3361 }
else if (isBlockquote) {
3362 return BlockType::Blockquote;
3363 }
else if (isHeading) {
3364 return BlockType::Heading;
3367 return BlockType::EmptyLine;
3370 return BlockType::Text;
// Parses one block of lines. When raw HTML is being continued the block is
// handled as text; otherwise the type of the block's first line selects the
// specialized parser.
3373template<
class Trait>
3375Parser<Trait>::parseFragment(MdBlock<Trait> &fr,
3376 std::shared_ptr<Block<Trait>> parent,
3378 typename Trait::StringList &linksToParse,
3379 const typename Trait::String &workingPath,
3380 const typename Trait::String &fileName,
3381 bool collectRefLinks,
3382 RawHtmlBlock<Trait> &html)
3384 if (html.m_continueHtml) {
3385 parseText(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3388 if (!collectRefLinks) {
3389 parent->appendItem(html.m_html);
// Dispatch on the classification of the first line of the block.
3395 switch (whatIsTheLine(fr.m_data.front().first)) {
3396 case BlockType::Footnote:
3397 parseFootnote(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
3400 case BlockType::Text:
3401 parseText(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3404 case BlockType::Blockquote:
3405 parseBlockquote(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3408 case BlockType::Code:
3409 parseCode(fr, parent, collectRefLinks);
3412 case BlockType::CodeIndentedBySpaces: {
3415 if (fr.m_data.front().first.asString().startsWith(Trait::latin1ToString(
" "))) {
3419 parseCodeIndentedBySpaces(fr, parent, collectRefLinks, indent, {}, -1, -1,
false);
3422 case BlockType::Heading:
3423 parseHeading(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
// Lists are the only visible case whose parser result is returned directly.
3426 case BlockType::List:
3427 case BlockType::ListWithFirstEmptyLine:
3428 return parseList(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
// Empties the list of already parsed files (m_parsedFiles), which
// parseStream() fills and consults before parsing linked files.
3438template<
class Trait>
3440Parser<Trait>::clearCache()
3442 m_parsedFiles.clear();
// Fragment (function name line not visible in this listing): inspects a line
// that contains '|'. The line is simplified, one leading and one trailing '|'
// are left out, and the remainder is walked character by character with
// backslash-escape tracking so that an escaped '|' is told apart from a plain
// one.
// NOTE(review): presumably counts table columns; the counting and the return
// statement are not visible here.
3446template<
class Trait>
3450 if (s.contains(Trait::latin1ToChar(
'|'))) {
3453 const auto tmp = s.simplified();
3454 const auto p = tmp.startsWith(Trait::latin1ToString(
"|")) ? 1 : 0;
3455 const auto n = tmp.size() - p - (tmp.endsWith(Trait::latin1ToString(
"|")) && tmp.size() > 1 ? 1 : 0);
3456 const auto v = tmp.sliced(p, n);
3458 bool backslash =
false;
3460 for (
long long int i = 0; i < v.size(); ++i) {
3463 if (v[i] == Trait::latin1ToChar(
'\\') && !backslash) {
3466 }
else if (v[i] == Trait::latin1ToChar(
'|') && !backslash) {
// Parses a text block either as a table or as a paragraph.
// `c` and `h` are computed on lines not visible in this listing; the table
// path is taken when both are non-zero and equal and no raw HTML is being
// continued.
3483template<
class Trait>
3485Parser<Trait>::parseText(MdBlock<Trait> &fr,
3486 std::shared_ptr<Block<Trait>> parent,
3488 typename Trait::StringList &linksToParse,
3489 const typename Trait::String &workingPath,
3490 const typename Trait::String &fileName,
3491 bool collectRefLinks,
3492 RawHtmlBlock<Trait> &html)
3497 if (c && h && c == h && !html.m_continueHtml) {
3498 parseTable(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, c);
// Lines left in the block after the table are parsed as a new stream.
3500 if (!fr.m_data.empty()) {
3501 StringListStream<Trait> stream(fr.m_data);
3503 Parser<Trait>::parse(stream, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
3506 parseParagraph(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
// Fragment (function name line not visible in this listing): searches `s`
// for "{#" and then for the next '}' after it, and returns a pair of a label
// string and its position.
// NOTE(review): how `label` and `pos` are filled, and any removal from `s`,
// is on lines not visible here.
3511template<
class Trait>
3512inline std::pair<typename Trait::String, WithPosition>
3515 const auto start = s.asString().indexOf(Trait::latin1ToString(
"{#"));
3518 long long int p =
start + 2;
3520 for (; p < s.length(); ++p) {
3521 if (s[p] == Trait::latin1ToChar(
'}')) {
3526 if (p < s.length() && s[p] == Trait::latin1ToChar(
'}')) {
3533 return {label, pos};
// Fragment (function name line not visible in this listing): builds a string
// from `s` by keeping letters, digits, '-' and '_' in lower case and turning
// every whitespace character into "-". Handling of other characters and the
// return statement are not visible here.
3541template<
class Trait>
3542inline typename Trait::String
3545 typename Trait::String res;
3547 for (
long long int i = 0; i < s.length(); ++i) {
3548 const auto c = s[i];
3550 if (c.isLetter() || c.isDigit() || c == Trait::latin1ToChar(
'-') ||
3551 c == Trait::latin1ToChar(
'_')) {
3552 res.push_back(c.toLower());
3553 }
else if (c.isSpace()) {
3554 res.push_back(Trait::latin1ToString(
"-"));
// Fragment (function name line not visible in this listing): walks over the
// items of paragraph `p` and inspects each item according to its type; for
// the visible cases an embedded paragraph (p()) is checked before the plain
// text().
// NOTE(review): presumably assembles the label string `l` from the item
// texts; the statements doing so are not visible here.
3562template<
class Trait>
3563inline typename Trait::String
3566 typename Trait::String l;
3572 for (
auto it = p->
items().cbegin(), last = p->
items().cend(); it != last; ++it) {
3573 switch ((*it)->type()) {
3576 const auto text = t->text();
3583 if (!i->p()->isEmpty()) {
3585 }
else if (!i->text().isEmpty()) {
3591 auto link =
static_cast<Link<Trait> *
>(it->get());
3593 if (!link->p()->isEmpty()) {
3595 }
else if (!link->text().isEmpty()) {
3603 if (!c->text().isEmpty()) {
// Fragment (function name line not visible in this listing): scans `s` from
// its last character backwards, looking at '#' characters and at what stands
// before them (whitespace or another '#'), and acts only when both `start`
// and `end` were determined.
// NOTE(review): s[i - 1] is read inside the loop; presumably a guard for
// i == 0 sits on a line not visible here - confirm.
3617template<
class Trait>
3621 long long int end = -1;
3622 long long int start = -1;
3624 for (
long long int i = s.length() - 1; i >= 0; --i) {
3625 if (!s[i].isSpace() && s[i] != Trait::latin1ToChar(
'#') && end == -1) {
3629 if (s[i] == Trait::latin1ToChar(
'#')) {
3635 if (s[i - 1].isSpace()) {
3638 }
else if (s[i - 1] != Trait::latin1ToChar(
'#')) {
3649 if (
start != -1 && end != -1) {
// Parses a heading introduced by '#' characters from the first line of the
// block, registers it in the document under a label and appends it to
// `parent`. Does nothing when the block is empty or only reference links are
// being collected.
3659template<
class Trait>
3661Parser<Trait>::parseHeading(MdBlock<Trait> &fr,
3662 std::shared_ptr<Block<Trait>> parent,
3664 typename Trait::StringList &linksToParse,
3665 const typename Trait::String &workingPath,
3666 const typename Trait::String &fileName,
3667 bool collectRefLinks)
3669 if (!fr.m_data.empty() && !collectRefLinks) {
3670 auto line = fr.m_data.front().first;
// The heading occupies a single line: its end line equals its start line.
3674 h->setStartLine(fr.m_data.front().second.m_lineNumber);
3675 h->setEndColumn(line.virginPos(line.length() - 1));
3676 h->setEndLine(h->startLine());
3678 long long int pos = 0;
3682 line = line.sliced(pos);
// Skip the run of '#'; the opening delimiter spans from the heading's start
// column to the last '#'.
3688 while (pos < line.length() && line[pos] == Trait::latin1ToChar(
'#')) {
3693 WithPosition startDelim = {h->startColumn(), h->startLine(),
3694 line.virginPos(pos - 1), h->startLine()};
// The heading text is what follows the opening '#' run.
3699 fr.m_data.front().first = line.sliced(pos);
// A closing delimiter is recorded only when one was found.
3708 if (endDelim.startColumn() != -1) {
3709 endDelim.setStartLine(fr.m_data.front().second.m_lineNumber);
3710 endDelim.setEndLine(endDelim.startLine());
3712 delims.push_back(endDelim);
3715 h->setDelims(delims);
3721 (!workingPath.isEmpty() ? workingPath + Trait::latin1ToString(
"/") :
3722 Trait::latin1ToString(
"")) + fileName);
3724 label.second.setStartLine(fr.m_data.front().second.m_lineNumber);
3725 label.second.setEndLine(
label.second.startLine());
3727 h->setLabelPos(
label.second);
// The heading text is parsed as inline content from a one-line block, after
// which the line is removed from the fragment.
3734 tmp.push_back(fr.m_data.front());
3735 MdBlock<Trait> block = {tmp, 0};
3737 RawHtmlBlock<Trait> html;
3739 parseFormattedTextLinksImages(block, p, doc, linksToParse, workingPath, fileName,
3740 false,
false, html,
false);
3742 fr.m_data.erase(fr.m_data.cbegin());
// A heading that already has a label is registered under it; otherwise a
// label starting with "#" and ending with "/<workingPath>/<fileName>" is
// built and used.
3750 if (h->isLabeled()) {
3751 doc->insertLabeledHeading(h->label(), h);
3753 typename Trait::String
label = Trait::latin1ToString(
"#") +
3756 label += Trait::latin1ToString(
"/") +
3757 (!workingPath.isEmpty() ? workingPath + Trait::latin1ToString(
"/") :
3758 Trait::latin1ToString(
"")) + fileName;
3762 doc->insertLabeledHeading(label, h);
3765 parent->appendItem(h);
// Fragment (function name line not visible in this listing): unescapes table
// cell text by replacing every "\|" in `s` with "|".
3770template<
class Trait>
3771inline typename Trait::InternalString
3774 s.replace(Trait::latin1ToString(
"\\|"), Trait::latin1ToString(
"|"));
// Fragment (function name line not visible in this listing): walks through
// `s` with backslash-escape tracking and, for every unescaped '|', records
// the separator's original column (virginPos). Returns the cell strings
// together with the recorded columns.
3780template<
class Trait>
3781inline std::pair<typename Trait::InternalStringList, std::vector<long long int>>
3784 typename Trait::InternalStringList res;
3785 std::vector<long long int> columns;
3787 bool backslash =
false;
3788 long long int start = 0;
3790 for (
long long int i = 0; i < s.length(); ++i) {
3793 if (s[i] == Trait::latin1ToChar(
'\\') && !backslash) {
3796 }
else if (s[i] == Trait::latin1ToChar(
'|') && !backslash) {
3798 columns.push_back(s.virginPos(i));
3809 return {res, columns};
// Parses a table from the block: the first line is the header row, the second
// line defines column alignments, further lines are data rows. Consumed lines
// are removed from the block and the table is appended to `parent` unless it
// is empty or only reference links are being collected.
3812template<
class Trait>
3814Parser<Trait>::parseTable(MdBlock<Trait> &fr,
3815 std::shared_ptr<Block<Trait>> parent,
3817 typename Trait::StringList &linksToParse,
3818 const typename Trait::String &workingPath,
3819 const typename Trait::String &fileName,
3820 bool collectRefLinks,
3823 static const char sep =
'|';
// A table needs at least a header line and an alignment line.
3825 if (fr.m_data.size() >= 2) {
3827 table->setStartColumn(fr.m_data.front().first.virginPos(0));
3828 table->setStartLine(fr.m_data.front().second.m_lineNumber);
3829 table->setEndColumn(fr.m_data.back().first.virginPos(fr.m_data.back().first.length() - 1));
3830 table->setEndLine(fr.m_data.back().second.m_lineNumber);
// Row parser (its opening line is not visible in this listing): strips one
// leading and one trailing separator, splits the rest into cells and parses
// every cell as inline content.
3833 const auto &row = lineData.first;
3835 if (row.asString().startsWith(Trait::latin1ToString(
" "))) {
3842 if (p == line.length()) {
3846 if (line[p] == Trait::latin1ToChar(sep)) {
3847 line.remove(0, p + 1);
3850 for (p = line.length() - 1; p >= 0; --p) {
3851 if (!line[p].isSpace()) {
3860 if (line[p] == Trait::latin1ToChar(sep)) {
3861 line.remove(p, line.length() - p);
// Column boundaries: the row's first and last original columns are added
// around the separator positions.
3865 columns.second.insert(columns.second.begin(), row.virginPos(0));
3866 columns.second.push_back(row.virginPos(row.length() - 1));
3869 tr->setStartColumn(row.virginPos(0));
3870 tr->setStartLine(lineData.second.m_lineNumber);
3871 tr->setEndColumn(row.virginPos(row.length() - 1));
3872 tr->setEndLine(lineData.second.m_lineNumber);
3876 for (
auto it = columns.first.begin(), last = columns.first.end(); it != last; ++it, ++col) {
3877 if (col == columnsCount) {
// A cell spans from its own boundary to the next one.
3882 c->setStartColumn(columns.second.at(col));
3883 c->setStartLine(lineData.second.m_lineNumber);
3884 c->setEndColumn(columns.second.at(col + 1));
3885 c->setEndLine(lineData.second.m_lineNumber);
3887 if (!it->isEmpty()) {
3888 it->replace(Trait::latin1ToString(
"|"), Trait::latin1ToChar(sep));
3891 fragment.push_back({*it, lineData.second});
3892 MdBlock<Trait> block = {fragment, 0};
3896 RawHtmlBlock<Trait> html;
3898 parseFormattedTextLinksImages(block, p, doc, linksToParse, workingPath, fileName,
3899 collectRefLinks,
false, html,
false);
// Move the parsed items into the cell; items of a nested paragraph are added
// one by one.
3901 if (!p->isEmpty()) {
3902 for (
auto it = p->items().cbegin(), last = p->items().cend(); it != last; ++it ) {
3903 switch ((*it)->type()) {
3905 const auto pp = std::static_pointer_cast<Paragraph<Trait>>(*it);
3907 for (
auto it = pp->items().cbegin(), last = pp->items().cend(); it != last; ++it) {
3908 c->appendItem((*it));
3914 c->appendItem((*it));
// Raw HTML left over from parsing the cell is added to the cell too.
3920 if (html.m_html.get()) {
3921 c->appendItem(html.m_html);
3929 table->appendRow(tr);
// The second line of the block holds the alignment markers: ':' on both ends
// of a column is distinguished from ':' at the end only.
3935 auto fmt = fr.m_data.at(1).first;
3937 auto columns = fmt.split(
typename Trait::InternalString(Trait::latin1ToChar(sep)));
3939 for (
auto it = columns.begin(), last = columns.end(); it != last; ++it) {
3940 *it = it->simplified();
3942 if (!it->isEmpty()) {
3945 if (it->asString().endsWith(Trait::latin1ToString(
":")) &&
3946 it->asString().startsWith(Trait::latin1ToString(
":"))) {
3948 }
else if (it->asString().endsWith(Trait::latin1ToString(
":"))) {
3952 table->setColumnAlignment(table->columnsCount(), a);
// The alignment line is removed; the remaining lines are parsed as rows until
// one is rejected, and the first `r` lines are then dropped from the block.
3957 fr.m_data.erase(fr.m_data.cbegin() + 1);
3959 long long int r = 0;
3961 for (
const auto &line : std::as_const(fr.m_data)) {
3962 if (!parseTableRow(line)) {
3969 fr.m_data.erase(fr.m_data.cbegin(), fr.m_data.cbegin() + r);
3971 if (!table->isEmpty() && !collectRefLinks) {
3972 parent->appendItem(table);
// Checks string `s` against character `c` (used with '=' and '-' by isH1()
// and isH2()). The visible steps: remember where a run starts, require the
// run to be at least one character long, then scan the rest of the string for
// a non-space character.
// NOTE(review): presumably tests for an underline made only of `c`; the loop
// bodies and return statements are not visible in this listing.
3978template<
class Trait>
3980isH(
const typename Trait::String &s,
3981 const typename Trait::Char &c)
3989 const auto start = p;
3991 for (; p < s.size(); ++p) {
3997 if (p -
start < 1) {
4001 for (; p < s.size(); ++p) {
4002 if (!s[p].isSpace()) {
//! Thin wrapper: returns the result of isH() for string \a s with the
//! equals sign as the character to check.
4011template<
class Trait>
4013isH1(
const typename Trait::String &s)
4015 return isH<Trait>(s, Trait::latin1ToChar(
'='));
//! Thin wrapper: returns the result of isH() for string \a s with the
//! minus sign as the character to check.
4019template<
class Trait>
4021isH2(
const typename Trait::String &s)
4023 return isH<Trait>(s, Trait::latin1ToChar(
'-'));
//! Compute the position that precedes (pos, line) and return it as a
//! pair of long long int, built as {position, line} in the visible returns.
//! Visible logic: one path simply returns {pos - 1, line}; the other path
//! searches fr.m_data for the entry whose m_lineNumber equals \c line and
//! builds the result from the entry before it (index i - 1).
//! NOTE(review): the function name, parameter list and the condition that
//! selects between the two paths are missing from this listing.
4027template<
class Trait>
4028inline std::pair<long long int, long long int>
// Same line, one column to the left.
4034 return {pos - 1, line};
4037 for (
long long int i = 0; i < static_cast<long long int>(fr.
m_data.size()); ++i) {
4038 if (fr.
m_data.at(i).second.m_lineNumber == line) {
// Result column is virginPos() of the last index of the previous entry.
// NOTE(review): at(i - 1) needs i > 0; the guard is not visible - confirm.
4040 return {fr.
m_data.at(i - 1).first.virginPos(fr.
m_data.at(i - 1).first.length() - 1),
//! Compute the position that follows (pos, line) and return it as a
//! pair of long long int, built as {position, line number}.
//! Visible logic: the entry of fr.m_data whose m_lineNumber equals \c line
//! is located; if virginPos() of its last index is at least pos + 1 the
//! result stays on the same line, otherwise, when a following entry exists,
//! the result is virginPos(0) of that entry together with its m_lineNumber.
//! NOTE(review): the function name, parameter list and the fallback return
//! are missing from this listing.
4050template<
class Trait>
4051inline std::pair<long long int, long long int>
4056 for (
long long int i = 0; i < static_cast<long long int>(fr.
m_data.size()); ++i) {
4057 if (fr.
m_data.at(i).second.m_lineNumber == line) {
// There is still room on this line: step one column to the right.
4058 if (fr.
m_data.at(i).first.virginPos(fr.
m_data.at(i).first.length() - 1) >= pos + 1) {
4059 return {pos + 1, line};
4060 }
// Otherwise move to the start of the next entry, if there is one.
else if (i + 1 <
static_cast<long long int>(fr.
m_data.size())) {
4061 return {fr.
m_data.at(i + 1).first.virginPos(0), fr.
m_data.at(i + 1).second.m_lineNumber};
//! Parse a paragraph block.
//! The visible body is a single call that forwards \a fr, \a parent, doc,
//! \a linksToParse, \a workingPath, \a fileName, \a collectRefLinks and
//! \a html to parseFormattedTextLinksImages(), passing \c false for the
//! two flag arguments placed around \a html.
//! NOTE(review): the return type and the declaration of the doc parameter
//! are missing from this listing.
4071template<
class Trait>
4073Parser<Trait>::parseParagraph(MdBlock<Trait> &fr,
4074 std::shared_ptr<Block<Trait>> parent,
4076 typename Trait::StringList &linksToParse,
4077 const typename Trait::String &workingPath,
4078 const typename Trait::String &fileName,
4079 bool collectRefLinks,
4080 RawHtmlBlock<Trait> &html)
4082 parseFormattedTextLinksImages(fr, parent, doc, linksToParse, workingPath, fileName,
4083 collectRefLinks,
false, html,
false);
//! Accessor wrapper: returns html->isFreeTag().
//! NOTE(review): the signature is missing from this listing; html is
//! dereferenced without a visible null check - confirm callers pass a
//! valid pointer.
4086template<
class Trait>
4091 return html->isFreeTag();
// Mutator wrapper: forwards the flag on to html->setFreeTag().
// NOTE(review): the enclosing signature is missing from this listing.
4097 html->setFreeTag(on);
//! Scan every line of fr and collect inline delimiters into d, returned as
//! Parser<Trait>::Delims.
//! Whole-line entries (HorizontalLine, H1, H2) span columns 0..str.length().
//! Otherwise the line is walked character by character and entries are
//! pushed for emphasis (star, underscore), strikethrough (tilde), square
//! brackets, image opener, parentheses, less/greater signs, inline code
//! (backtick) and math (dollar) markers.
//! NOTE(review): the function name, parameter list, the conditions that
//! select the whole-line entries and the handling of the backslash flag
//! between iterations are missing from this listing.
4101template<
class Trait>
4102inline typename Parser<Trait>::Delims
4107 for (
long long int line = 0; line < (
long long int)fr.size(); ++line) {
4108 const typename Trait::String &str = fr.at(line).first.asString();
// Line content starting at index p (p is computed on a line not shown).
4110 const auto withoutSpaces = str.sliced(p);
// Whole-line delimiters: position 0, length of the full string.
4113 d.push_back({Delimiter::HorizontalLine, line, 0, str.length(),
false,
false,
false});
4115 d.push_back({Delimiter::H1, line, 0, str.length(),
false,
false,
false});
4117 d.push_back({Delimiter::H2, line, 0, str.length(),
false,
false,
false});
// Escape flag: most branches below only fire when it is not set.
4119 bool backslash =
false;
4122 for (
long long int i = p; i < str.size(); ++i) {
// An unescaped backslash.
4125 if (str[i] == Trait::latin1ToChar(
'\\') && !backslash) {
// Emphasis markers.
4130 else if ((str[i] == Trait::latin1ToChar(
'_') || str[i] == Trait::latin1ToChar(
'*')) && !backslash) {
4131 typename Trait::String style;
// Classify the character before the run. At the start of the line the
// run counts as preceded by punctuation and whitespace, not by a
// letter or number.
4133 const bool punctBefore = (i > 0 ? str[i - 1].isPunct() || str[i - 1].isSymbol() :
true);
4134 const bool uWhitespaceBefore = (i > 0 ? Trait::isUnicodeWhitespace(str[i - 1]) : true);
4135 const bool uWhitespaceOrPunctBefore = uWhitespaceBefore || punctBefore;
4136 const bool alNumBefore = (i > 0 ? str[i - 1].isLetterOrNumber() :
false);
4138 const auto ch = str[i];
// Gather the run of identical marker characters into style.
4140 while (i < str.length() && str[i] == ch) {
4141 style.push_back(str[i]);
4145 typename Delimiter::DelimiterType dt = Delimiter::Unknown;
// Star maps to Emphasis1; Emphasis2 is assigned on the other path.
4147 if (ch == Trait::latin1ToChar(
'*')) {
4148 dt = Delimiter::Emphasis1;
4150 dt = Delimiter::Emphasis2;
// Classify the character after the run. At the end of the line the run
// counts as followed by punctuation and whitespace, not by a letter or
// number.
4153 const bool punctAfter = (i < str.length() ? str[i].isPunct() || str[i].isSymbol() :
true);
4154 const bool uWhitespaceAfter = (i < str.length() ? Trait::isUnicodeWhitespace(str[i]) :
true);
4155 const bool alNumAfter = (i < str.length() ? str[i].isLetterOrNumber() :
false);
// Flanking tests. In addition, an underscore run that sits between two
// letters or numbers is neither left- nor right-flanking.
4156 const bool leftFlanking = !uWhitespaceAfter && (!punctAfter || (punctAfter && uWhitespaceOrPunctBefore))
4157 && !(ch == Trait::latin1ToChar(
'_') && alNumBefore && alNumAfter);
4158 const bool rightFlanking = !uWhitespaceBefore && (!punctBefore || (punctBefore && (uWhitespaceAfter || punctAfter)))
4159 && !(ch == Trait::latin1ToChar(
'_') && alNumBefore && alNumAfter);
// One delimiter of length 1 is pushed for every character of the run.
4161 if (leftFlanking || rightFlanking) {
4162 for (
auto j = 0; j < style.length(); ++j) {
4163 d.push_back({dt, line, i - style.length() + j, 1,
4164 word,
false, leftFlanking, rightFlanking});
// Strikethrough marker.
4175 else if (str[i] == Trait::latin1ToChar(
'~') && !backslash) {
4176 typename Trait::String style;
4178 const bool punctBefore = (i > 0 ? str[i - 1].isPunct() || str[i - 1].isSymbol() :
true);
4179 const bool uWhitespaceBefore = (i > 0 ? Trait::isUnicodeWhitespace(str[i - 1]) : true);
4180 const bool uWhitespaceOrPunctBefore = uWhitespaceBefore || punctBefore;
4182 while (i < str.length() && str[i] == Trait::latin1ToChar(
'~')) {
4183 style.push_back(str[i]);
// Only runs of at most two tildes are considered for Strikethrough.
4187 if (style.length() <= 2) {
4188 const bool punctAfter = (i < str.length() ? str[i].isPunct() || str[i].isSymbol() : true);
4189 const bool uWhitespaceAfter = (i < str.length() ? Trait::isUnicodeWhitespace(str[i]) : true);
4190 const bool leftFlanking = !uWhitespaceAfter && (!punctAfter || (punctAfter && uWhitespaceOrPunctBefore));
4191 const bool rightFlanking = !uWhitespaceBefore && (!punctBefore || (punctBefore && (uWhitespaceAfter || punctAfter)));
4193 if (leftFlanking || rightFlanking) {
4194 d.push_back({Delimiter::Strikethrough,
// Single-character structural delimiters follow; each has length 1.
4214 else if (str[i] == Trait::latin1ToChar(
'[') && !backslash) {
4215 d.push_back({Delimiter::SquareBracketsOpen, line, i, 1, word,
false});
// Exclamation mark directly followed by an opening square bracket:
// image opener with length 2.
4220 else if (str[i] == Trait::latin1ToChar(
'!') && !backslash) {
4221 if (i + 1 < str.length()) {
4222 if (str[i + 1] == Trait::latin1ToChar(
'[')) {
4223 d.push_back({Delimiter::ImageOpen, line, i, 2, word,
false});
4236 else if (str[i] == Trait::latin1ToChar(
'(') && !backslash) {
4237 d.push_back({Delimiter::ParenthesesOpen, line, i, 1, word,
false});
4242 else if (str[i] == Trait::latin1ToChar(
']') && !backslash) {
4243 d.push_back({Delimiter::SquareBracketsClose, line, i, 1, word,
false});
4248 else if (str[i] == Trait::latin1ToChar(
')') && !backslash) {
4249 d.push_back({Delimiter::ParenthesesClose, line, i, 1, word,
false});
4254 else if (str[i] == Trait::latin1ToChar(
'<') && !backslash) {
4255 d.push_back({Delimiter::Less, line, i, 1, word,
false});
4260 else if (str[i] == Trait::latin1ToChar(
'>') && !backslash) {
4261 d.push_back({Delimiter::Greater, line, i, 1, word,
false});
// Backtick run. This branch is not gated on the backslash flag: when the
// flag is set the delimiter starts one column earlier and is one longer.
4266 else if (str[i] == Trait::latin1ToChar(
'`')) {
4267 typename Trait::String code;
4269 while (i < str.length() && str[i] == Trait::latin1ToChar(
'`')) {
4270 code.push_back(str[i]);
4274 d.push_back({Delimiter::InlineCode,
4276 i - code.length() - (backslash ? 1 : 0),
4277 code.length() + (backslash ? 1 : 0),
// Dollar run: the whole run is consumed first, then a Math delimiter is
// pushed only for unescaped runs of at most two characters.
4286 else if (str[i] == Trait::latin1ToChar(
'$')) {
4287 typename Trait::String m;
4289 while (i < str.length() && str[i] == Trait::latin1ToChar(
'$')) {
4290 m.push_back(str[i]);
4294 if (m.length() <= 2 && !backslash) {
4295 d.push_back({Delimiter::Math, line, i - m.length(), m.length(),
4296 false,
false,
false,
false});
//! Line break test: true when s ends with the space literal below or with
//! a backslash.
//! NOTE(review): the signature is missing from this listing, and the
//! whitespace inside the first literal may have been collapsed by the
//! extraction - confirm against the original file.
4317template<
class Trait>
4321 return (s.endsWith(Trait::latin1ToString(
" ")) || s.endsWith(Trait::latin1ToString(
"\\")));
//! Length of the line break marker at the end of s: 2 when s ends with the
//! space literal below, 1 otherwise.
//! NOTE(review): the signature is missing from this listing.
4325template<
class Trait>
4329 return (s.endsWith(Trait::latin1ToString(
" ")) ? 2 : 1);
//! Return s without its line break marker, as a Trait::String.
//! Visible path: when s ends with a backslash, everything except the last
//! character is returned.
//! NOTE(review): the function name, parameter list and the return for the
//! other case are missing from this listing.
4333template<
class Trait>
4334inline typename Trait::String
4337 if (s.endsWith(Trait::latin1ToString(
"\\"))) {
4338 return s.sliced(0, s.size() - 1);
4345template<
class Trait>
//! Build a text item from \c text and register it through \c po.
//! \a startPos / \a startLine and \a endPos / \a endLine are indexes into
//! po.m_fr.m_data and are used to set the item start and end column/line.
//! \a doRemoveSpacesAtEnd requests removal of trailing spaces; it is
//! forced on when \a endPos is the last index of the end line.
//! NOTE(review): the function name line, the leading parameters, the
//! creation of the item t and the condition guarding the final assignment
//! to po.m_pos are missing from this listing.
4356template<
class Trait>
4360 long long int startPos,
4361 long long int startLine,
4362 long long int endPos,
4363 long long int endLine,
4364 bool doRemoveSpacesAtEnd =
false)
// A negative endPos with a previous line available: use the last index of
// that previous line instead.
4366 if (endPos < 0 && endLine - 1 >= 0) {
4367 endPos = po.
m_fr.m_data.at(endLine - 1).first.length() - 1;
// Text reaching the end of the line always has its trailing spaces removed.
4371 if (endPos == po.
m_fr.m_data.at(endLine).first.length() - 1) {
4372 doRemoveSpacesAtEnd =
true;
// Entities are replaced first, then backslashes are removed.
4375 auto s = removeBackslashes<typename Trait::String, Trait>(replaceEntity<Trait>(text));
4377 if (doRemoveSpacesAtEnd) {
4378 removeSpacesAtEnd<typename Trait::String>(s);
// Text starting at column 0: locate the first non-space character.
4381 if (startPos == 0) {
4383 const auto p = skipSpaces<Trait>(0, s);
// Item coordinates: columns go through virginPos(), lines through the
// m_lineNumber stored with each entry.
4397 t->setStartColumn(po.
m_fr.m_data.at(startLine).first.virginPos(startPos));
4398 t->setStartLine(po.
m_fr.m_data.at(startLine).second.m_lineNumber);
4399 t->setEndColumn(po.
m_fr.m_data.at(endLine).first.virginPos(endPos,
true));
4400 t->setEndLine(po.
m_fr.m_data.at(endLine).second.m_lineNumber);
4402 initLastItemWithOpts<Trait>(po, t);
// The parent block now ends where this item ends.
4404 po.
m_parent->setEndColumn(t->endColumn());
4405 po.
m_parent->setEndLine(t->endLine());
4413 po.
m_pos = startPos;
//! Build a text item via makeTextObject() with trailing-space removal
//! switched on, then fill in a second item hr that covers the remainder of
//! the end line after \a endPos.
//! hr gets as text the end line from index endPos + 1 onwards, starts at
//! virginPos(endPos + 1), ends at virginPos() of the last index of that
//! line, and the parent end position is moved to the end of hr.
//! NOTE(review): the function name line, the leading parameters and the
//! creation of hr are missing from this listing.
4418template<
class Trait>
4422 long long int startPos,
4423 long long int startLine,
4424 long long int endPos,
4425 long long int endLine)
4427 makeTextObject(text, po, startPos, startLine, endPos, endLine,
true);
4430 hr->setText(po.
m_fr.m_data.at(endLine).first.asString().sliced(endPos + 1));
4431 hr->setStartColumn(po.
m_fr.m_data.at(endLine).first.virginPos(endPos + 1));
4432 hr->setStartLine(po.
m_fr.m_data.at(endLine).second.m_lineNumber);
4433 hr->setEndColumn(po.
m_fr.m_data.at(endLine).first.virginPos(po.
m_fr.m_data.at(endLine).first.length() - 1));
4434 hr->setEndLine(po.
m_fr.m_data.at(endLine).second.m_lineNumber);
4435 po.
m_parent->setEndColumn(hr->endColumn());
4436 po.
m_parent->setEndLine(hr->endLine());
//! Look for a table start among lines i..lastLine of po.m_fr.m_data.
//! For each line, h is the result of isTableHeader() on it and c is the
//! result of isTableAlignment() on the following line (0 when there is no
//! following line). A match requires both to be non-zero and equal.
//! NOTE(review): the function name, return type, the initial value of i
//! and the action taken on a match are missing from this listing.
4443template<
class Trait>
4446 long long int lastLine)
4451 for (; i <= lastLine; ++i) {
4452 const auto h = isTableHeader<Trait>(po.
m_fr.m_data[i].first.asString());
// The bounds check keeps i + 1 inside m_data.
4453 const auto c = i + 1 <
static_cast<long long int>(po.
m_fr.m_data.size()) ?
4454 isTableAlignment<Trait>(po.
m_fr.m_data[i + 1].first.asString()) : 0;
4456 if (h && c && c == h) {
//! Turn the text between the current position (po.m_line, po.m_pos) and
//! (\a lastLine, \a lastPos) into text items.
//! Visible logic: pieces are accumulated in \c text, line break markers are
//! detected with isLineBreak() and stripped with removeLineBreak(), and a
//! local lambda makeTOWLB handles a line that ends with a line break.
//! NOTE(review): large parts of this function, including its name line,
//! leading parameters, loop structure and the calls that emit the items,
//! are missing from this listing; the comments below describe only the
//! visible statements.
4473template<
class Trait>
4480 long long int lastLine,
4482 long long int lastPos,
// Current position is already at or past the requested end (branch bodies
// not shown).
4485 if (po.
m_line > lastLine) {
4487 }
else if (po.
m_line == lastLine && po.
m_pos >= lastPos) {
4491 typename Trait::String text;
// When the current position is past the end of its line, the text starts
// at column 0 of the next line.
4493 const auto isLastChar = po.
m_pos >= po.
m_fr.m_data.at(po.
m_line).first.length();
4494 long long int startPos = (isLastChar ? 0 : po.
m_pos);
4495 long long int startLine = (isLastChar ? po.
m_line + 1 : po.
m_line);
4499 (po.
m_line == lastLine ? (lastPos == po.
m_fr.m_data.at(po.
m_line).first.length() &&
4500 isLineBreak<Trait>(po.
m_fr.m_data.at(po.
m_line).first.virginSubString())) :
4501 isLineBreak<Trait>(po.
m_fr.m_data.at(po.
m_line).first.virginSubString())));
// Helper for a line ending with a line break; it only acts when the
// current line is not the last entry of m_data.
4504 auto makeTOWLB = [&]() {
4505 if (po.
m_line != (
long long int)(po.
m_fr.m_data.size() - 1)) {
4506 const auto &line = po.
m_fr.m_data.at(po.
m_line).first.asString();
// End index: last character before the line break marker.
4509 line.length() - lineBreakLength<Trait>(line) - 1, po.
m_line);
4512 startLine = po.
m_line + 1;
4519 text.push_back(removeLineBreak<Trait>(po.
m_fr.m_data.at(po.
m_line).first.virginSubString(po.
m_pos)));
4523 const auto length = (po.
m_line == lastLine ?
4525 const auto s = po.
m_fr.m_data.at(po.
m_line).first.virginSubString(po.
m_pos, length);
4534 po.
m_line == lastLine ? lastPos - 1 : po.
m_fr.m_data.at(po.
m_line).first.length() - 1,
4540 if (po.
m_line != lastLine) {
4547 isLineBreak<Trait>(po.
m_fr.m_data.at(po.
m_line).first.asString()));
// Full line, with the line break marker stripped when one was detected.
4549 const auto s = (lineBreak ? removeLineBreak<Trait>(po.
m_fr.m_data.at(po.
m_line).first.virginSubString()) :
4550 po.
m_fr.m_data.at(po.
m_line).first.virginSubString());
4564 lastPos == po.
m_fr.m_data.at(po.
m_line).first.length() &&
4565 isLineBreak<Trait>(po.
m_fr.m_data.at(po.
m_line).first.asString()));
// Leading part of the current line, lastPos characters long.
4567 auto s = po.
m_fr.m_data.at(po.
m_line).first.virginSubString(0, lastPos);
4576 s = removeLineBreak<Trait>(s);
//! Advance the (line l, position p) cursor over whitespace in fr.
//! While l is a valid line index, spaces are skipped from p on that line;
//! the case where p ends up inside the line gets its own branch.
//! NOTE(review): the signature, the body of that branch and the step to
//! the next line are missing from this listing. Callers test l against
//! fr.size() afterwards, so l presumably reaches fr.size() when nothing
//! but whitespace remains - confirm.
4585template<
class Trait>
4591 while (l < (
long long int)fr.size()) {
4592 p = skipSpaces<Trait>(p, fr[l].first.asString());
4594 if (p < fr[l].first.length()) {
//! Read an unquoted HTML attribute value on line l of fr starting at p.
//! Returns a pair of bool. Scanning stops on a space or on a greater-than
//! sign; meeting any character from notAllowed (double quote, backtick,
//! equals, less-than, single quote) yields {false, false}.
//! NOTE(review): the function name, parameter list, the two stopping
//! branches and the final return are missing from this listing.
4604template<
class Trait>
4605inline std::pair<bool, bool>
4610 static const typename Trait::String notAllowed = Trait::latin1ToString(
"\"`=<'");
4612 const auto start = p;
4614 for (; p < fr[l].first.length(); ++p) {
4615 if (fr[l].first[p].isSpace()) {
4617 }
else if (notAllowed.contains(fr[l].first[p])) {
4618 return {
false,
false};
4619 }
else if (fr[l].first[p] == Trait::latin1ToChar(
'>')) {
//! Read an HTML attribute value starting at (l, p) in fr.
//! Returns a pair of bool; {true, true} on success and {false, false} on
//! every visible failure path.
//! A value that does not start with a quote character is delegated to
//! readUnquotedHtmlAttrValue(). Otherwise the opening quote is remembered
//! in s, the following lines are scanned, and the character at the final
//! position must be that same quote.
//! NOTE(review): the function name, parameter list and the inner loop
//! body are missing from this listing. fr[l].first[p] is read into s
//! without a visible p < length check - confirm callers guarantee it.
4628template<
class Trait>
4629inline std::pair<bool, bool>
4634 if (p < fr[l].first.length() && fr[l].first[p] != Trait::latin1ToChar(
'"') &&
4635 fr[l].first[p] != Trait::latin1ToChar(
'\'')) {
4636 return readUnquotedHtmlAttrValue<Trait>(l, p, fr);
// The quote character that opened the value.
4639 const auto s = fr[l].first[p];
// Nothing follows the opening quote on this line.
4643 if (p >= fr[l].first.length()) {
4644 return {
false,
false};
// The scan may continue over several lines.
4647 for (; l < (
long long int)fr.size(); ++l) {
4648 bool doBreak =
false;
4650 for (; p < fr[l].first.length(); ++p) {
4651 const auto ch = fr[l].first[p];
// Ran out of lines.
4667 if (l >= (
long long int)fr.size()) {
4668 return {
false,
false};
// Ran out of characters on the current line.
4671 if (p >= fr[l].first.length()) {
4672 return {
false,
false};
// The closing character must match the opening quote.
4675 if (fr[l].first[p] != s) {
4676 return {
false,
false};
4681 return {
true,
true};
//! Try to read one HTML attribute starting at (l, p) in fr.
//! Returns a pair of bool; the caller in isHtmlTag() unpacks it as
//! (attr, ok).
//! Visible results: {false, true} when a slash or a greater-than sign is
//! found where an attribute could start, or a greater-than sign directly
//! after the name; {false, false} for running out of lines, a missing
//! separating space when \c checkForSpace is set, or an invalid name;
//! {true, true} when the name is not followed by an equals sign; otherwise
//! the result of readHtmlAttrValue().
//! NOTE(review): the function name, parameter list and part of the name
//! validation condition are missing from this listing.
4685template<
class Trait>
4686inline std::pair<bool, bool>
// Remember the cursor to detect whether any whitespace was skipped.
4692 long long int tl = l, tp = p;
4694 skipSpacesInHtml<Trait>(l, p, fr);
4696 if (l >= (
long long int)fr.size()) {
4697 return {
false,
false};
// Slash at the cursor: no attribute here.
4701 if (p < fr[l].first.length() && fr[l].first[p] == Trait::latin1ToChar(
'/')) {
4702 return {
false,
true};
// Greater-than sign at the cursor: no attribute here.
4706 if (p < fr[l].first.length() && fr[l].first[p] == Trait::latin1ToChar(
'>')) {
4707 return {
false,
true};
// With checkForSpace set, the cursor must have moved over whitespace.
4710 if (checkForSpace) {
4711 if (tl == l && tp == p) {
4712 return {
false,
false};
4716 const auto start = p;
// The name runs up to a space, a greater-than sign or an equals sign.
4718 for (; p < fr[l].first.length(); ++p) {
4719 const auto ch = fr[l].first[p];
4721 if (ch.isSpace() || ch == Trait::latin1ToChar(
'>') || ch == Trait::latin1ToChar(
'=')) {
// The name is validated in lower case.
4726 const typename Trait::String name = fr[l].first.asString().sliced(
start, p -
start).toLower();
// First character check (the rest of the condition is not shown).
4728 if (!name.
startsWith(Trait::latin1ToString(
"_")) && !name.
startsWith(Trait::latin1ToString(
":")) &&
4730 return {
false,
false};
4733 static const typename Trait::String allowedInName =
4734 Trait::latin1ToString(
"abcdefghijklmnopqrstuvwxyz0123456789_.:-");
// Every character after the first must come from allowedInName.
4736 for (
long long int i = 1; i < name.
length(); ++i) {
4737 if (!allowedInName.contains(name[i])) {
4738 return {
false,
false};
4743 if (p < fr[l].first.length() && fr[l].first[p] == Trait::latin1ToChar(
'>')) {
4744 return {
false,
true};
4750 skipSpacesInHtml<Trait>(l, p, fr);
4752 if (l >= (
long long int)fr.size()) {
4753 return {
false,
false};
// No equals sign after the name: attribute without a value.
4757 if (p < fr[l].first.length()) {
4758 if (fr[l].first[p] != Trait::latin1ToChar(
'=')) {
4762 return {
true,
true};
4767 return {
true,
false};
4770 skipSpacesInHtml<Trait>(l, p, fr);
4772 if (l >= (
long long int)fr.size()) {
4773 return {
false,
false};
4776 return readHtmlAttrValue<Trait>(l, p, fr);
//! Forward declaration of the HTML tag recognizer.
//! Takes a line index, a position in that line, the parsing options and
//! the number of the HTML block rule being checked.
//! Returns a tuple of (bool, long long int, long long int, bool, String).
//! NOTE(review): the meaning of each tuple element is defined by the
//! implementation, not by this declaration.
4780template<
class Trait>
4781inline std::tuple<bool, long long int, long long int, bool, typename Trait::String>
4782isHtmlTag(
long long int line,
long long int pos, TextParsingOpts<Trait> &po,
int rule);
//! Inspect what follows position pos on line \c line of po.m_fr.m_data.
//! Visible logic: leading spaces are skipped, then while the cursor p is
//! inside the line, isHtmlTag() is called at p; a tag whose lower-cased
//! name is in s_rule1Finish and that ends on the same line gets a
//! dedicated branch; spaces are skipped again before the next round.
//! NOTE(review): the function name, parameter list, return type and all
//! branch bodies are missing from this listing.
4785template<
class Trait>
// Closing tags looked up after each recognized tag.
4792 static const std::set<typename Trait::String> s_rule1Finish = {Trait::latin1ToString(
"/pre"),
4793 Trait::latin1ToString(
"/script"),
4794 Trait::latin1ToString(
"/style"),
4795 Trait::latin1ToString(
"/textarea")};
4797 auto p = skipSpaces<Trait>(pos, po.
m_fr.m_data[line].first.asString());
4799 while (p < po.
m_fr.m_data[line].first.length()) {
4803 typename Trait::String tag;
// The fourth tuple element is ignored here.
4805 std::tie(ok, l, p, std::ignore, tag) =
isHtmlTag(line, p, po, rule);
4818 if (s_rule1Finish.find(tag.toLower()) != s_rule1Finish.cend() && l == line) {
4827 p = skipSpaces<Trait>(p, po.
m_fr.m_data[line].first.asString());
// The cursor reached the end of the line.
4830 if (p >= po.
m_fr.m_data[line].first.length()) {
//! Scan lines startLine..endLine of po.m_fr.m_data for a line that, after
//! its leading spaces, satisfies isH1() or isH2() and is indented by fewer
//! than 4 columns.
//! NOTE(review): the function name, return type and the action taken on a
//! match are missing from this listing.
4838template<
class Trait>
4841 long long int startLine,
4842 long long int endLine)
4844 for (; startLine <= endLine; ++startLine) {
// pos is the indent; line is the content after the indent.
4845 const auto pos = skipSpaces<Trait>(0, po.
m_fr.m_data.at(startLine).first.asString());
4846 const auto line = po.
m_fr.m_data.at(startLine).first.asString().sliced(pos);
4848 if ((isH1<Trait>(line) || isH2<Trait>(line)) && pos < 4) {
//! Recognize an HTML tag that starts at (line, pos) in po.m_fr.m_data.
//! Returns a tuple built in the visible returns as
//! (recognized, line, position, flag, tag name):
//!  - on failure: {false, line, pos, first, tag}, i.e. the input position
//!    is returned unchanged;
//!  - on success: {true, l, p, onLine, tag}, where (l, p) is the position
//!    of the closing greater-than sign.
//! \c first is derived from comparing the line indent with \a pos, and
//! \c onLine combines \c first with a rule-dependent test of what follows
//! the tag. For a closing tag the name in \c tag starts with a slash.
//! NOTE(review): the function name line and parameter list are missing
//! from this listing, as are several conditions. The character at
//! (line, pos) is read without a visible bounds check - confirm callers
//! guarantee pos is inside the line.
4857template<
class Trait>
4858inline std::tuple<bool, long long int, long long int, bool, typename Trait::String>
// A tag must start with a less-than sign.
4864 if (po.
m_fr.m_data[line].first[pos] != Trait::latin1ToChar(
'<')) {
4865 return {
false, line, pos,
false, {}};
4868 typename Trait::String tag;
4870 long long int l = line;
4871 long long int p = pos + 1;
// first: the indent of the line equals pos.
4875 const auto tmp = skipSpaces<Trait>(0, po.
m_fr.m_data[l].first.asString());
4876 first = (tmp == pos);
// Nothing after the less-than sign.
4879 if (p >= po.
m_fr.m_data[l].first.length()) {
4880 return {
false, line, pos, first, tag};
4883 bool closing =
false;
// Slash directly after the less-than sign: it is recorded in tag.
4885 if (po.
m_fr.m_data[l].first[p] == Trait::latin1ToChar(
'/')) {
4888 tag.push_back(Trait::latin1ToChar(
'/'));
4893 const auto start = p;
// The tag name runs up to a space, a greater-than sign or a slash.
4896 for (; p < po.
m_fr.m_data[l].first.length(); ++p) {
4897 const auto ch = po.
m_fr.m_data[l].first[p];
4899 if (ch.isSpace() || ch == Trait::latin1ToChar(
'>') || ch == Trait::latin1ToChar(
'/')) {
4904 tag.push_back(po.
m_fr.m_data[l].first.asString().sliced(
start, p -
start));
// Name directly followed by a slash and a greater-than sign.
4906 if (p < po.
m_fr.m_data[l].first.length() && po.
m_fr.m_data[l].first[p] == Trait::latin1ToChar(
'/')) {
4907 if (p + 1 < po.
m_fr.m_data[l].first.length() &&
4908 po.
m_fr.m_data[l].first[p + 1] == Trait::latin1ToChar(
'>')) {
4909 long long int tmp = 0;
4912 tmp = skipSpaces<Trait>(p + 2, po.
m_fr.m_data[l].first.asString());
// For rule 7 the tag must be followed only by spaces up to the line end.
4915 bool onLine = (first && (rule == 7 ? tmp == po.
m_fr.m_data[l].first.length() :
4919 return {
true, l, p + 1, onLine, tag};
4921 return {
false, line, pos, first, tag};
4924 return {
false, line, pos, first, tag};
// Name directly followed by a greater-than sign.
4928 if (p < po.
m_fr.m_data[l].first.length() && po.
m_fr.m_data[l].first[p] == Trait::latin1ToChar(
'>')) {
4929 long long int tmp = 0;
4932 tmp = skipSpaces<Trait>(p + 1, po.
m_fr.m_data[l].first.asString());
4935 bool onLine = (first && (rule == 7 ? tmp == po.
m_fr.m_data[l].first.length() :
4939 return {
true, l, p, onLine, tag};
4941 return {
false, line, pos, first, tag};
// Whitespace after the name, possibly spanning lines.
4945 skipSpacesInHtml<Trait>(l, p, po.
m_fr.m_data);
4947 if (l >= (
long long int)po.
m_fr.m_data.size()) {
4948 return {
false, line, pos, first, tag};
4951 if (po.
m_fr.m_data[l].first[p] == Trait::latin1ToChar(
'>')) {
4952 long long int tmp = 0;
4955 tmp = skipSpaces<Trait>(p + 1, po.
m_fr.m_data[l].first.asString());
4958 bool onLine = (first && (rule == 7 ? tmp == po.
m_fr.m_data[l].first.length() :
4962 return {
true, l, p, onLine, tag};
4964 return {
false, line, pos, first, tag};
// Attributes: a separating space is demanded from readHtmlAttr() for
// every attribute except the first one.
4969 bool firstAttr =
true;
4974 std::tie(attr, ok) = readHtmlAttr<Trait>(l, p, po.
m_fr.m_data, !firstAttr);
// A closing tag with an attribute is rejected.
4978 if (closing && attr) {
4979 return {
false, line, pos, first, tag};
4983 return {
false, line, pos, first, tag};
// Slash after the attributes.
4987 if (po.
m_fr.m_data[l].first[p] == Trait::latin1ToChar(
'/')) {
4990 skipSpacesInHtml<Trait>(l, p, po.
m_fr.m_data);
4992 if (l >= (
long long int)po.
m_fr.m_data.size()) {
4993 return {
false, line, pos, first, tag};
// Final greater-than sign after the attributes.
4997 if (po.
m_fr.m_data[l].first[p] == Trait::latin1ToChar(
'>')) {
4998 long long int tmp = 0;
5001 tmp = skipSpaces<Trait>(p + 1, po.
m_fr.m_data[l].first.asString());
5004 bool onLine = (first && (rule == 7 ? tmp == po.
m_fr.m_data[l].first.length() :
5008 return {
true, l, p, onLine, tag};
5010 return {
false, line, pos, first, tag};
5014 return {
false, line, pos, first, {}};
//! Read the tag name that follows the delimiter \a it on its own line.
//! Returns a pair: the text between the character after the delimiter and
//! the first space or greater-than sign (or the end of the line), and a
//! flag that is true only when scanning stopped on a greater-than sign.
//! NOTE(review): the return for the case where nothing follows the
//! delimiter is missing from this listing.
5018template<
class Trait>
5019inline std::pair<typename Trait::String, bool>
5020Parser<Trait>::readHtmlTag(
typename Delims::const_iterator it,
5021 TextParsingOpts<Trait> &po)
5023 long long int i = it->m_pos + 1;
5024 const auto start = i;
// The delimiter is the last character of the line.
5026 if (
start >= po.m_fr.m_data[it->m_line].first.length()) {
5030 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5031 const auto ch = po.m_fr.m_data[it->m_line].first[i];
5033 if (ch.isSpace() || ch == Trait::latin1ToChar(
'>')) {
5038 return {po.m_fr.m_data[it->m_line].first.asString().sliced(
start, i -
start),
5039 i < po.m_fr.m_data[it->m_line].first.length() ?
5040 po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar(
'>') : false};
//! Walk the delimiters from \a it towards \a last, comparing each one with
//! the current parser position (po.m_line, po.m_pos), and return a
//! delimiter iterator.
//! The visible test matches delimiters that lie before the current
//! position: on an earlier line, or on the same line at a smaller column.
//! NOTE(review): what is done with such a delimiter and the returned value
//! are missing from this listing.
5043template<
class Trait>
5044inline typename Parser<Trait>::Delims::const_iterator
5045Parser<Trait>::findIt(
typename Delims::const_iterator it,
5046 typename Delims::const_iterator last,
5047 TextParsingOpts<Trait> &po)
5051 for (; it != last; ++it) {
5052 if ((it->m_line == po.m_line && it->m_pos < po.m_pos) || it->m_line < po.m_line) {
//! Append the text from (line, pos) up to (\a toLine, \a toPos) to the raw
//! HTML item held in po.m_html.m_html and advance the parser position.
//! A negative \a toPos means the whole of \a toLine is consumed: the text
//! is taken up to the end of that line and the parser position moves to
//! column 0 of the following line.
//! \a continueEating suppresses the insertion of the newlines that stand
//! for the empty lines before the fragment.
//! NOTE(review): the function name line and several parameters are missing
//! from this listing, as are the conditions around the final statements.
5063template<
class Trait>
5067 long long int toLine,
5068 long long int toPos,
5073 bool continueEating =
false)
5075 if (line <= toLine) {
// Start from the HTML text collected so far.
5076 typename Trait::String h = po.
m_html.m_html->text();
// One newline per empty line before the fragment.
5078 if (!h.isEmpty() && !continueEating) {
5079 for (
long long int i = 0; i < po.
m_fr.m_emptyLinesBefore; ++i) {
5080 h.push_back(Trait::latin1ToChar(
'\n'));
// Piece of the first line: up to toPos when the range ends on this line
// and toPos is not negative, otherwise up to the end of the line.
5084 const auto first = po.
m_fr.m_data[line].first.asString().sliced(
5086 (line == toLine ? (toPos >= 0 ? toPos - pos : po.
m_fr.m_data[line].first.length() - pos) :
5087 po.
m_fr.m_data[line].first.length() - pos));
// Separate from the previous text when it ended on a different line.
5089 if (!h.isEmpty() && !first.isEmpty() && po.
m_html.m_html->endLine() != po.
m_fr.m_data[line].second.m_lineNumber) {
5090 h.push_back(Trait::latin1ToChar(
'\n'));
5093 if (!first.isEmpty()) {
// Whole lines before toLine.
5099 for (; line < toLine; ++line) {
5100 h.push_back(Trait::latin1ToChar(
'\n'));
5101 h.push_back(po.
m_fr.m_data[line].first.asString());
// Last line: nothing is taken when toPos is 0.
5104 if (line == toLine && toPos != 0) {
5105 h.push_back(Trait::latin1ToChar(
'\n'));
5106 h.push_back(po.
m_fr.m_data[line].first.asString().sliced(0, toPos > 0 ?
5107 toPos : po.
m_fr.m_data[line].first.length()));
5110 auto endColumn = toPos;
5111 auto endLine = toLine;
5113 if (endColumn == 0 && endLine > 0) {
5115 endColumn = po.
m_fr.m_data.at(endLine).first.length();
// End position of the HTML item, mapped through virginPos().
5118 po.
m_html.m_html->setEndColumn(po.
m_fr.m_data.at(endLine).first.virginPos(endColumn >= 0 ?
5119 endColumn - 1 : po.
m_fr.m_data.at(endLine).first.length() - 1));
5120 po.
m_html.m_html->setEndLine(po.
m_fr.m_data.at(endLine).second.m_lineNumber);
// New parser position.
5122 po.
m_line = (toPos >= 0 ? toLine : toLine + 1);
5123 po.
m_pos = (toPos >= 0 ? toPos : 0);
5125 if (po.
m_line + 1 <
static_cast<long long int>(po.
m_fr.m_data.size()) &&
5131 po.
m_html.m_html->setText(h);
5137 if (po.
m_html.m_onLine || htmlRule == 7 || po.
m_line < (
long long int)po.
m_fr.m_data.size()) {
5142 initLastItemWithOpts<Trait>(po, po.
m_html.m_html);
5152 po.
m_html.m_continueHtml =
true;
//! Check the lines after \a startLine up to and including \a endLine of
//! \a fr for the start of another block.
//! Each line is classified with whatIsTheLine(); the listed block types
//! share one branch. In addition the line content after its leading
//! spaces is tested with isHorizontalLine(), isH1() and isH2().
//! NOTE(review): the return type and all branch bodies are missing from
//! this listing.
5156template<
class Trait>
5158Parser<Trait>::isNewBlockIn(MdBlock<Trait> &fr,
5159 long long int startLine,
5160 long long int endLine)
// The start line itself is not examined.
5162 for (
auto i = startLine + 1; i <= endLine; ++i) {
5163 const auto type = whatIsTheLine(fr.m_data[i].first);
5166 case Parser<Trait>::BlockType::Footnote:
5167 case Parser<Trait>::BlockType::FensedCodeInList:
5168 case Parser<Trait>::BlockType::SomethingInList:
5169 case Parser<Trait>::BlockType::List:
5170 case Parser<Trait>::BlockType::ListWithFirstEmptyLine:
5171 case Parser<Trait>::BlockType::Code:
5172 case Parser<Trait>::BlockType::Blockquote:
5173 case Parser<Trait>::BlockType::Heading:
5174 case Parser<Trait>::BlockType::EmptyLine:
5181 const auto ns = skipSpaces<Trait>(0, fr.m_data[i].first.asString());
5184 const auto s = fr.m_data[i].first.asString().sliced(ns);
5186 if (isHorizontalLine<Trait>(s) || isH1<Trait>(s) || isH2<Trait>(s)) {
//! Finish a raw HTML block of rule 1.
//! When the HTML item is still empty, \a it is a Less delimiter and
//! skipFirst is set, the opening tag is recognized with isHtmlTag() and
//! po.m_html.m_onLine is taken from its result.
//! If the tag is on its own line, the following Less delimiters are
//! searched for one of the closing tags in s_finish and the HTML is eaten
//! up to the end of that line; when none is found everything up to the
//! last line of the fragment is eaten. Otherwise, when the opening tag was
//! recognized and no new block starts inside it, only the tag is eaten.
//! NOTE(review): the return type, the declaration of skipFirst and ok and
//! several branch ends are missing from this listing.
5195template<
class Trait>
5197Parser<Trait>::finishRule1HtmlTag(
typename Delims::const_iterator it,
5198 typename Delims::const_iterator last,
5199 TextParsingOpts<Trait> &po,
5202 static const std::set<typename Trait::String> s_finish = {Trait::latin1ToString(
"/pre"),
5203 Trait::latin1ToString(
"/script"),
5204 Trait::latin1ToString(
"/style"),
5205 Trait::latin1ToString(
"/textarea")};
5209 long long int l = -1, p = -1;
5211 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less && skipFirst) {
5212 std::tie(ok, l, p, po.m_html.m_onLine, std::ignore) =
5213 isHtmlTag(it->m_line, it->m_pos, po, 1);
5216 if (po.m_html.m_onLine) {
// With skipFirst set the search starts after the opening delimiter.
5217 for (it = (skipFirst && it != last ? std::next(it) : it); it != last; ++it) {
5218 if (it->m_type == Delimiter::Less) {
5219 typename Trait::String tag;
5220 bool closed =
false;
5222 std::tie(tag, closed) = readHtmlTag(it, po);
// Closing tag found: eat through the end of its line (toPos is -1).
5225 if (s_finish.find(tag.toLower()) != s_finish.cend()) {
5226 eatRawHtml(po.m_line, po.m_pos, it->m_line, -1, po,
5227 true, 1, po.m_html.m_onLine);
5234 }
// Inline case: eat just the recognized tag, up to the column after it.
else if (ok && !isNewBlockIn(po.m_fr, it->m_line, l)) {
5235 eatRawHtml(po.m_line, po.m_pos, l, p + 1, po,
true, 1,
false);
// No closing tag in this fragment: eat everything that is left.
5245 if (po.m_html.m_onLine) {
5246 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po,
false, 1, po.m_html.m_onLine);
//! Finish a raw HTML block of rule 2.
//! When the HTML item is still empty and \a it is a Less delimiter, the
//! comment data stored for that position is looked up and the on-line flag
//! is set from whether the delimiter sits at the indent of its line.
//! If the comment data passes the visible test, the following Greater
//! delimiters are examined: commentData.first characters before the
//! delimiter are compared against minus signs, and on success the HTML is
//! eaten up to the end of the line (on-line case) or up to the column
//! after the delimiter. When nothing closes the block and it is on-line,
//! everything up to the last line of the fragment is eaten.
//! NOTE(review): the return type and several lines of the inner loop are
//! missing from this listing.
5252template<
class Trait>
5254Parser<Trait>::finishRule2HtmlTag(
typename Delims::const_iterator it,
5255 typename Delims::const_iterator last,
5256 TextParsingOpts<Trait> &po)
5259 const auto start = it;
// Default comment data, replaced below when the block is just starting.
5261 MdLineData::CommentData commentData = {2,
true};
5262 bool onLine = po.m_html.m_onLine;
5264 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
// The comment data map is keyed by the virgin position of the delimiter.
5265 long long int i = po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos);
5267 commentData = po.m_fr.m_data[it->m_line].second.m_htmlCommentData[i];
5269 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5270 po.m_html.m_onLine = onLine;
5273 if (commentData.first != -1 && commentData.second) {
5274 for (; it != last; ++it) {
5275 if (it->m_type == Delimiter::Greater) {
5278 bool doContinue =
false;
// Check the characters that precede the greater-than sign.
5280 for (
char i = 0; i < commentData.first; ++i) {
5281 if (!(p > 0 && po.m_fr.m_data[it->m_line].first[p - 1] == Trait::latin1ToChar(
'-'))) {
5294 if (onLine || !isNewBlockIn(po.m_fr,
start->m_line, it->m_line)) {
5296 onLine ? po.m_fr.m_data[it->m_line].first.length() : it->m_pos + 1,
5297 po,
true, 2, onLine);
5308 if (po.m_html.m_onLine) {
5309 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po,
false, 2, po.m_html.m_onLine);
//! Finish a raw HTML block of rule 3.
//! The block is closed by a Greater delimiter that is directly preceded by
//! a question mark. From the column after it, the rest of the line is
//! scanned for a less-than sign to find the end column i, and the HTML is
//! eaten up to (line of the delimiter, i) when the block is on-line or no
//! new block starts in between. When nothing closes the block and it is
//! on-line, everything up to the last line of the fragment is eaten.
//! NOTE(review): the return type and the ends of several branches are
//! missing from this listing.
5315template<
class Trait>
5317Parser<Trait>::finishRule3HtmlTag(
typename Delims::const_iterator it,
5318 typename Delims::const_iterator last,
5319 TextParsingOpts<Trait> &po)
5321 bool onLine = po.m_html.m_onLine;
5324 const auto start = it;
// Block is just starting: on-line means the delimiter is at the indent.
5326 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5327 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5328 po.m_html.m_onLine = onLine;
5331 for (; it != last; ++it) {
5332 if (it->m_type == Delimiter::Greater) {
5333 if (it->m_pos > 0 && po.m_fr.m_data[it->m_line].first[it->m_pos - 1] == Trait::latin1ToChar(
'?')) {
5334 long long int i = it->m_pos + 1;
// Look for the next less-than sign on the same line.
5336 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5337 if (po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar(
'<')) {
5342 if (onLine || !isNewBlockIn(po.m_fr,
start->m_line, it->m_line)) {
5343 eatRawHtml(po.m_line, po.m_pos, it->m_line, i, po,
true, 3, onLine);
5354 if (po.m_html.m_onLine) {
5355 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po,
false, 3, onLine);
//! Finish a raw HTML block of rule 4.
//! The block is closed by the first Greater delimiter. From the column
//! after it, the rest of the line is scanned for a less-than sign to find
//! the end column i, and the HTML is eaten up to (line of the delimiter,
//! i) when the block is on-line or no new block starts in between. When
//! nothing closes the block and it is on-line, everything up to the last
//! line of the fragment is eaten.
//! NOTE(review): the return type and the ends of several branches are
//! missing from this listing.
5361template<
class Trait>
5363Parser<Trait>::finishRule4HtmlTag(
typename Delims::const_iterator it,
5364 typename Delims::const_iterator last,
5365 TextParsingOpts<Trait> &po)
5368 const auto start = it;
5370 bool onLine = po.m_html.m_onLine;
// Block is just starting: on-line means the delimiter is at the indent.
5372 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5373 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5374 po.m_html.m_onLine = onLine;
5377 for (; it != last; ++it) {
5378 if (it->m_type == Delimiter::Greater) {
5379 long long int i = it->m_pos + 1;
// Look for the next less-than sign on the same line.
5381 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5382 if (po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar(
'<')) {
5387 if (onLine || !isNewBlockIn(po.m_fr,
start->m_line, it->m_line)) {
5388 eatRawHtml(po.m_line, po.m_pos, it->m_line, i, po,
true, 4, onLine);
5398 if (po.m_html.m_onLine) {
5399 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po,
false, 4,
true);
//! Finish a raw HTML block of rule 5.
//! The block is closed by a Greater delimiter that is directly preceded by
//! two closing square brackets. From the column after it, the rest of the
//! line is scanned for a less-than sign to find the end column i, and the
//! HTML is eaten up to (line of the delimiter, i) when the block is
//! on-line or no new block starts in between. When nothing closes the
//! block and it is on-line, everything up to the last line of the fragment
//! is eaten.
//! NOTE(review): the return type and the ends of several branches are
//! missing from this listing.
5405template<
class Trait>
5407Parser<Trait>::finishRule5HtmlTag(
typename Delims::const_iterator it,
5408 typename Delims::const_iterator last,
5409 TextParsingOpts<Trait> &po)
5412 const auto start = it;
5414 bool onLine = po.m_html.m_onLine;
// Block is just starting: on-line means the delimiter is at the indent.
5416 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5417 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5418 po.m_html.m_onLine = onLine;
5421 for (; it != last; ++it) {
5422 if (it->m_type == Delimiter::Greater) {
// m_pos > 1 keeps both look-behind reads inside the line.
5423 if (it->m_pos > 1 && po.m_fr.m_data[it->m_line].first[it->m_pos - 1] == Trait::latin1ToChar(
']') &&
5424 po.m_fr.m_data[it->m_line].first[it->m_pos - 2] == Trait::latin1ToChar(
']')) {
5425 long long int i = it->m_pos + 1;
// Look for the next less-than sign on the same line.
5427 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5428 if (po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar(
'<')) {
5433 if (onLine || !isNewBlockIn(po.m_fr,
start->m_line, it->m_line)) {
5434 eatRawHtml(po.m_line, po.m_pos, it->m_line, i, po,
true, 5, onLine);
5445 if (po.m_html.m_onLine) {
5446 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po,
false, 5,
true);
//! Finish a raw HTML block of rule 6.
//! The block counts as on-line when \a it equals \a last or when the
//! delimiter sits at the indent of its line. An on-line block eats
//! everything up to the last line of the fragment. Otherwise the next
//! Greater delimiter is searched for and, if found and no new block starts
//! in between, the HTML is eaten up to the column after that delimiter.
//! Finally, when the fragment is followed by an empty line and an HTML
//! item exists, po.m_html.m_continueHtml is cleared.
//! NOTE(review): the return type and the tail of the second eatRawHtml()
//! call are missing from this listing.
5452template<
class Trait>
5454Parser<Trait>::finishRule6HtmlTag(
typename Delims::const_iterator it,
5455 typename Delims::const_iterator last,
5456 TextParsingOpts<Trait> &po)
5458 po.m_html.m_onLine = (it != last ?
5459 it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()) : true);
5461 if (po.m_html.m_onLine) {
5462 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po,
5463 false, 6, po.m_html.m_onLine);
// Inline case: find the greater-than sign that ends the tag.
5465 const auto nit = std::find_if(std::next(it), last, [](const auto &d) {
5466 return (d.m_type == Delimiter::Greater);
5469 if (nit != last && !isNewBlockIn(po.m_fr, it->m_line, nit->m_line)) {
5470 eatRawHtml(po.m_line, po.m_pos, nit->m_line, nit->m_pos + nit->m_len, po,
5475 if (po.m_fr.m_emptyLineAfter && po.m_html.m_html) {
5476 po.m_html.m_continueHtml =
false;
//! Dispatch on po.m_html.m_htmlBlockType to the matching finishRuleN
//! routine and return the delimiter iterator to continue from.
//! po.m_detected is set to HTML up front. Rules 1 to 6 call their routine
//! and then fall through to the common findIt() return; rule 7 returns
//! the result of finishRule7HtmlTag() directly. One further path resets
//! po.m_detected to Nothing.
//! NOTE(review): the case labels and the declaration of skipFirst are
//! missing from this listing; the mapping of routines to block type values
//! is inferred from the routine names - confirm.
5480template<
class Trait>
5481inline typename Parser<Trait>::Delims::const_iterator
5482Parser<Trait>::finishRawHtmlTag(
typename Delims::const_iterator it,
5483 typename Delims::const_iterator last,
5484 TextParsingOpts<Trait> &po,
5487 po.m_detected = TextParsingOpts<Trait>::Detected::HTML;
5489 switch (po.m_html.m_htmlBlockType) {
5491 finishRule1HtmlTag(it, last, po, skipFirst);
5495 finishRule2HtmlTag(it, last, po);
5499 finishRule3HtmlTag(it, last, po);
5503 finishRule4HtmlTag(it, last, po);
5507 finishRule5HtmlTag(it, last, po);
5511 finishRule6HtmlTag(it, last, po);
5515 return finishRule7HtmlTag(it, last, po);
5518 po.m_detected = TextParsingOpts<Trait>::Detected::Nothing;
// Common exit: reposition the iterator relative to the new parser position.
5522 return findIt(it, last, po);
// Determines which HTML block rule applies to the tag that starts at
// delimiter it.
//
// it, last - current delimiter and end of the delimiter sequence.
// po       - text parsing options, forwarded to readHtmlTag()/isHtmlTag().
//
// NOTE(review): callers in this file compare the result against -1 and 7
// and store it into po.m_html.m_htmlBlockType, so the result is presumably
// a rule number in 1..7 or -1 when nothing matched - confirm.
5525template<
class Trait>
5527Parser<Trait>::htmlTagRule(
typename Delims::const_iterator it,
5528 typename Delims::const_iterator last,
5529 TextParsingOpts<Trait> &po)
5533 typename Trait::String tag;
// Only the first element of the readHtmlTag() result (the tag text) is used.
5535 std::tie(tag, std::ignore) = readHtmlTag(it, po);
// This check runs before the tag is lower-cased, so it is case-sensitive.
5537 if (tag.startsWith(Trait::latin1ToString(
"![CDATA["))) {
// All checks below work on the lower-cased tag.
5541 tag = tag.toLower();
// Characters accepted in a tag name after its first character.
5543 static const typename Trait::String s_validHtmlTagLetters =
5544 Trait::latin1ToString(
"abcdefghijklmnopqrstuvwxyz0123456789-");
5546 bool closing =
false;
// NOTE(review): a leading "/" presumably marks a closing tag and sets
// closing - confirm.
5548 if (tag.startsWith(Trait::latin1ToString(
"/"))) {
// A trailing "/" is removed from the tag name.
5553 if (tag.endsWith(Trait::latin1ToString(
"/"))) {
5554 tag.remove(tag.size() - 1, 1);
5557 if (tag.isEmpty()) {
// The first character has to be '!', '?' or a code unit in 97..122
// (ASCII 'a'..'z').
5561 if (!tag.startsWith(Trait::latin1ToString(
"!")) &&
5562 !tag.startsWith(Trait::latin1ToString(
"?")) &&
5563 !(tag[0].unicode() >= 97 && tag[0].unicode() <= 122)) {
// Tag names checked first; they only match for non-closing tags.
5567 static const std::set<typename Trait::String> s_rule1 = {Trait::latin1ToString(
"pre"),
5568 Trait::latin1ToString(
"script"),
5569 Trait::latin1ToString(
"style"),
5570 Trait::latin1ToString(
"textarea")};
5572 if (!closing && s_rule1.find(tag) != s_rule1.cend()) {
5574 }
else if (tag.startsWith(Trait::latin1ToString(
"!--"))) {
5576 }
else if (tag.startsWith(Trait::latin1ToString(
"?"))) {
5578 }
// '!' followed by an ASCII letter (65..90 or 97..122).
else if (tag.startsWith(Trait::latin1ToString(
"!")) && tag.size() > 1 &&
5579 ((tag[1].unicode() >= 65 && tag[1].unicode() <= 90) ||
5580 (tag[1].unicode() >= 97 && tag[1].unicode() <= 122))) {
// Tag names looked up after the tag characters have been validated.
5583 static const std::set<typename Trait::String> s_rule6 = {
5584 Trait::latin1ToString(
"address"), Trait::latin1ToString(
"article"), Trait::latin1ToString(
"aside"), Trait::latin1ToString(
"base"),
5585 Trait::latin1ToString(
"basefont"), Trait::latin1ToString(
"blockquote"), Trait::latin1ToString(
"body"), Trait::latin1ToString(
"caption"),
5586 Trait::latin1ToString(
"center"), Trait::latin1ToString(
"col"), Trait::latin1ToString(
"colgroup"), Trait::latin1ToString(
"dd"),
5587 Trait::latin1ToString(
"details"), Trait::latin1ToString(
"dialog"), Trait::latin1ToString(
"dir"), Trait::latin1ToString(
"div"),
5588 Trait::latin1ToString(
"dl"), Trait::latin1ToString(
"dt"), Trait::latin1ToString(
"fieldset"), Trait::latin1ToString(
"figcaption"),
5589 Trait::latin1ToString(
"figure"), Trait::latin1ToString(
"footer"), Trait::latin1ToString(
"form"), Trait::latin1ToString(
"frame"),
5590 Trait::latin1ToString(
"frameset"), Trait::latin1ToString(
"h1"), Trait::latin1ToString(
"h2"), Trait::latin1ToString(
"h3"),
5591 Trait::latin1ToString(
"h4"), Trait::latin1ToString(
"h5"), Trait::latin1ToString(
"h6"), Trait::latin1ToString(
"head"),
5592 Trait::latin1ToString(
"header"), Trait::latin1ToString(
"hr"), Trait::latin1ToString(
"html"), Trait::latin1ToString(
"iframe"),
5593 Trait::latin1ToString(
"legend"), Trait::latin1ToString(
"li"), Trait::latin1ToString(
"link"), Trait::latin1ToString(
"main"),
5594 Trait::latin1ToString(
"menu"), Trait::latin1ToString(
"menuitem"), Trait::latin1ToString(
"nav"), Trait::latin1ToString(
"noframes"),
5595 Trait::latin1ToString(
"ol"), Trait::latin1ToString(
"optgroup"), Trait::latin1ToString(
"option"), Trait::latin1ToString(
"p"),
5596 Trait::latin1ToString(
"param"), Trait::latin1ToString(
"section"), Trait::latin1ToString(
"search"), Trait::latin1ToString(
"summary"),
5597 Trait::latin1ToString(
"table"), Trait::latin1ToString(
"tbody"), Trait::latin1ToString(
"td"), Trait::latin1ToString(
"tfoot"),
5598 Trait::latin1ToString(
"th"), Trait::latin1ToString(
"thead"), Trait::latin1ToString(
"title"), Trait::latin1ToString(
"tr"),
5599 Trait::latin1ToString(
"track"), Trait::latin1ToString(
"ul")};
// Validation starts at index 1: the first character was already checked
// above.
5601 for (
long long int i = 1; i < tag.size(); ++i) {
5602 if (!s_validHtmlTagLetters.contains(tag[i])) {
5607 if (s_rule6.find(tag) != s_rule6.cend()) {
// Last attempt: isHtmlTag() is asked with rule number 7; only the first
// element of its result is kept (stored into tag).
5612 std::tie(tag, std::ignore, std::ignore, std::ignore, std::ignore) =
5613 isHtmlTag(it->m_line, it->m_pos, po, 7);
// Starts a raw HTML item for the tag at delimiter it and hands it over to
// finishRawHtmlTag().
//
// it, last - current delimiter and end of the delimiter sequence.
// po       - text parsing options; po.m_html is (re)initialised here.
//
// Returns the delimiter iterator produced by finishRawHtmlTag() on the
// visible path.
5624template<
class Trait>
5625inline typename Parser<Trait>::Delims::const_iterator
5626Parser<Trait>::checkForRawHtml(
typename Delims::const_iterator it,
5627 typename Delims::const_iterator last,
5628 TextParsingOpts<Trait> &po)
// Classify the tag first; the rule number is stored below as the block type.
5630 const auto rule = htmlTagRule(it, last, po);
5635 po.m_firstInParagraph =
false;
5640 po.m_html.m_htmlBlockType = rule;
// A fresh RawHtml item replaces whatever po.m_html.m_html held before.
5641 po.m_html.m_html.reset(
new RawHtml<Trait>);
// The start position of the item is the opening delimiter, translated with
// virginPos() and the stored line number of that line.
5642 po.m_html.m_html->setStartColumn(po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos));
5643 po.m_html.m_html->setStartLine(po.m_fr.m_data.at(it->m_line).second.m_lineNumber);
// The last argument is passed as true (it is forwarded as skipFirst to the
// rule 1 helper by finishRawHtmlTag()).
5645 return finishRawHtmlTag(it, last, po,
true);
// Finishes a raw HTML tag that was classified with rule number 7 (the
// literal 7 is what this function passes to isHtmlTag() and eatRawHtml()).
//
// it, last - current delimiter and end of the delimiter sequence.
// po       - text parsing options; po.m_html.m_onLine is updated here.
//
// Returns a delimiter iterator; on the visible paths this is the delimiter
// before a following tag of another rule, or the one before last.
5648template<
class Trait>
5649inline typename Parser<Trait>::Delims::const_iterator
5650Parser<Trait>::finishRule7HtmlTag(
typename Delims::const_iterator it,
5651 typename Delims::const_iterator last,
5652 TextParsingOpts<Trait> &po)
5655 const auto start = it;
5656 long long int l = -1, p = -1;
5657 bool onLine =
false;
// NOTE(review): l and p presumably receive the line and position where the
// tag ends - confirm in isHtmlTag().
5660 std::tie(ok, l, p, onLine, std::ignore) =
isHtmlTag(it->m_line, it->m_pos, po, 7);
// "On line" additionally requires the tag to be on line 0 of the fragment
// and l to be the line where the tag started.
5662 onLine = onLine && it->m_line == 0 && l ==
start->m_line;
// p is pre-incremented, so the position passed on is one past p.
5665 eatRawHtml(po.m_line, po.m_pos, l, ++p, po, !onLine, 7, onLine);
5667 po.m_html.m_onLine = onLine;
5669 it = findIt(it, last, po);
// Scan the following delimiters for a Less delimiter whose tag belongs to
// a rule other than 7.
5672 for (; it != last; ++it) {
5673 if (it->m_type == Delimiter::Less) {
5674 const auto rule = htmlTagRule(it, last, po);
5676 if (rule != -1 && rule != 7) {
// Consume up to the position of that tag and return the delimiter before it.
5677 eatRawHtml(po.m_line, po.m_pos, it->m_line, it->m_pos, po,
true, 7, onLine,
true);
5679 return std::prev(it);
// No such tag: consume up to the last line of the fragment (position -1).
5684 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po,
false, 7, onLine,
true);
5686 return std::prev(last);
5694 if (po.m_html.m_onLine) {
5695 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po,
true, 7,
true);
// Tries to build a Math item from the Math delimiter at it and a matching
// closing Math delimiter.
//
// it, last - opening delimiter and end of the delimiter sequence.
// po       - text parsing options; position and state flags are updated.
//
// Returns a delimiter iterator (the return statements are outside the
// visible lines).
5706template<
class Trait>
5707inline typename Parser<Trait>::Delims::const_iterator
5708Parser<Trait>::checkForMath(
typename Delims::const_iterator it,
5709 typename Delims::const_iterator last,
5710 TextParsingOpts<Trait> &po)
5712 po.m_wasRefLink =
false;
5713 po.m_firstInParagraph =
false;
// The closing delimiter is the next Math delimiter of the same length as
// the opening one.
5715 const auto end = std::find_if(std::next(it), last, [&](
const auto &d) {
5716 return (d.m_type == Delimiter::Math && d.m_len == it->m_len);
// It is only accepted when it lies on or before the last text line.
5719 if (end != last &&
end->m_line <= po.m_lastTextLine) {
5720 typename Trait::String math;
// Same line: the expression is the text between the two delimiters.
5722 if (it->m_line ==
end->m_line) {
5723 math = po.m_fr.m_data[it->m_line].first.asString().sliced(
5724 it->m_pos + it->m_len,
end->m_pos - (it->m_pos + it->m_len));
// Several lines: rest of the first line, each full line in between and the
// start of the last line, each part preceded by '\n'.
5726 math = po.m_fr.m_data[it->m_line].first.asString().sliced(it->m_pos + it->m_len);
5728 for (
long long int i = it->m_line + 1; i < end->m_line; ++i) {
5729 math.push_back(Trait::latin1ToChar(
'\n'));
5730 math.push_back(po.m_fr.m_data[i].first.asString());
5733 math.push_back(Trait::latin1ToChar(
'\n'));
5734 math.push_back(po.m_fr.m_data[
end->m_line].first.asString().sliced(0,
end->m_pos));
// The item itself is only created when reference links are not being
// collected.
5737 if (!po.m_collectRefLinks) {
5738 std::shared_ptr<Math<Trait>> m(
new Math<Trait>);
5740 auto startLine = po.m_fr.m_data.at(it->m_line).second.m_lineNumber;
5741 auto startColumn = po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len);
// If the opening delimiter ends at or past the end of its line, the start
// is moved with nextPosition().
5743 if (it->m_pos + it->m_len >= po.m_fr.m_data.at(it->m_line).first.length()) {
5744 std::tie(startColumn, startLine) =
nextPosition(po.m_fr, startColumn, startLine);
5747 auto endColumn = po.m_fr.m_data.at(
end->m_line).first.virginPos(
end->m_pos);
5748 auto endLine = po.m_fr.m_data.at(
end->m_line).second.m_lineNumber;
// If the closing delimiter is at column 0, the end is moved with
// prevPosition().
5750 if (endColumn == 0) {
5751 std::tie(endColumn, endLine) =
prevPosition(po.m_fr, endColumn, endLine);
5756 m->setStartColumn(startColumn);
5757 m->setStartLine(startLine);
5758 m->setEndColumn(endColumn);
5759 m->setEndLine(endLine);
// A delimiter of length 1 makes the item inline.
5760 m->setInline(it->m_len == 1);
// Delimiter ranges: first and last character of each delimiter, as virgin
// column plus stored line number.
5761 m->setStartDelim({po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos),
5762 po.m_fr.m_data[it->m_line].second.m_lineNumber,
5763 po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos + it->m_len - 1),
5764 po.m_fr.m_data[it->m_line].second.m_lineNumber});
5765 m->setEndDelim({po.m_fr.m_data[
end->m_line].first.virginPos(
end->m_pos),
5766 po.m_fr.m_data[
end->m_line].second.m_lineNumber,
5767 po.m_fr.m_data[
end->m_line].first.virginPos(
end->m_pos +
end->m_len - 1),
5768 po.m_fr.m_data[
end->m_line].second.m_lineNumber});
5769 m->setFensedCode(
false);
5771 initLastItemWithOpts<Trait>(po, m);
// Surrounding backticks are stripped from the expression, unless the
// closing backtick is preceded by a backslash.
5773 if (math.startsWith(Trait::latin1ToString(
"`")) &&
5774 math.endsWith(Trait::latin1ToString(
"`")) &&
5775 !math.endsWith(Trait::latin1ToString(
"\\`")) &&
5776 math.length() > 1) {
5777 math = math.sliced(1, math.length() - 2);
5782 po.m_parent->appendItem(m);
// Parsing continues right after the closing delimiter.
5784 po.m_pos =
end->m_pos +
end->m_len;
5785 po.m_line =
end->m_line;
5786 po.m_lastText =
nullptr;
// Tries to interpret the text between the delimiter at it and the next
// Greater delimiter as an autolink (URL or e-mail); the other visible paths
// hand over to checkForRawHtml().
//
// it, last - opening delimiter and end of the delimiter sequence.
// po       - text parsing options; position and state flags are updated.
// One more parameter follows po; it is outside the visible lines.
//
// Returns a delimiter iterator.
5795template<
class Trait>
5796inline typename Parser<Trait>::Delims::const_iterator
5797Parser<Trait>::checkForAutolinkHtml(
typename Delims::const_iterator it,
5798 typename Delims::const_iterator last,
5799 TextParsingOpts<Trait> &po,
5802 const auto nit = std::find_if(std::next(it), last, [](
const auto &d) {
5803 return (d.m_type == Delimiter::Greater);
// Only a closing delimiter on the same line is considered here.
// NOTE(review): nit is dereferenced on this line; a nit != last guard is
// presumably on a line not visible here - confirm.
5807 if (nit->m_line == it->m_line) {
// Candidate URL: the text strictly between the two delimiters.
5808 const auto url = po.m_fr.m_data.at(it->m_line).first.asString().sliced(
5809 it->m_pos + 1, nit->m_pos - it->m_pos - 1);
// The candidate is scanned for whitespace characters.
5813 for (
long long int i = 0; i < url.size(); ++i) {
5814 if (url[i].isSpace()) {
// Neither a valid URL nor an e-mail address.
5822 if (!isValidUrl<Trait>(url) && !isEmail<Trait>(url)) {
// The Link item is only created when reference links are not being
// collected.
5828 if (!po.m_collectRefLinks) {
5829 std::shared_ptr<Link<Trait>> lnk(
new Link<Trait>);
// The link spans from the opening delimiter to the last character of the
// closing one.
5830 lnk->setStartColumn(po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos));
5831 lnk->setStartLine(po.m_fr.m_data.at(it->m_line).second.m_lineNumber);
5832 lnk->setEndColumn(po.m_fr.m_data.at(nit->m_line).first.virginPos(nit->m_pos + nit->m_len - 1));
5833 lnk->setEndLine(po.m_fr.m_data.at(nit->m_line).second.m_lineNumber);
5835 lnk->setOpts(po.m_opts);
// Text position: the characters strictly between the delimiters; the URL
// position is set to the same range.
5836 lnk->setTextPos({po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos + 1),
5837 po.m_fr.m_data[it->m_line].second.m_lineNumber,
5838 po.m_fr.m_data[nit->m_line].first.virginPos(nit->m_pos - 1),
5839 po.m_fr.m_data[nit->m_line].second.m_lineNumber});
5840 lnk->setUrlPos(lnk->textPos());
5841 po.m_parent->appendItem(lnk);
5844 po.m_wasRefLink =
false;
5845 po.m_firstInParagraph =
false;
5846 po.m_lastText =
nullptr;
// Parsing continues right after the closing delimiter.
5849 po.m_pos = nit->m_pos + nit->m_len;
5850 po.m_line = nit->m_line;
// Remaining paths delegate to checkForRawHtml().
5855 return checkForRawHtml(it, last, po);
5858 return checkForRawHtml(it, last, po);
5861 return checkForRawHtml(it, last, po);
// Builds an inline Code item from the text between po's current position
// and (lastLine, lastPos), and appends it to po.m_parent.
//
// startLine, startPos - where the code text starts (used for the item's
//                       start position).
// lastLine, lastPos   - where the code text ends; lastPos itself is excluded.
// po                  - text parsing options; po.m_line is advanced here.
// startDelimIt        - opening delimiter, used for the start delimiter range.
// endDelimIt          - closing delimiter, used for the end delimiter range.
5865template<
class Trait>
5867Parser<Trait>::makeInlineCode(
long long int startLine,
5868 long long int startPos,
5869 long long int lastLine,
5870 long long int lastPos,
5871 TextParsingOpts<Trait> &po,
5872 typename Delims::const_iterator startDelimIt,
5873 typename Delims::const_iterator endDelimIt)
5875 typename Trait::String c;
// Collect the text line by line, starting at po.m_pos; on the last line the
// slice stops before lastPos, on other lines it runs to the end of the line.
5877 for (; po.m_line <= lastLine; ++po.m_line) {
5878 c.push_back(po.m_fr.m_data.at(po.m_line).first.asString().sliced(
5879 po.m_pos, (po.m_line == lastLine ? lastPos - po.m_pos :
5880 po.m_fr.m_data.at(po.m_line).first.length() - po.m_pos)));
// Lines are joined with a single space.
5882 if (po.m_line < lastLine) {
5883 c.push_back(Trait::latin1ToChar(
' '));
// The loop leaves po.m_line one past lastLine; it is set back here.
5889 po.m_line = lastLine;
// Applies when the text both starts and ends with a space and, per
// skipSpaces(), does not consist of spaces only.
// NOTE(review): c[0] is read without a visible emptiness check - confirm
// that c cannot be empty here.
5891 if (c[0] == Trait::latin1ToChar(
' ') && c[c.size() - 1] == Trait::latin1ToChar(
' ') &&
5892 skipSpaces<Trait>(0, c) < c.size()) {
5894 c.remove(c.size() - 1, 1);
5900 auto code = std::make_shared<Code<Trait>>(c,
false,
true);
// Item range: from (startLine, startPos) to the character before lastPos.
5902 code->setStartColumn(po.m_fr.m_data.at(startLine).first.virginPos(startPos));
5903 code->setStartLine(po.m_fr.m_data.at(startLine).second.m_lineNumber);
5904 code->setEndColumn(po.m_fr.m_data.at(lastLine).first.virginPos(lastPos - 1));
5905 code->setEndLine(po.m_fr.m_data.at(lastLine).second.m_lineNumber);
// For a backslashed delimiter the range starts one character later and is
// one character shorter.
5906 code->setStartDelim({po.m_fr.m_data.at(startDelimIt->m_line).first.virginPos(
5907 startDelimIt->m_pos + (startDelimIt->m_backslashed ? 1 : 0)),
5908 po.m_fr.m_data.at(startDelimIt->m_line).second.m_lineNumber,
5909 po.m_fr.m_data.at(startDelimIt->m_line).first.virginPos(
5910 startDelimIt->m_pos + (startDelimIt->m_backslashed ? 1 : 0)) +
5911 startDelimIt->m_len - 1 - (startDelimIt->m_backslashed ? 1 : 0),
5912 po.m_fr.m_data.at(startDelimIt->m_line).second.m_lineNumber});
// Same computation for the closing delimiter.
// NOTE(review): the call that receives this initializer is outside the
// visible lines.
5914 {po.m_fr.m_data.at(endDelimIt->m_line).first.virginPos(
5915 endDelimIt->m_pos + (endDelimIt->m_backslashed ? 1 : 0)),
5916 po.m_fr.m_data.at(endDelimIt->m_line).second.m_lineNumber,
5917 po.m_fr.m_data.at(endDelimIt->m_line).first.virginPos(
5918 endDelimIt->m_pos + (endDelimIt->m_backslashed ? 1 : 0) +
5919 endDelimIt->m_len - 1 - (endDelimIt->m_backslashed ? 1 : 0)),
5920 po.m_fr.m_data.at(endDelimIt->m_line).second.m_lineNumber});
5921 code->setOpts(po.m_opts);
5923 initLastItemWithOpts<Trait>(po, code);
5925 po.m_parent->appendItem(code);
// Reset the paragraph state flags after the code item.
5928 po.m_wasRefLink =
false;
5929 po.m_firstInParagraph =
false;
5930 po.m_lastText =
nullptr;
// Looks for the InlineCode delimiter that closes the one at it and, when
// found, creates the code item through makeInlineCode().
//
// it, last - opening delimiter and end of the delimiter sequence.
// po       - text parsing options; position and state flags are updated.
//
// Returns a delimiter iterator (the return statements are outside the
// visible lines).
5933template<
class Trait>
5934inline typename Parser<Trait>::Delims::const_iterator
5935Parser<Trait>::checkForInlineCode(
typename Delims::const_iterator it,
5936 typename Delims::const_iterator last,
5937 TextParsingOpts<Trait> &po)
// Remember the length and position of the opening delimiter.
5939 const auto len = it->m_len;
5940 const auto start = it;
5942 po.m_wasRefLink =
false;
5943 po.m_firstInParagraph =
false;
5947 for (; it != last; ++it) {
// Only delimiters on or before the last text line are examined.
5948 if (it->m_line <= po.m_lastTextLine) {
5949 const auto p = skipSpaces<Trait>(0, po.m_fr.m_data.at(it->m_line).first.asString());
5950 const auto withoutSpaces = po.m_fr.m_data.at(it->m_line).first.asString().sliced(p);
// A horizontal line whose line starts with '-' after skipSpaces(), or an
// H1/H2 delimiter, is handled separately.
// NOTE(review): presumably this ends the search - confirm.
5952 if ((it->m_type == Delimiter::HorizontalLine && withoutSpaces[0] == Trait::latin1ToChar(
'-')) ||
5953 it->m_type == Delimiter::H1 || it->m_type == Delimiter::H2) {
5955 }
// A closing delimiter must have the same length as the opening one; for a
// backslashed delimiter one character is not counted.
else if (it->m_type == Delimiter::InlineCode && (it->m_len - (it->m_backslashed ? 1 : 0)) == len) {
5956 if (!po.m_collectRefLinks) {
// The code text runs from just after the opening delimiter to the closing
// delimiter (shifted by one when that one is backslashed).
5961 makeInlineCode(
start->m_line,
start->m_pos +
start->m_len, it->m_line,
5962 it->m_pos + (it->m_backslashed ? 1 : 0), po,
start, it);
// Parsing continues right after the closing delimiter.
5964 po.m_line = it->m_line;
5965 po.m_pos = it->m_pos + it->m_len;
5975 if (!po.m_collectRefLinks) {
// Extracts the text between an opening square bracket delimiter (start)
// and a closing one (it) as block data.
//
// start                 - opening delimiter.
// it, last              - closing delimiter and end of the sequence.
// po                    - text parsing options.
// doNotCreateTextOnFail - consulted on the failure paths together with
//                         po.m_collectRefLinks.
// One more parameter follows; it is outside the visible lines.
//
// Returns a pair of the extracted lines and a delimiter iterator (it on
// the visible single-line success path).
5982template<
class Trait>
5983inline std::pair<typename MdBlock<Trait>::Data,
typename Parser<Trait>::Delims::const_iterator>
5984Parser<Trait>::readTextBetweenSquareBrackets(
typename Delims::const_iterator
start,
5985 typename Delims::const_iterator it,
5986 typename Delims::const_iterator last,
5987 TextParsingOpts<Trait> &po,
5988 bool doNotCreateTextOnFail,
// The closing delimiter has to exist and lie on or before the last text
// line.
5991 if (it != last && it->m_line <= po.m_lastTextLine) {
// Single-line case: both brackets are on the same line.
5992 if (
start->m_line == it->m_line) {
// n is the length of the slice that starts at p (p is set on a line that
// is not visible here).
5994 const auto n = it->m_pos - p;
5997 long long int startPos, startLine, endPos, endLine;
5999 po.m_fr.m_data[
start->m_line].first.virginPos(
6001 po.m_fr.m_data[
start->m_line].second.m_lineNumber);
// The end of the range is the position before the closing bracket.
6002 std::tie(endPos, endLine) =
6003 prevPosition(po.m_fr, po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos),
6004 po.m_fr.m_data[it->m_line].second.m_lineNumber);
6006 *pos = {startPos, startLine, endPos, endLine};
// The result is one line: the slice of n characters from p, together with
// that line's number.
6009 return {{{po.m_fr.m_data.at(
start->m_line).first.sliced(p, n),
6010 {po.m_fr.m_data.at(
start->m_line).second.m_lineNumber}}}, it};
// Multi-line case: taken only when the two brackets are fewer than three
// lines apart.
6012 if (it->m_line -
start->m_line < 3) {
6013 typename MdBlock<Trait>::Data res;
// First line: everything after the opening bracket.
6014 res.push_back({po.m_fr.m_data.at(
start->m_line).first.sliced(
6015 start->m_pos +
start->m_len), po.m_fr.m_data.at(
start->m_line).second});
6017 long long int i =
start->m_line + 1;
// Following lines: the last one is cut before the closing bracket, the
// others are copied as they are.
6019 for (; i <= it->m_line; ++i) {
6020 if (i == it->m_line) {
6021 res.push_back({po.m_fr.m_data.at(i).first.sliced(0, it->m_pos),
6022 po.m_fr.m_data.at(i).second});
6024 res.push_back({po.m_fr.m_data.at(i).first, po.m_fr.m_data.at(i).second});
6029 long long int startPos, startLine, endPos, endLine;
6031 po.m_fr.m_data[
start->m_line].first.virginPos(
6033 po.m_fr.m_data[
start->m_line].second.m_lineNumber);
6034 std::tie(endPos, endLine) =
6035 prevPosition(po.m_fr, po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos),
6036 po.m_fr.m_data[it->m_line].second.m_lineNumber);
6038 *pos = {startPos, startLine, endPos, endLine};
// Failure paths: both are skipped while reference links are being
// collected or when the caller asked not to create text.
6043 if (!po.m_collectRefLinks && !doNotCreateTextOnFail) {
6051 if (!po.m_collectRefLinks && !doNotCreateTextOnFail) {
6059template<
class Trait>
6060inline std::pair<typename MdBlock<Trait>::Data,
typename Parser<Trait>::Delims::const_iterator>
6061Parser<Trait>::checkForLinkText(
typename Delims::const_iterator it,
6062 typename Delims::const_iterator last,
6063 TextParsingOpts<Trait> &po,
6066 const auto start = it;
6068 long long int brackets = 0;
6070 const bool collectRefLinks = po.m_collectRefLinks;
6071 po.m_collectRefLinks =
true;
6072 long long int l = po.m_line, p = po.m_pos;
6074 for (it = std::next(it); it != last; ++it) {
6077 switch (it->m_type) {
6078 case Delimiter::SquareBracketsClose: {