6#ifndef MD4QT_MD_TRAITS_HPP_INCLUDED
7#define MD4QT_MD_TRAITS_HPP_INCLUDED
9#ifdef MD4QT_ICU_STL_SUPPORT
22#include <unicode/uchar.h>
23#include <unicode/unistr.h>
26#include <uriparser/Uri.h>
30#ifdef MD4QT_QT_SUPPORT
32#ifndef MD4QT_ICU_STL_SUPPORT
54template<
class String,
class Char,
class Latin1Char>
92 if (pos + len > m_str.length() || len < 0) {
93 len = m_str.length() - pos;
97 return (m_str.isEmpty() ? m_virginStr : String());
101 String startStr, endStr;
103 if (m_virginStr[virginStartPos] == Latin1Char(
'\t')) {
104 const auto spaces = countOfSpacesForTab(virginStartPos);
106 for (
long long int i = 1; i < spaces; ++i) {
107 if (
virginPos(pos + i) != virginStartPos) {
108 startStr = String(i, Latin1Char(
' '));
115 auto virginEndPos =
virginPos(pos + len - 1,
true);
117 if (m_virginStr[virginEndPos] == Latin1Char(
'\t')) {
118 const auto spaces = countOfSpacesForTab(virginEndPos);
120 for (
long long int i = 1; i < spaces; ++i) {
121 if (
virginPos(pos + len - 1 - i) != virginEndPos) {
122 endStr = String(i, Latin1Char(
' '));
129 return startStr + m_virginStr.sliced(virginStartPos, virginEndPos - virginStartPos + 1) + endStr;
139 bool end =
false)
const
141 for (
auto it = m_changedPos.crbegin(), last = m_changedPos.crend(); it != last; ++it) {
142 pos = virginPosImpl(pos, *it, end);
150 return m_str[position];
156 const auto len = m_str.length();
158 m_str.remove(pos, size);
159 m_str.insert(pos, with);
161 if (with.length() != size) {
162 m_changedPos.push_back({{0, len}, {}});
163 m_changedPos.back().second.push_back({pos, size, with.size()});
174 const auto len = m_str.length();
176 for (
long long int i = 0; i < m_str.size();) {
177 long long int p = m_str.indexOf(what, i);
180 tmp.push_back(m_str.sliced(i, p - i));
185 if (what.size() != with.size()) {
187 m_changedPos.push_back({{0, len}, {}});
191 m_changedPos.back().second.push_back({p, what.size(), with.size()});
194 tmp.push_back(m_str.sliced(i));
200 std::swap(m_str, tmp);
208 const auto len = m_str.length();
210 m_str.remove(pos, size);
212 m_changedPos.push_back({{0, len}, {}});
213 m_changedPos.back().second.push_back({pos, size, 0});
221 return m_str.isEmpty();
227 return m_str.length();
237 const auto len = m_str.length();
240 result.m_str.clear();
244 long long int spaces = 0;
249 while (i <
length() && m_str[i].isSpace()) {
257 result.m_changedPos.push_back({{0, len}, {}});
261 if (i - tmp > 1 || first) {
262 result.m_changedPos.back().second.push_back({tmp, i - tmp, (first ? 0 : 1)});
268 while (i !=
length() && !m_str[i].isSpace()) {
269 result.m_str.push_back(m_str[i]);
277 result.m_str.push_back(Latin1Char(
' '));
280 if (!result.
isEmpty() && result.m_str[result.
length() - 1] == Latin1Char(
' ')) {
281 result.m_str.remove(result.
length() - 1, 1);
284 result.m_changedPos.back().second.back().m_len = 0;
285 }
else if (spaces == 1) {
286 result.m_changedPos.back().second.push_back({m_str.length() - spaces, spaces, 0});
296 std::vector<InternalStringT> result;
297 const auto len = m_str.length();
300 for (
long long int i = 0; i < m_str.length(); ++i) {
303 is.m_changedPos.push_back({{i, len}, {}});
305 result.push_back(is);
311 long long int pos = 0;
312 long long int fpos = 0;
314 while ((fpos = m_str.indexOf(sep.
asString(), pos)) != -1 && fpos <
length()) {
315 if (fpos - pos > 0) {
317 is.m_str = m_str.sliced(pos, fpos - pos);
318 is.m_changedPos.push_back({{pos, len}, {}});
320 result.push_back(is);
323 pos = fpos + sep.
length();
326 if (pos < m_str.length()) {
328 is.m_str = m_str.sliced(pos, m_str.length() - pos);
329 is.m_changedPos.push_back({{pos, len}, {}});
331 result.push_back(is);
341 const auto oldLen = m_str.length();
342 tmp.m_str = tmp.m_str.sliced(pos, (len == -1 ? tmp.m_str.length() - pos : len));
343 tmp.m_changedPos.push_back({{pos, oldLen}, {}});
344 if (len != -1 && len <
length() - pos) {
345 tmp.m_changedPos.back().second.push_back({pos + len,
length() - pos - len, 0});
355 const auto len = m_str.length();
356 tmp.m_str = tmp.m_str.right(n);
357 tmp.m_changedPos.push_back({{
length() - n, len}, {}});
365 return insert(pos, String(1, ch));
371 const auto len = m_str.length();
372 const auto ilen = s.length();
374 m_str.insert(pos, s);
376 m_changedPos.push_back({{0, len}, {}});
377 m_changedPos.back().second.push_back({pos, 1, ilen + 1});
390 long long int m_pos = -1;
391 long long int m_oldLen = -1;
392 long long int m_len = -1;
396 struct LengthAndStartPos {
397 long long int m_firstPos = 0;
398 long long int m_length = 0;
402 std::vector<std::pair<LengthAndStartPos, std::vector<ChangedPos>>> m_changedPos;
405 long long int virginPosImpl(
long long int pos,
406 const std::pair<LengthAndStartPos, std::vector<ChangedPos>> &changed,
409 for (
const auto &c : changed.second) {
410 const auto startPos = c.m_pos;
411 const auto endPos = startPos + c.m_len - 1;
413 if (pos >= startPos && pos <= endPos) {
414 const auto oldEndPos = startPos + c.m_oldLen - 1;
416 if (pos > oldEndPos || end) {
417 return oldEndPos + changed.first.m_firstPos;
419 return pos + changed.first.m_firstPos;
421 }
else if (pos > endPos) {
422 pos += c.m_oldLen - c.m_len;
428 pos += changed.first.m_firstPos;
430 return (pos > changed.first.m_length ? changed.first.m_length : pos);
433 long long int countOfSpacesForTab(
long long int virginPos)
const
437 for (
const auto &v : std::as_const(m_changedPos)) {
438 p += v.first.m_firstPos;
444 for (
const auto &c : std::as_const(v.second)) {
457#ifdef MD4QT_ICU_STL_SUPPORT
472 UnicodeChar(UChar32 ch)
477 operator UChar32()
const
482 inline bool isSpace()
const
484 bool unicodeSpace =
false;
486 const auto type = u_charType(m_ch);
489 case U_SPACE_SEPARATOR:
490 case U_LINE_SEPARATOR:
491 case U_PARAGRAPH_SEPARATOR:
499 return m_ch == 0x20 || (m_ch <= 0x0D && m_ch >= 0x09) ||
500 (m_ch > 127 && (m_ch == 0x85 || m_ch == 0xA0 || unicodeSpace));
503 inline bool isDigit()
const
505 return (u_charType(m_ch) == U_DECIMAL_DIGIT_NUMBER);
508 inline bool isNull()
const
513 inline UChar32 unicode()
const
518 inline bool isLetter()
const
520 const auto type = u_charType(m_ch);
523 case U_UPPERCASE_LETTER:
524 case U_LOWERCASE_LETTER:
525 case U_TITLECASE_LETTER:
526 case U_MODIFIER_LETTER:
535 inline bool isLetterOrNumber()
const
537 return isLetter() || isDigit();
540 inline bool isPunct()
const
542 const auto type = u_charType(m_ch);
545 case U_DASH_PUNCTUATION:
546 case U_START_PUNCTUATION:
547 case U_END_PUNCTUATION:
548 case U_CONNECTOR_PUNCTUATION:
549 case U_OTHER_PUNCTUATION:
550 case U_INITIAL_PUNCTUATION:
551 case U_FINAL_PUNCTUATION:
559 inline bool isSymbol()
const
561 const auto type = u_charType(m_ch);
565 case U_CURRENCY_SYMBOL:
566 case U_MODIFIER_SYMBOL:
575 UnicodeChar toLower()
const
577 return icu::UnicodeString(1, m_ch, 1).toLower().char32At(0);
580 bool operator==(
const UnicodeChar &other)
const
582 return m_ch == other.m_ch;
585 bool operator!=(
const UnicodeChar &other)
const
587 return m_ch != other.m_ch;
599class UnicodeString final :
public icu::UnicodeString
606 UnicodeString(
const icu::UnicodeString &str)
607 : icu::UnicodeString(str)
611 UnicodeString(
char ch)
612 : icu::UnicodeString((char16_t)ch)
616 UnicodeString(
const char16_t *str)
617 : icu::UnicodeString(str)
621 UnicodeString(
const UnicodeChar &ch)
622 : icu::UnicodeString(1, (UChar32)ch, 1)
626 UnicodeString(
const char *str)
627 : icu::UnicodeString(icu::UnicodeString::fromUTF8(str))
631 UnicodeString(
const std::string &str)
632 : icu::UnicodeString(icu::UnicodeString::fromUTF8(str))
636 UnicodeString(
long long int count,
char ch)
637 : icu::UnicodeString((int32_t)count, (UChar32)ch, (int32_t)count)
641 ~UnicodeString()
override =
default;
643 UnicodeChar operator[](
long long int position)
const
645 return UnicodeChar(char32At((int32_t)position));
648 void push_back(
const UnicodeChar &ch)
650 icu::UnicodeString::append((UChar32)ch);
653 void push_back(
const UnicodeString &str)
655 icu::UnicodeString::append(str);
663 int toInt(
bool *ok =
nullptr,
int base = 10)
const
668 const auto result = std::stoi(tmp,
nullptr, base);
673 }
catch (
const std::invalid_argument &) {
677 }
catch (
const std::out_of_range &) {
686 bool contains(
const UnicodeChar &ch)
const
688 return (icu::UnicodeString::indexOf((UChar32)ch) != -1);
691 bool contains(
const UnicodeString &str)
const
693 return (icu::UnicodeString::indexOf(str) != -1);
696 UnicodeString simplified()
const
702 UnicodeString result;
706 while (i < length() && UnicodeChar(char32At(i)).isSpace()) {
710 while (i != length() && !UnicodeChar(char32At(i)).isSpace()) {
711 result.append(UnicodeChar(char32At(i)));
719 result.append(UnicodeChar(
' '));
722 if (!result.isEmpty() && result[result.size() - 1] == UnicodeChar(
' ')) {
723 result.remove(result.size() - 1, 1);
729 std::vector<UnicodeString> split(
const UnicodeChar &ch)
const
731 std::vector<UnicodeString> result;
736 while ((fpos = indexOf(ch, pos)) != -1 && fpos < length()) {
737 if (fpos - pos > 0) {
738 icu::UnicodeString tmp;
739 extract(pos, fpos - pos, tmp);
740 result.push_back(tmp);
746 if (pos < length()) {
747 icu::UnicodeString tmp;
748 extract(pos, length() - pos, tmp);
749 result.push_back(tmp);
755 std::vector<UnicodeString> split(
char ch)
const
757 return split(UnicodeChar(ch));
760 UnicodeString &
replace(
const UnicodeChar &before,
const UnicodeString &after)
762 for (int32_t pos = 0; (pos = indexOf(before, pos)) != -1; pos += after.size()) {
763 icu::UnicodeString::replace(pos, 1, after);
769 UnicodeString &
replace(
const UnicodeString &before,
const UnicodeString &after)
771 for (int32_t pos = 0; (pos = indexOf(before, pos)) != -1; pos += after.size()) {
772 icu::UnicodeString::replace(pos, before.length(), after);
778 UnicodeString sliced(
long long int pos,
long long int len = -1)
const
780 icu::UnicodeString tmp;
781 extract((int32_t)pos, (int32_t)(len == -1 ? length() - pos : len), tmp);
786 UnicodeString
right(
long long int n)
const
788 icu::UnicodeString tmp;
789 extract(length() - (int32_t)n, (int32_t)n, tmp);
794 UnicodeString toCaseFolded()
const
796 icu::UnicodeString tmp = *
this;
802 UnicodeString toUpper()
const
804 icu::UnicodeString tmp = *
this;
810 UnicodeString toLower()
const
812 icu::UnicodeString tmp = *
this;
820 icu::UnicodeString::remove();
831 explicit UrlUri(
const UnicodeString &uriStr)
836 std::string uriString;
837 uriStr.toUTF8String(uriString);
839 if (uriParseSingleUriA(&uri, uriString.c_str(), NULL) == URI_SUCCESS) {
841 m_relative = !(uri.scheme.first && uri.scheme.afterLast);
844 m_scheme = UnicodeString(std::string(uri.scheme.first,
845 uri.scheme.afterLast - uri.scheme.first).c_str());
848 if (uri.hostText.first && uri.hostText.afterLast) {
849 m_host = UnicodeString(std::string(uri.hostText.first,
850 uri.hostText.afterLast - uri.hostText.first).c_str());
853 uriFreeUriMembersA(&uri);
866 bool isRelative()
const
871 UnicodeString scheme()
const
876 UnicodeString host()
const
884 UnicodeString m_scheme;
885 UnicodeString m_host;
893struct UnicodeStringTrait {
895 using Vector = std::vector<T>;
897 template<
class T,
class U>
898 using Map = std::map<T, U>;
900 using String = UnicodeString;
902 using Char = UnicodeChar;
904 using InternalString = InternalStringT<String, Char, Char>;
906 using TextStream = std::istream;
908 using StringList = std::vector<String>;
910 using InternalStringList = std::vector<InternalString>;
915 static bool isUnicodeWhitespace(
const UnicodeChar &ch)
917 const auto c = ch.unicode();
919 if (u_charType(c) == U_SPACE_SEPARATOR) {
921 }
else if (c == 0x09 || c == 0x0A || c == 0x0C || c == 0x0D) {
929 static String utf16ToString(
const char16_t *u16)
931 return UnicodeString(u16);
935 static String latin1ToString(
const char *latin1)
937 return UnicodeString(latin1);
941 static Char latin1ToChar(
char latin1)
943 return UnicodeChar(latin1);
947 static String utf8ToString(
const char *utf8)
949 return UnicodeString(utf8);
953 static bool fileExists(
const String &fileName,
const String &workingPath)
956 (workingPath.isEmpty() ? fileName : String(workingPath +
"/" + fileName)).toUTF8String(path);
960 const auto result = std::filesystem::exists(path, er);
962 return (er ?
false : result);
966 static bool fileExists(
const String &fileName)
969 fileName.toUTF8String(path);
973 const auto result = std::filesystem::exists(path, er);
975 return (er ?
false : result);
979 static String absoluteFilePath(
const String &path)
982 path.toUTF8String(tmp);
984 auto p = std::filesystem::canonical(tmp, er).u8string();
986 std::replace(p.begin(), p.end(),
'\\',
'/');
988 return (er ?
"" : UnicodeString::fromUTF8(p));
992 static void appendUcs4(String &str,
char32_t ch)
994 str.push_back(Char(ch));
1000#ifdef MD4QT_QT_SUPPORT
1007struct QStringTrait {
1011 template<
class T,
class U>
1012 using Map = std::map<T, U>;
1018 using InternalString = InternalStringT<String, Char, QLatin1Char>;
1020 using InternalStringList = std::vector<InternalString>;
1029 static bool isUnicodeWhitespace(
const QChar &ch)
1035 }
else if (c == 0x09 || c == 0x0A || c == 0x0C || c == 0x0D) {
1043 static String utf16ToString(
const char16_t *u16)
1049 static String latin1ToString(
const char *latin1)
1055 static Char latin1ToChar(
char latin1)
1061 static String utf8ToString(
const char *utf8)
1067 static bool fileExists(
const String &fileName,
const String &workingPath)
1070 QString() : workingPath + latin1ToString(
"/")) + fileName);
1074 static bool fileExists(
const String &fileName)
1080 static String absoluteFilePath(
const String &path)
1086 static void appendUcs4(String &str,
char32_t ch)
Internal string, used to get virgin (original) string from transformed string.
Char operator[](long long int position) const
long long int virginPos(long long int pos, bool end=false) const
const String & asString() const
InternalStringT(const String &s)
long long int length() const
std::vector< InternalStringT > split(const InternalStringT &sep) const
Split string.
InternalStringT simplified() const
InternalStringT & insert(long long int pos, const String &s)
Insert string.
String virginSubString(long long int pos=0, long long int len=-1) const
InternalStringT & replace(const String &what, const String &with)
Replace string.
InternalStringT & remove(long long int pos, long long int size)
Remove sub-string.
InternalStringT sliced(long long int pos, long long int len=-1) const
InternalStringT & replaceOne(long long int pos, long long int size, const String &with)
Replace substring.
const String & fullVirginString() const
InternalStringT right(long long int n) const
InternalStringT & insert(long long int pos, Char ch)
Insert one character.
bool fileExists(const QUrl &path)
KIOCORE_EXPORT bool operator!=(const UDSEntry &entry, const UDSEntry &other)
KIOCORE_EXPORT bool operator==(const UDSEntry &entry, const UDSEntry &other)
QString path(const QString &relativePath)
bool isValid(QStringView ifopt)
VehicleSection::Type type(QStringView coachNumber, QStringView coachClassification)
QAction * replace(const QObject *recvr, const char *slot, QObject *parent)
Category category(char32_t ucs4)
QString absoluteFilePath() const const
bool exists() const const
QString fromUtf16(const char16_t *unicode, qsizetype size)
QString fromUtf8(QByteArrayView str)
QTextStream & right(QTextStream &stream)