28 #include <QXmlStreamReader>
29 #include <QStringList>
39 #define QL1S(x) QLatin1String(x)
46 const int maxlen = 80;
47 if (str.length() <= maxlen)
50 return str.left(maxlen).append(QLatin1String(
"..."));
54 class KuitEntityResolver :
public QXmlStreamEntityResolver
65 QString value = entityMap.value(name);
134 class KuitSemanticsStaticData
158 KuitEntityResolver xmlEntityResolver;
160 KuitSemanticsStaticData ();
163 KuitSemanticsStaticData::KuitSemanticsStaticData ()
168 #define SETUP_TAG(tag, name, atts, subs) do { \
169 knownTags.insert(QString::fromLatin1(name), Kuit::Tag::tag); \
170 tagNames.insert(Kuit::Tag::tag, QString::fromLatin1(name)); \
172 using namespace Kuit::Att; \
173 tagAtts[Kuit::Tag::tag] << atts; \
176 using namespace Kuit::Tag; \
177 tagSubs[Kuit::Tag::tag] << subs << NumIntg << NumReal; \
183 Filename << Link << Application << Command << Resource << Icode << \
184 Shortcut << Interface << Emphasis << Placeholder << Email << \
221 #define SETUP_ATT(att, name) do { \
222 knownAtts.insert(QString::fromLatin1(name), Kuit::Att::att); \
236 #define SETUP_FMT(fmt, name) do { \
237 knownFmts.insert(QString::fromLatin1(name), Kuit::Fmt::fmt); \
245 #define SETUP_ROL(rol, name, fmt, cues) do { \
246 knownRols.insert(QString::fromLatin1(name), Kuit::Rol::rol); \
247 defFmts[Kuit::Rol::rol][Kuit::Cue::None] = Kuit::Fmt::fmt; \
249 using namespace Kuit::Cue; \
250 rolCues[Kuit::Rol::rol] << cues; \
268 #undef SETUP_ROLCUEFMT
269 #define SETUP_ROLCUEFMT(rol, cue, fmt) do { \
270 defFmts[Kuit::Rol::rol][Kuit::Cue::cue] = Kuit::Fmt::fmt; \
279 #define SETUP_CUE(cue, name) do { \
280 knownCues.insert(QString::fromLatin1(name), Kuit::Cue::cue); \
311 qtHtmlTagNames <<
QL1S(
"a") <<
QL1S(
"address") <<
QL1S(
"b") <<
QL1S(
"big") <<
QL1S(
"blockquote")
324 #define SETUP_TAG_NL(tag, nlead) do { \
325 leadingNewlines.insert(Kuit::Tag::tag, nlead); \
335 xmlEntities[QString::fromLatin1(
"lt")] =
QString(QLatin1Char(
'<'));
336 xmlEntities[QString::fromLatin1(
"gt")] =
QString(QLatin1Char(
'>'));
337 xmlEntities[QString::fromLatin1(
"amp")] =
QString(QLatin1Char(
'&'));
338 xmlEntities[QString::fromLatin1(
"apos")] =
QString(QLatin1Char(
'\''));
339 xmlEntities[QString::fromLatin1(
"quot")] =
QString(QLatin1Char(
'"'));
340 xmlEntitiesInverse[
QString(QLatin1Char(
'<'))] = QString::fromLatin1(
"lt");
341 xmlEntitiesInverse[
QString(QLatin1Char(
'>'))] = QString::fromLatin1(
"gt");
342 xmlEntitiesInverse[
QString(QLatin1Char(
'&'))] = QString::fromLatin1(
"amp");
343 xmlEntitiesInverse[
QString(QLatin1Char(
'\''))] = QString::fromLatin1(
"apos");
344 xmlEntitiesInverse[
QString(QLatin1Char(
'"'))] = QString::fromLatin1(
"quot");
346 xmlEntities[QString::fromLatin1(
"nbsp")] =
QString(QChar(0xa0));
347 xmlEntityResolver.setEntities(xmlEntities);
356 class KuitSemanticsPrivate
360 KuitSemanticsPrivate (
const QString &lang_);
365 QString metaTr (
const char *ctxt,
const char *
id)
const;
368 void setFormattingPatterns ();
371 void setTextTransformData ();
393 bool hadQtTag =
false,
394 bool hadAnyHtmlTag =
false)
const;
404 typedef enum { Proper, Ignored, Dropout } Handling;
416 KuitSemanticsPrivate::OpenEl parseOpenEl (
const QXmlStreamReader &xml,
428 static void countWrappingNewlines (
const QString &ptext,
429 int &numle,
int &numtr);
453 KuitSemanticsPrivate::KuitSemanticsPrivate (
const QString &lang)
464 m_metaCat =
new KCatalog(QString::fromLatin1(
"kdelibs4"), lang);
467 setFormattingPatterns();
470 setTextTransformData();
477 QString KuitSemanticsPrivate::metaTr (
const char *ctxt,
const char *
id)
const
479 if (m_metaCat == NULL) {
480 return QString::fromLatin1(
id);
482 return m_metaCat->translate(ctxt,
id);
485 void KuitSemanticsPrivate::setFormattingPatterns ()
487 using namespace Kuit;
491 #define SET_PATTERN(tag, atts, fmt, ctxt_ptrn) do { \
494 int akey = attSetKey(aset); \
495 QString pattern = metaTr(ctxt_ptrn); \
496 m_patterns[tag][akey][fmt] = pattern; \
498 if (fmt == Fmt::Plain && !m_patterns[tag][akey].contains(Fmt::Term)) { \
499 m_patterns[tag][akey][Fmt::Term] = pattern; \
505 #define I18N_NOOP2(ctxt, msg) ctxt, msg
510 #define XXXX_NOOP2(ctxt, msg) ctxt, msg
585 "%1 is the note label, %2 is the text",
590 "%1 is the note label, %2 is the text",
602 "<b>Warning</b>: %1"));
605 "%1 is the warning label, %2 is the text",
610 "%1 is the warning label, %2 is the text",
622 "<a href=\"%1\">%1</a>"));
625 "%1 is the URL, %2 is the descriptive text",
630 "%1 is the URL, %2 is the descriptive text",
632 "<a href=\"%1\">%2</a>"));
665 "%1 is the command name, %2 is its man section",
670 "%1 is the command name, %2 is its man section",
750 "<<i>%1</i>>"));
760 "<<a href=\"mailto:%1\">%1</a>>"));
763 "%1 is name, %2 is address",
768 "%1 is name, %2 is address",
770 "<a href=\"mailto:%2\">%1</a>"));
803 void KuitSemanticsPrivate::setTextTransformData ()
807 #define I18N_NOOP2(ctxt, msg) metaTr(ctxt, msg)
828 #define SET_KEYNAME(rawname) do { \
830 QString normname = QString::fromLatin1(rawname).trimmed().toLower(); \
831 m_keyNames[normname] = metaTr("keyboard-key-name", rawname); \
836 #define I18N_NOOP2(ctxt, msg) msg
886 Kuit::FmtVar fmtExplicit = formatFromContextMarker(ctxt, text);
889 if (text.indexOf(QLatin1Char(
'<')) < 0) {
890 return finalizeVisualText(text, fmtExplicit);
897 fmtImplicit = formatFromTags(text);
903 QString wtext = equipTopTag(text, toptag);
906 QString ftext = semanticToVisualText(wtext, fmtExplicit, fmtImplicit);
907 if (ftext.isEmpty()) {
908 return salvageMarkup(text, fmtImplicit);
927 Kuit::FmtVar KuitSemanticsPrivate::formatFromContextMarker (
934 KuitSemanticsStaticData *s = semanticsStaticData;
941 QString ctxmark = ctxmark_.trimmed();
942 if (ctxmark.startsWith(QLatin1Char(
'@'))) {
943 static QRegExp wsRx(QString::fromLatin1(
"\\s"));
944 ctxmark = ctxmark.mid(1, wsRx.indexIn(ctxmark) - 1);
947 int pfmt = ctxmark.indexOf(QLatin1Char(
'/'));
949 fmtname = ctxmark.mid(pfmt + 1);
950 ctxmark = ctxmark.left(pfmt);
954 int pcue = ctxmark.indexOf(QLatin1Char(
':'));
956 cuename = ctxmark.mid(pcue + 1);
957 ctxmark = ctxmark.left(pcue);
966 rolname = rolname.trimmed().toLower();
967 cuename = cuename.trimmed().toLower();
968 fmtname = fmtname.trimmed().toLower();
972 if (s->knownRols.contains(rolname)) {
973 rol = s->knownRols[rolname];
977 if (!rolname.isEmpty()) {
978 kDebug(173) << QString::fromLatin1(
"Unknown semantic role '@%1' in "
979 "context marker for message {%2}.")
986 if (s->knownCues.contains(cuename)) {
987 cue = s->knownCues[cuename];
991 if (!cuename.isEmpty()) {
992 kDebug(173) << QString::fromLatin1(
"Unknown interface subcue ':%1' in "
993 "context marker for message {%2}.")
1000 if (s->knownFmts.contains(fmtname)) {
1001 fmt = s->knownFmts[fmtname];
1007 if (s->defFmts.contains(rol)) {
1008 if (s->defFmts[rol].contains(cue)) {
1009 fmt = s->defFmts[rol][cue];
1019 if (!fmtname.isEmpty()) {
1020 kDebug(173) << QString::fromLatin1(
"Unknown visual format '/%1' in "
1021 "context marker for message {%2}.")
1031 KuitSemanticsStaticData *s = semanticsStaticData;
1032 static QRegExp staticTagRx(QString::fromLatin1(
"<\\s*(\\w+)[^>]*>"));
1034 QRegExp tagRx = staticTagRx;
1035 int p = tagRx.indexIn(text);
1037 QString tagname = tagRx.capturedTexts().at(1).toLower();
1038 if (s->qtHtmlTagNames.contains(tagname)) {
1041 p = tagRx.indexIn(text, p + tagRx.matchedLength());
1049 KuitSemanticsStaticData *s = semanticsStaticData;
1054 static QRegExp opensWithTagRx(QString::fromLatin1(
"^\\s*<\\s*(\\w+)[^>]*>"));
1055 bool explicitTopTag =
false;
1058 int p = opensWithTagRx.indexIn(text);
1062 QString fullmatch = opensWithTagRx.capturedTexts().at(0);
1063 QString tagname = opensWithTagRx.capturedTexts().at(1).toLower();
1064 if (tagname == QLatin1String(
"qt") || tagname == QLatin1String(
"html")) {
1067 text = text.mid(fullmatch.length());
1068 p = opensWithTagRx.indexIn(text);
1074 QString tagname = opensWithTagRx.capturedTexts().at(1).toLower();
1075 if (s->knownTags.contains(tagname)) {
1080 explicitTopTag =
true;
1100 if (!explicitTopTag) {
1101 return QLatin1Char(
'<') + s->tagNames[toptag] + QLatin1Char(
'>')
1103 + QLatin1String(
"</") + s->tagNames[toptag] + QLatin1Char(
'>');
1110 #define ENTITY_SUBRX "[a-z]+|#[0-9]+|#x[0-9a-fA-F]+"
1112 QString KuitSemanticsPrivate::semanticToVisualText (
const QString &text_,
1116 KuitSemanticsStaticData *s = semanticsStaticData;
1122 int p = original.indexOf(QLatin1Char(
'&'));
1124 text.append(original.mid(0, p + 1));
1125 original.remove(0, p + 1);
1126 static QRegExp restRx(QString::fromLatin1(
"^("ENTITY_SUBRX");"));
1127 if (original.indexOf(restRx) != 0) {
1128 text.append(QLatin1String(
"amp;"));
1130 p = original.indexOf(QLatin1Char(
'&'));
1132 text.append(original);
1137 bool hadQtTag =
false;
1138 bool hadAnyHtmlTag =
false;
1139 QStack<OpenEl> openEls;
1140 QXmlStreamReader xml(text);
1141 xml.setEntityResolver(&s->xmlEntityResolver);
1142 QStringRef lastElementName;
1144 while (!xml.atEnd()) {
1147 if (xml.isStartElement()) {
1148 lastElementName = xml.name();
1152 for (
int i = openEls.size() - 1; i >= 0; --i) {
1153 if (openEls[i].handling == OpenEl::Proper) {
1154 etag = openEls[i].tag;
1160 OpenEl oel = parseOpenEl(xml, etag, text);
1161 if (oel.name == QLatin1String(
"qt") || oel.name == QLatin1String(
"html")) {
1164 if (s->qtHtmlTagNames.contains(oel.name)) {
1165 hadAnyHtmlTag =
true;
1172 fmtExp = formatFromContextMarker(oel.avals[
Kuit::Att::Ctx], text);
1184 else if (xml.isEndElement()) {
1186 OpenEl oel = openEls.pop();
1189 if (openEls.isEmpty()) {
1191 return finalizeVisualText(oel.formattedText, fmtExp,
1192 hadQtTag, hadAnyHtmlTag);
1196 QString pt = openEls.top().formattedText;
1197 openEls.top().formattedText += formatSubText(pt, oel, fmtImp, numCtx);
1204 else if (xml.isCharacters()) {
1208 QString text = xml.text().toString();
1210 foreach (
const QChar &c, text) {
1211 if (s->xmlEntitiesInverse.contains(c)) {
1212 const QString entname = s->xmlEntitiesInverse[c];
1213 ntext += QLatin1Char(
'&') + entname + QLatin1Char(
';');
1218 openEls.top().formattedText += ntext;
1222 if (xml.hasError()) {
1223 kDebug(173) << QString::fromLatin1(
"Markup error in message {%1}: %2. Last tag parsed: %3")
1224 .arg(
shorten(text), xml.errorString(), lastElementName.toString());
1232 KuitSemanticsPrivate::OpenEl
1233 KuitSemanticsPrivate::parseOpenEl (
const QXmlStreamReader &xml,
1241 KuitSemanticsStaticData *s = semanticsStaticData;
1244 oel.name = xml.name().toString().toLower();
1248 foreach (
const QXmlStreamAttribute &xatt, xml.attributes()) {
1249 attnams += xatt.name().toString().toLower();
1250 attvals += xatt.value().toString();
1251 QChar qc = attvals.last().indexOf(QLatin1Char(
'\'')) < 0 ? QLatin1Char(
'\'') : QLatin1Char(
'"');
1252 oel.astr += QLatin1Char(
' ') + attnams.last() + QLatin1Char(
'=') + qc + attvals.last() + qc;
1255 if (s->knownTags.contains(oel.name)) {
1256 oel.tag = s->knownTags[oel.name];
1261 oel.handling = OpenEl::Proper;
1264 oel.handling = OpenEl::Dropout;
1265 kDebug(173) << QString::fromLatin1(
"Tag '%1' cannot be subtag of '%2' "
1267 .arg(s->tagNames[oel.tag], s->tagNames[etag],
1273 for (
int i = 0; i < attnams.size(); ++i) {
1274 if (s->knownAtts.contains(attnams[i])) {
1276 if (s->tagAtts[oel.tag].contains(att)) {
1278 oel.avals[att] = attvals[i];
1281 kDebug(173) << QString::fromLatin1(
"Attribute '%1' cannot be used in "
1282 "tag '%2' in message {%3}.")
1283 .arg(attnams[i], oel.name,
1288 kDebug(173) << QString::fromLatin1(
"Unknown semantic tag attribute '%1' "
1290 .arg(attnams[i],
shorten(text));
1293 oel.akey = attSetKey(attset);
1295 else if (oel.name == QLatin1String(
"qt") || oel.name == QLatin1String(
"html")) {
1297 oel.handling = OpenEl::Dropout;
1300 oel.handling = OpenEl::Ignored;
1301 if (!s->qtHtmlTagNames.contains(oel.name)) {
1302 kDebug(173) << QString::fromLatin1(
"Tag '%1' is neither semantic nor HTML in "
1304 .arg(oel.name,
shorten(text));
1315 QString pattern = QString::fromLatin1(
"%1");
1318 if ( m_patterns.contains(tag)
1319 && m_patterns[tag].contains(akey)
1320 && m_patterns[tag][akey].contains(fmt))
1322 pattern = m_patterns[tag][akey][fmt];
1328 QString KuitSemanticsPrivate::formatSubText (
const QString &ptext,
1333 KuitSemanticsStaticData *s = semanticsStaticData;
1335 if (oel.handling == OpenEl::Proper) {
1337 QString pattern = visualPattern(oel.tag, oel.akey, fmt);
1340 QString mtext = modifyTagText(oel.formattedText, oel.tag, oel.avals,
1343 using namespace Kuit;
1348 ftext = pattern.arg(oel.avals[
Att::Url], mtext);
1357 ftext = pattern.arg(oel.avals[
Att::Label], mtext);
1360 ftext = pattern.arg(oel.avals[
Att::Label], mtext);
1363 ftext = pattern.arg(mtext);
1368 if (!ptext.isEmpty() && s->leadingNewlines.contains(oel.tag)) {
1370 int pnumle, pnumtr, fnumle, fnumtr;
1371 countWrappingNewlines(ptext, pnumle, pnumtr);
1372 countWrappingNewlines(ftext, fnumle, fnumtr);
1374 int numle = pnumtr + fnumle;
1377 if (numle < s->leadingNewlines[oel.tag]) {
1378 strle =
QString(s->leadingNewlines[oel.tag] - numle, QLatin1Char(
'\n'));
1380 ftext = strle + ftext;
1385 else if (oel.handling == OpenEl::Ignored) {
1386 if (oel.name == QLatin1String(
"br") || oel.name == QLatin1String(
"hr")) {
1388 return QLatin1Char(
'<') + oel.name + QLatin1String(
"/>");
1391 return QLatin1Char(
'<') + oel.name + oel.astr + QLatin1Char(
'>')
1393 + QLatin1String(
"</") + oel.name + QLatin1Char(
'>');
1397 return oel.formattedText;
1401 void KuitSemanticsPrivate::countWrappingNewlines (
const QString &text,
1402 int &numle,
int &numtr)
1404 int len = text.length();
1407 while (numle < len && text[numle] == QLatin1Char(
'\n')) {
1412 while (numtr < len && text[len - numtr - 1] == QLatin1Char(
'\n')) {
1417 QString KuitSemanticsPrivate::modifyTagText (
const QString &text,
1429 const QChar fillChar = !fillStr.isEmpty() ? fillStr[0] : QChar::fromLatin1(
' ');
1430 return QString::fromLatin1(
"%1").arg(
KGlobal::locale()->formatNumber(text,
false),
1431 fieldWidth, fillChar);
1434 return QDir::toNativeSeparators(text);
1447 QString KuitSemanticsPrivate::finalizeVisualText (
const QString &
final,
1450 bool hadAnyHtmlTag)
const
1452 KuitSemanticsStaticData *s = semanticsStaticData;
1460 static QRegExp staticEntRx(QLatin1String(
"&("ENTITY_SUBRX");"));
1463 QRegExp entRx = staticEntRx;
1464 int p = entRx.indexIn(text);
1467 QString ent = entRx.capturedTexts().at(1);
1468 plain.append(text.mid(0, p));
1469 text.remove(0, p + ent.length() + 2);
1470 if (ent.startsWith(QLatin1Char(
'#'))) {
1473 if (ent[1] == QLatin1Char(
'x')) {
1474 c = QChar(ent.mid(2).toInt(&ok, 16));
1476 c = QChar(ent.mid(1).toInt(&ok, 10));
1481 plain.append(QLatin1Char(
'&') + ent + QLatin1Char(
';'));
1484 else if (s->xmlEntities.contains(ent)) {
1485 plain.append(s->xmlEntities[ent]);
1487 plain.append(QLatin1Char(
'&') + ent + QLatin1Char(
';'));
1489 p = entRx.indexIn(text);
1497 text = QString::fromLatin1(
"<html>") + text + QLatin1String(
"</html>");
1503 QString KuitSemanticsPrivate::salvageMarkup (
const QString &text_,
1506 KuitSemanticsStaticData *s = semanticsStaticData;
1514 static QRegExp staticWrapRx(QLatin1String(
"(<\\s*(\\w+)\\b([^>]*)>)(.*)(<\\s*/\\s*\\2\\s*>)"));
1515 QRegExp wrapRx = staticWrapRx;
1516 wrapRx.setMinimal(
true);
1520 int previousPos = pos;
1521 pos = wrapRx.indexIn(text, previousPos);
1523 ntext += text.mid(previousPos);
1526 ntext += text.mid(previousPos, pos - previousPos);
1528 QString tagname = capts[2].toLower();
1529 QString content = salvageMarkup(capts[4], fmt);
1530 if (s->knownTags.contains(tagname)) {
1533 QString pattern = visualPattern(s->knownTags[tagname], 0, fmt);
1534 ntext += pattern.arg(content);
1536 ntext += capts[1] + content + capts[5];
1538 pos += wrapRx.matchedLength();
1543 static QRegExp staticNowrRx(QLatin1String(
"<\\s*(\\w+)\\b([^>]*)/\\s*>"));
1544 QRegExp nowrRx = staticNowrRx;
1545 nowrRx.setMinimal(
true);
1549 int previousPos = pos;
1550 pos = nowrRx.indexIn(text, previousPos);
1552 ntext += text.mid(previousPos);
1555 ntext += text.mid(previousPos, pos - previousPos);
1557 QString tagname = capts[1].toLower();
1558 if (s->knownTags.contains(tagname)) {
1559 QString pattern = visualPattern(s->knownTags[tagname], 0, fmt);
1560 ntext += pattern.arg(
QString());
1564 pos += nowrRx.matchedLength();
1575 : d(new KuitSemanticsPrivate(lang))
1586 return d->format(text, ctxt);
1591 KuitSemanticsStaticData *s = semanticsStaticData;
1594 int p1 = text.indexOf(QLatin1Char(
'&'));
1597 int p2 = text.indexOf(QLatin1Char(
';'), p1);
1598 return (p2 > p1 && s->xmlEntities.contains(text.mid(p1, p2 - p1)));
1602 int tlen = text.length();
1603 p1 = text.indexOf(QLatin1Char(
'<'));
1608 bool closing =
false;
1609 while (p1 < tlen && (text[p1].isSpace() || text[p1] == QLatin1Char(
'/'))) {
1610 if (text[p1] == QLatin1Char(
'/')) {
1619 for (
int p2 = p1; p2 < tlen; ++p2) {
1621 if (c == QLatin1Char(
'>') || (!closing && c == QLatin1Char(
'/')) || c.isSpace()) {
1622 return s->qtHtmlTagNames.contains(text.mid(p1, p2 - p1));
1623 }
else if (!c.isLetter()) {
1635 int tlen = text.length();
1637 ntext.reserve(tlen);
1638 for (
int i = 0; i < tlen; ++i) {
1640 if (c == QLatin1Char(
'&')) {
1641 ntext += QLatin1String(
"&");
1642 }
else if (c == QLatin1Char(
'<')) {
1643 ntext += QLatin1String(
"<");
1644 }
else if (c == QLatin1Char(
'>')) {
1645 ntext += QLatin1String(
">");
1646 }
else if (c == QLatin1Char(
'\'')) {
1647 ntext += QLatin1String(
"'");
1648 }
else if (c == QLatin1Char(
'"')) {
1649 ntext += QLatin1String(
""");
#define SET_PATTERN(tag, atts, fmt, ctxt_ptrn)
#define SETUP_TAG_NL(tag, nlead)
static QString escape(const QString &text)
Convert &, ", ', <, > characters into XML entities &amp;, &quot;, &apos;, &lt;, &gt;, respectively.
#define K_GLOBAL_STATIC(TYPE, NAME)
This macro makes it easy to use non-POD types as global statics.
#define SETUP_ROLCUEFMT(rol, cue, fmt)
KuitSemantics(const QString &lang)
Constructor.
#define XXXX_NOOP2(ctxt, msg)
QString format(const QString &text, const QString &ctxt) const
Transforms the semantic markup in the given text into visual formatting.
#define SETUP_FMT(fmt, name)
#define SET_KEYNAME(rawname)
static bool mightBeRichText(const QString &text)
Poor man's version of Qt::mightBeRichText() (cannot link to QtGui).
#define SETUP_ROL(rol, name, fmt, cues)
KLocale * locale()
Returns the global locale object.
static QString shorten(const QString &str)
#define SETUP_TAG(tag, name, atts, subs)
#define SETUP_CUE(cue, name)
~KuitSemantics()
Destructor.
#define SETUP_ATT(att, name)
This class abstracts a gettext message catalog.
#define I18N_NOOP2(ctxt, msg)