Syndication

tools.cpp
1 /*
2  This file is part of the syndication library
3  SPDX-FileCopyrightText: 2006 Frank Osterfeld <[email protected]>
4 
5  SPDX-License-Identifier: LGPL-2.0-or-later
6 */
7 
8 #include "tools.h"
9 #include "personimpl.h"
10 
11 #include <KCharsets>
12 
13 #include <QByteArray>
14 #include <QCryptographicHash>
15 #include <QDateTime>
16 #include <QRegularExpression>
17 
18 #include <ctime>
19 
20 namespace Syndication
21 {
23 
24 unsigned int calcHash(const QString &str)
25 {
26  return calcHash(str.toUtf8());
27 }
28 
29 unsigned int calcHash(const QByteArray &array)
30 {
31  if (array.isEmpty()) {
32  return 0;
33  } else {
34  const char *s = array.data();
35  unsigned int hash = 5381;
36  int c;
37  while ((c = *s++)) {
38  hash = ((hash << 5) + hash) + c; // hash*33 + c
39  }
40  return hash;
41  }
42 }
43 
44 static uint toTimeT(QDateTime &kdt)
45 {
46  if (kdt.isValid()) {
47  // work around unspecified timezones/date-only timestamps by setting the time to 12:00 UTC
48  if (kdt.time().isNull() //
49  || (kdt.time() == QTime(0, 0) && kdt.timeSpec() == Qt::LocalTime)) {
50  kdt.setTimeSpec(Qt::UTC);
51  kdt.setTime(QTime(12, 0));
52  }
53  return kdt.toMSecsSinceEpoch() / 1000;
54  } else {
55  return 0;
56  }
57 }
58 
59 uint parseISODate(const QString &str)
60 {
62  return toTimeT(kdt);
63 }
64 
65 uint parseRFCDate(const QString &str)
66 {
68 #if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0)
69  // Qt5 used to ignore invalid textual offsets but Qt6 rejects those, so handle that explictly
70  if (!kdt.isValid() && str.endsWith(QLatin1String(" GMT"))) {
71  kdt = QDateTime::fromString(QStringView(str).chopped(4), Qt::RFC2822Date);
72  }
73 #endif
74  return toTimeT(kdt);
75 }
76 
77 uint parseDate(const QString &str, DateFormat hint)
78 {
79  if (str.isEmpty()) {
80  return 0;
81  }
82 
83  if (hint == RFCDate) {
84  time_t t = parseRFCDate(str);
85  return t != 0 ? t : parseISODate(str);
86  } else {
87  time_t t = parseISODate(str);
88  return t != 0 ? t : parseRFCDate(str);
89  }
90 }
91 
92 QString dateTimeToString(uint date)
93 {
94  if (date == 0) {
95  return QString();
96  }
97 
98  const QString format = QStringLiteral("ddd MMM d HH:mm:ss yyyy");
99  QDateTime dt;
100  dt.setMSecsSinceEpoch(quint64(date) * 1000);
101  return dt.toUTC().toString(format);
102 }
103 
104 QString calcMD5Sum(const QString &str)
105 {
106  md5Machine.reset();
107  md5Machine.addData(str.toUtf8());
108  return QLatin1String(md5Machine.result().toHex().constData());
109 }
110 
111 QString resolveEntities(const QString &str)
112 {
113  return KCharsets::resolveEntities(str);
114 }
115 
116 QString escapeSpecialCharacters(const QString &strp)
117 {
118  QString str(strp);
119  str.replace(QLatin1Char('&'), QLatin1String("&amp;"));
120  str.replace(QLatin1Char('\"'), QLatin1String("&quot;"));
121  str.replace(QLatin1Char('<'), QLatin1String("&lt;"));
122  str.replace(QLatin1Char('>'), QLatin1String("&gt;"));
123  str.replace(QLatin1Char('\''), QLatin1String("&apos;"));
124  return str.trimmed();
125 }
126 
127 QString convertNewlines(const QString &strp)
128 {
129  QString str(strp);
130  str.replace(QLatin1Char('\n'), QLatin1String("<br/>"));
131  return str;
132 }
133 
134 QString plainTextToHtml(const QString &plainText)
135 {
136  QString str(plainText);
137  str.replace(QLatin1Char('&'), QLatin1String("&amp;"));
138  str.replace(QLatin1Char('\"'), QLatin1String("&quot;"));
139  str.replace(QLatin1Char('<'), QLatin1String("&lt;"));
140  // str.replace(QLatin1Char('>'), QLatin1String("&gt;"));
141  str.replace(QLatin1Char('\n'), QLatin1String("<br/>"));
142  return str.trimmed();
143 }
144 
145 QString htmlToPlainText(const QString &html)
146 {
147  QString str(html);
148  // TODO: preserve some formatting, such as line breaks
149  str.remove(QRegularExpression(QStringLiteral("<[^>]*?>"))); // remove tags
150  str = resolveEntities(str);
151  return str.trimmed();
152 }
153 
154 static QRegularExpression tagRegExp()
155 {
156  static QRegularExpression exp(QStringLiteral("<\\w+.*/?>"));
157  return exp;
158 }
159 
160 bool stringContainsMarkup(const QString &str)
161 {
162  // check for entities
163  if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) {
164  return true;
165  }
166 
167  const int ltc = str.count(QLatin1Char('<'));
168  if (ltc == 0) {
169  return false;
170  }
171 
172  return str.contains(tagRegExp());
173 }
174 
175 bool isHtml(const QString &str)
176 {
177  // check for entities
178  if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) {
179  return true;
180  }
181 
182  const int ltc = str.count(QLatin1Char('<'));
183  if (ltc == 0) {
184  return false;
185  }
186 
187  return str.contains(tagRegExp());
188 }
189 
190 QString normalize(const QString &str)
191 {
192  return isHtml(str) ? str.trimmed() : plainTextToHtml(str);
193 }
194 
195 QString normalize(const QString &strp, bool isCDATA, bool containsMarkup)
196 {
197  if (containsMarkup) {
198  return strp.trimmed();
199  } else {
200  if (isCDATA) {
201  QString str = resolveEntities(strp);
202  str = escapeSpecialCharacters(str);
203  str = convertNewlines(str);
204  str = str.trimmed();
205  return str;
206  } else {
207  QString str = escapeSpecialCharacters(strp);
208  str = str.trimmed();
209  return str;
210  }
211  }
212 }
213 
214 PersonPtr personFromString(const QString &strp)
215 {
216  QString str = strp.trimmed();
217  if (str.isEmpty()) {
218  return PersonPtr(new PersonImpl());
219  }
220 
221  str = resolveEntities(str);
222  QString name;
223  QString uri;
224  QString email;
225 
226  // look for something looking like a mail address ("[email protected]",
227  // "<[email protected]>") and extract it
228 
229  const QRegularExpression remail(QStringLiteral("<?([^@\\s<]+@[^>\\s]+)>?")); // FIXME: user "proper" regexp,
230  // search kmail source for it
231 
232  QRegularExpressionMatch match = remail.match(str);
233  if (match.hasMatch()) {
234  const QString all = match.captured(0);
235  email = match.captured(1);
236  str.remove(all); // remove mail address
237  }
238 
239  // replace "mailto", "(", ")" (to be extended)
240  email.remove(QStringLiteral("mailto:"));
241  email.remove(QRegularExpression(QStringLiteral("[()]")));
242 
243  // simplify the rest and use it as name
244 
245  name = str.simplified();
246 
247  // after removing the email, str might have
248  // the format "(Foo M. Bar)". We cut off
249  // parentheses if there are any. However, if
250  // str is of the format "Foo M. Bar (President)",
251  // we should not cut anything.
252 
253  QRegularExpression rename(QRegularExpression::anchoredPattern(QStringLiteral("^\\(([^)]*)\\)")));
254  match = rename.match(name);
255  if (match.hasMatch()) {
256  name = match.captured(1);
257  }
258 
259  name = name.isEmpty() ? QString() : name;
260  email = email.isEmpty() ? QString() : email;
261  uri = uri.isEmpty() ? QString() : uri;
262 
263  if (name.isEmpty() && email.isEmpty() && uri.isEmpty()) {
264  return PersonPtr(new PersonImpl());
265  }
266 
267  return PersonPtr(new PersonImpl(name, uri, email));
268 }
269 
270 ElementType::ElementType(const QString &localnamep, const QString &nsp)
271  : ns(nsp)
272  , localname(localnamep)
273 {
274 }
275 
276 bool ElementType::operator==(const ElementType &other) const
277 {
278  return localname == other.localname && ns == other.ns;
279 }
280 
281 } // namespace Syndication
QString anchoredPattern(const QString &expression)
bool endsWith(const QString &s, Qt::CaseSensitivity cs) const const
void addData(const char *data, int length)
LocalTime
QTime time() const const
QString trimmed() const const
QByteArray result() const const
QString normalize(QStringView str)
QString simplified() const const
bool isEmpty() const const
QByteArray toUtf8() const const
void setTimeSpec(Qt::TimeSpec spec)
qint64 toMSecsSinceEpoch() const const
static QString resolveEntities(const QString &text)
QDateTime fromString(const QString &string, Qt::DateFormat format)
KCALUTILS_EXPORT QString dateTimeToString(const QDateTime &date, bool dateOnly=false, bool shortfmt=true)
QString & replace(int position, int n, QChar after)
QString & remove(int position, int n)
QDateTime toUTC() const const
QByteArray toHex() const const
void setMSecsSinceEpoch(qint64 msecs)
bool isEmpty() const const
int count() const const
const char * constData() const const
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
Qt::TimeSpec timeSpec() const const
const char * name(StandardAction id)
bool isValid() const const
int rename(const QString &in, const QString &out)
bool contains(QChar ch, Qt::CaseSensitivity cs) const const
QString toString(Qt::DateFormat format) const const
void setTime(const QTime &time)
bool isNull() const const
char * data()
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Sun Dec 3 2023 03:52:05 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.