Syndication

tools.cpp
1 /*
2  This file is part of the syndication library
3  SPDX-FileCopyrightText: 2006 Frank Osterfeld <[email protected]>
4 
5  SPDX-License-Identifier: LGPL-2.0-or-later
6 */
7 
8 #include "tools.h"
9 #include "personimpl.h"
10 
11 #include <KCharsets>
12 
13 #include <QByteArray>
14 #include <QCryptographicHash>
15 #include <QDateTime>
16 #include <QRegularExpression>
17 
18 #include <ctime>
19 
20 namespace Syndication
21 {
23 
24 unsigned int calcHash(const QString &str)
25 {
26  return calcHash(str.toUtf8());
27 }
28 
29 unsigned int calcHash(const QByteArray &array)
30 {
31  if (array.isEmpty()) {
32  return 0;
33  } else {
34  const char *s = array.data();
35  unsigned int hash = 5381;
36  int c;
37  while ((c = *s++)) {
38  hash = ((hash << 5) + hash) + c; // hash*33 + c
39  }
40  return hash;
41  }
42 }
43 
44 static uint toTimeT(QDateTime &kdt)
45 {
46  if (kdt.isValid()) {
47  // work around unspecified timezones/date-only timestamps by setting the time to 12:00 UTC
48  if (kdt.time().isNull() //
49  || (kdt.time() == QTime(0, 0) && kdt.timeSpec() == Qt::LocalTime)) {
50  kdt.setTimeSpec(Qt::UTC);
51  kdt.setTime(QTime(12, 0));
52  }
53  return kdt.toMSecsSinceEpoch() / 1000;
54  } else {
55  return 0;
56  }
57 }
58 
59 uint parseISODate(const QString &str)
60 {
62  return toTimeT(kdt);
63 }
64 
65 uint parseRFCDate(const QString &str)
66 {
68  // Qt5 used to ignore invalid textual offsets but Qt6 rejects those, so handle that explictly
69  if (!kdt.isValid() && str.endsWith(QLatin1String(" GMT"))) {
70  kdt = QDateTime::fromString(QStringView(str).chopped(4), Qt::RFC2822Date);
71  }
72  return toTimeT(kdt);
73 }
74 
75 uint parseDate(const QString &str, DateFormat hint)
76 {
77  if (str.isEmpty()) {
78  return 0;
79  }
80 
81  if (hint == RFCDate) {
82  time_t t = parseRFCDate(str);
83  return t != 0 ? t : parseISODate(str);
84  } else {
85  time_t t = parseISODate(str);
86  return t != 0 ? t : parseRFCDate(str);
87  }
88 }
89 
90 QString dateTimeToString(uint date)
91 {
92  if (date == 0) {
93  return QString();
94  }
95 
96  const QString format = QStringLiteral("ddd MMM d HH:mm:ss yyyy");
97  QDateTime dt;
98  dt.setMSecsSinceEpoch(quint64(date) * 1000);
99  return dt.toUTC().toString(format);
100 }
101 
102 QString calcMD5Sum(const QString &str)
103 {
104  md5Machine.reset();
105  md5Machine.addData(str.toUtf8());
106  return QLatin1String(md5Machine.result().toHex().constData());
107 }
108 
109 QString resolveEntities(const QString &str)
110 {
111  return KCharsets::resolveEntities(str);
112 }
113 
114 QString escapeSpecialCharacters(const QString &strp)
115 {
116  QString str(strp);
117  str.replace(QLatin1Char('&'), QLatin1String("&amp;"));
118  str.replace(QLatin1Char('\"'), QLatin1String("&quot;"));
119  str.replace(QLatin1Char('<'), QLatin1String("&lt;"));
120  str.replace(QLatin1Char('>'), QLatin1String("&gt;"));
121  str.replace(QLatin1Char('\''), QLatin1String("&apos;"));
122  return str.trimmed();
123 }
124 
125 QString convertNewlines(const QString &strp)
126 {
127  QString str(strp);
128  str.replace(QLatin1Char('\n'), QLatin1String("<br/>"));
129  return str;
130 }
131 
132 QString plainTextToHtml(const QString &plainText)
133 {
134  QString str(plainText);
135  str.replace(QLatin1Char('&'), QLatin1String("&amp;"));
136  str.replace(QLatin1Char('\"'), QLatin1String("&quot;"));
137  str.replace(QLatin1Char('<'), QLatin1String("&lt;"));
138  // str.replace(QLatin1Char('>'), QLatin1String("&gt;"));
139  str.replace(QLatin1Char('\n'), QLatin1String("<br/>"));
140  return str.trimmed();
141 }
142 
143 QString htmlToPlainText(const QString &html)
144 {
145  QString str(html);
146  // TODO: preserve some formatting, such as line breaks
147  str.remove(QRegularExpression(QStringLiteral("<[^>]*?>"))); // remove tags
148  str = resolveEntities(str);
149  return str.trimmed();
150 }
151 
152 static QRegularExpression tagRegExp()
153 {
154  static QRegularExpression exp(QStringLiteral("<\\w+.*/?>"));
155  return exp;
156 }
157 
158 bool stringContainsMarkup(const QString &str)
159 {
160  // check for entities
161  if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) {
162  return true;
163  }
164 
165  const int ltc = str.count(QLatin1Char('<'));
166  if (ltc == 0) {
167  return false;
168  }
169 
170  return str.contains(tagRegExp());
171 }
172 
173 bool isHtml(const QString &str)
174 {
175  // check for entities
176  if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) {
177  return true;
178  }
179 
180  const int ltc = str.count(QLatin1Char('<'));
181  if (ltc == 0) {
182  return false;
183  }
184 
185  return str.contains(tagRegExp());
186 }
187 
188 QString normalize(const QString &str)
189 {
190  return isHtml(str) ? str.trimmed() : plainTextToHtml(str);
191 }
192 
193 QString normalize(const QString &strp, bool isCDATA, bool containsMarkup)
194 {
195  if (containsMarkup) {
196  return strp.trimmed();
197  } else {
198  if (isCDATA) {
199  QString str = resolveEntities(strp);
200  str = escapeSpecialCharacters(str);
201  str = convertNewlines(str);
202  str = str.trimmed();
203  return str;
204  } else {
205  QString str = escapeSpecialCharacters(strp);
206  str = str.trimmed();
207  return str;
208  }
209  }
210 }
211 
212 PersonPtr personFromString(const QString &strp)
213 {
214  QString str = strp.trimmed();
215  if (str.isEmpty()) {
216  return PersonPtr(new PersonImpl());
217  }
218 
219  str = resolveEntities(str);
220  QString name;
221  QString uri;
222  QString email;
223 
224  // look for something looking like a mail address ("[email protected]",
225  // "<[email protected]>") and extract it
226 
227  const QRegularExpression remail(QStringLiteral("<?([^@\\s<][email protected][^>\\s]+)>?")); // FIXME: user "proper" regexp,
228  // search kmail source for it
229 
230  QRegularExpressionMatch match = remail.match(str);
231  if (match.hasMatch()) {
232  const QString all = match.captured(0);
233  email = match.captured(1);
234  str.remove(all); // remove mail address
235  }
236 
237  // replace "mailto", "(", ")" (to be extended)
238  email.remove(QStringLiteral("mailto:"));
239  email.remove(QRegularExpression(QStringLiteral("[()]")));
240 
241  // simplify the rest and use it as name
242 
243  name = str.simplified();
244 
245  // after removing the email, str might have
246  // the format "(Foo M. Bar)". We cut off
247  // parentheses if there are any. However, if
248  // str is of the format "Foo M. Bar (President)",
249  // we should not cut anything.
250 
251  QRegularExpression rename(QRegularExpression::anchoredPattern(QStringLiteral("^\\(([^)]*)\\)")));
252  match = rename.match(name);
253  if (match.hasMatch()) {
254  name = match.captured(1);
255  }
256 
257  name = name.isEmpty() ? QString() : name;
258  email = email.isEmpty() ? QString() : email;
259  uri = uri.isEmpty() ? QString() : uri;
260 
261  if (name.isEmpty() && email.isEmpty() && uri.isEmpty()) {
262  return PersonPtr(new PersonImpl());
263  }
264 
265  return PersonPtr(new PersonImpl(name, uri, email));
266 }
267 
268 ElementType::ElementType(const QString &localnamep, const QString &nsp)
269  : ns(nsp)
270  , localname(localnamep)
271 {
272 }
273 
274 bool ElementType::operator==(const ElementType &other) const
275 {
276  return localname == other.localname && ns == other.ns;
277 }
278 
279 } // namespace Syndication
QString anchoredPattern(const QString &expression)
bool endsWith(const QString &s, Qt::CaseSensitivity cs) const const
KIOCORE_EXPORT SimpleJob * rename(const QUrl &src, const QUrl &dest, JobFlags flags=DefaultFlags)
void addData(const char *data, int length)
LocalTime
QTime time() const const
QString trimmed() const const
QByteArray result() const const
QString normalize(QStringView str)
QString simplified() const const
bool isEmpty() const const
QByteArray toUtf8() const const
void setTimeSpec(Qt::TimeSpec spec)
qint64 toMSecsSinceEpoch() const const
static QString resolveEntities(const QString &text)
QDateTime fromString(const QString &string, Qt::DateFormat format)
KCALUTILS_EXPORT QString dateTimeToString(const QDateTime &date, bool dateOnly=false, bool shortfmt=true)
QString & replace(int position, int n, QChar after)
QString & remove(int position, int n)
QDateTime toUTC() const const
QByteArray toHex() const const
void setMSecsSinceEpoch(qint64 msecs)
bool isEmpty() const const
int count() const const
const char * constData() const const
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
Qt::TimeSpec timeSpec() const const
QString name(StandardShortcut id)
bool isValid() const const
bool contains(QChar ch, Qt::CaseSensitivity cs) const const
QString toString(Qt::DateFormat format) const const
void setTime(const QTime &time)
bool isNull() const const
char * data()
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Tue Jun 6 2023 03:56:27 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.