Syndication

tools.cpp
1/*
2 This file is part of the syndication library
3 SPDX-FileCopyrightText: 2006 Frank Osterfeld <osterfeld@kde.org>
4
5 SPDX-License-Identifier: LGPL-2.0-or-later
6*/
7
8#include "tools.h"
9#include "personimpl.h"
10
11#include <KCharsets>
12
13#include <QByteArray>
14#include <QCryptographicHash>
15#include <QDateTime>
16#include <QRegularExpression>
17#include <QTimeZone>
18
19#include <ctime>
20
21namespace Syndication
22{
23QCryptographicHash md5Machine(QCryptographicHash::Md5);
24
25unsigned int calcHash(const QString &str)
26{
27 return calcHash(str.toUtf8());
28}
29
30unsigned int calcHash(const QByteArray &array)
31{
32 if (array.isEmpty()) {
33 return 0;
34 } else {
35 const char *s = array.data();
36 unsigned int hash = 5381;
37 int c;
38 while ((c = *s++)) {
39 hash = ((hash << 5) + hash) + c; // hash*33 + c
40 }
41 return hash;
42 }
43}
44
45static uint toTimeT(QDateTime &kdt)
46{
47 if (kdt.isValid()) {
48 // work around unspecified timezones/date-only timestamps by setting the time to 12:00 UTC
49 if (kdt.time().isNull() //
50 || (kdt.time() == QTime(0, 0) && kdt.timeSpec() == Qt::LocalTime)) {
52 kdt.setTime(QTime(12, 0));
53 }
54 return kdt.toMSecsSinceEpoch() / 1000;
55 } else {
56 return 0;
57 }
58}
59
60uint parseISODate(const QString &str)
61{
62 QDateTime kdt = QDateTime::fromString(str, Qt::ISODate);
63 return toTimeT(kdt);
64}
65
66uint parseRFCDate(const QString &str)
67{
68 QDateTime kdt = QDateTime::fromString(str, Qt::RFC2822Date);
69 // Qt5 used to ignore invalid textual offsets but Qt6 rejects those, so handle that explictly
70 if (!kdt.isValid() && str.endsWith(QLatin1String(" GMT"))) {
71 kdt = QDateTime::fromString(QStringView(str).chopped(4), Qt::RFC2822Date);
72 }
73 return toTimeT(kdt);
74}
75
76uint parseDate(const QString &str, DateFormat hint)
77{
78 if (str.isEmpty()) {
79 return 0;
80 }
81
82 if (hint == RFCDate) {
83 time_t t = parseRFCDate(str);
84 return t != 0 ? t : parseISODate(str);
85 } else {
86 time_t t = parseISODate(str);
87 return t != 0 ? t : parseRFCDate(str);
88 }
89}
90
91QString dateTimeToString(uint date)
92{
93 if (date == 0) {
94 return QString();
95 }
96
97 const QString format = QStringLiteral("ddd MMM d HH:mm:ss yyyy");
98 QDateTime dt;
99 dt.setMSecsSinceEpoch(quint64(date) * 1000);
100 return dt.toUTC().toString(format);
101}
102
103QString calcMD5Sum(const QString &str)
104{
105 md5Machine.reset();
106 md5Machine.addData(str.toUtf8());
107 return QLatin1String(md5Machine.result().toHex().constData());
108}
109
110QString resolveEntities(const QString &str)
111{
112 return KCharsets::resolveEntities(str);
113}
114
115QString escapeSpecialCharacters(const QString &strp)
116{
117 QString str(strp);
118 str.replace(QLatin1Char('&'), QLatin1String("&amp;"));
119 str.replace(QLatin1Char('\"'), QLatin1String("&quot;"));
120 str.replace(QLatin1Char('<'), QLatin1String("&lt;"));
121 str.replace(QLatin1Char('>'), QLatin1String("&gt;"));
122 str.replace(QLatin1Char('\''), QLatin1String("&apos;"));
123 return str.trimmed();
124}
125
126QString convertNewlines(const QString &strp)
127{
128 QString str(strp);
129 str.replace(QLatin1Char('\n'), QLatin1String("<br/>"));
130 return str;
131}
132
133QString plainTextToHtml(const QString &plainText)
134{
135 QString str(plainText);
136 str.replace(QLatin1Char('&'), QLatin1String("&amp;"));
137 str.replace(QLatin1Char('\"'), QLatin1String("&quot;"));
138 str.replace(QLatin1Char('<'), QLatin1String("&lt;"));
139 // str.replace(QLatin1Char('>'), QLatin1String("&gt;"));
140 str.replace(QLatin1Char('\n'), QLatin1String("<br/>"));
141 return str.trimmed();
142}
143
144QString htmlToPlainText(const QString &html)
145{
146 QString str(html);
147 // TODO: preserve some formatting, such as line breaks
148 str.remove(QRegularExpression(QStringLiteral("<[^>]*?>"))); // remove tags
149 str = resolveEntities(str);
150 return str.trimmed();
151}
152
153static QRegularExpression tagRegExp()
154{
155 static QRegularExpression exp(QStringLiteral("<\\w+.*/?>"));
156 return exp;
157}
158
159bool stringContainsMarkup(const QString &str)
160{
161 // check for entities
162 if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) {
163 return true;
164 }
165
166 const int ltc = str.count(QLatin1Char('<'));
167 if (ltc == 0) {
168 return false;
169 }
170
171 return str.contains(tagRegExp());
172}
173
174bool isHtml(const QString &str)
175{
176 // check for entities
177 if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) {
178 return true;
179 }
180
181 const int ltc = str.count(QLatin1Char('<'));
182 if (ltc == 0) {
183 return false;
184 }
185
186 return str.contains(tagRegExp());
187}
188
189QString normalize(const QString &str)
190{
191 return isHtml(str) ? str.trimmed() : plainTextToHtml(str);
192}
193
194QString normalize(const QString &strp, bool isCDATA, bool containsMarkup)
195{
196 if (containsMarkup) {
197 return strp.trimmed();
198 } else {
199 if (isCDATA) {
200 QString str = resolveEntities(strp);
201 str = escapeSpecialCharacters(str);
202 str = convertNewlines(str);
203 str = str.trimmed();
204 return str;
205 } else {
206 QString str = escapeSpecialCharacters(strp);
207 str = str.trimmed();
208 return str;
209 }
210 }
211}
212
213PersonPtr personFromString(const QString &strp)
214{
215 QString str = strp.trimmed();
216 if (str.isEmpty()) {
217 return PersonPtr(new PersonImpl());
218 }
219
220 str = resolveEntities(str);
221 QString name;
222 QString uri;
223 QString email;
224
225 // look for something looking like a mail address ("foo@bar.com",
226 // "<foo@bar.com>") and extract it
227
228 const QRegularExpression remail(QStringLiteral("<?([^@\\s<]+@[^>\\s]+)>?")); // FIXME: user "proper" regexp,
229 // search kmail source for it
230
231 QRegularExpressionMatch match = remail.match(str);
232 if (match.hasMatch()) {
233 const QString all = match.captured(0);
234 email = match.captured(1);
235 str.remove(all); // remove mail address
236 }
237
238 // replace "mailto", "(", ")" (to be extended)
239 email.remove(QStringLiteral("mailto:"));
240 email.remove(QRegularExpression(QStringLiteral("[()]")));
241
242 // simplify the rest and use it as name
243
244 name = str.simplified();
245
246 // after removing the email, str might have
247 // the format "(Foo M. Bar)". We cut off
248 // parentheses if there are any. However, if
249 // str is of the format "Foo M. Bar (President)",
250 // we should not cut anything.
251
252 QRegularExpression rename(QRegularExpression::anchoredPattern(QStringLiteral("^\\(([^)]*)\\)")));
253 match = rename.match(name);
254 if (match.hasMatch()) {
255 name = match.captured(1);
256 }
257
258 name = name.isEmpty() ? QString() : name;
259 email = email.isEmpty() ? QString() : email;
260 uri = uri.isEmpty() ? QString() : uri;
261
262 if (name.isEmpty() && email.isEmpty() && uri.isEmpty()) {
263 return PersonPtr(new PersonImpl());
264 }
265
266 return PersonPtr(new PersonImpl(name, uri, email));
267}
268
269ElementType::ElementType(const QString &localnamep, const QString &nsp)
270 : ns(nsp)
271 , localname(localnamep)
272{
273}
274
275bool ElementType::operator==(const ElementType &other) const
276{
277 return localname == other.localname && ns == other.ns;
278}
279
280} // namespace Syndication
static QString resolveEntities(const QString &text)
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
KIOCORE_EXPORT SimpleJob * rename(const QUrl &src, const QUrl &dest, JobFlags flags=DefaultFlags)
QString name(StandardAction id)
char * data()
bool isEmpty() const const
QDateTime fromString(QStringView string, QStringView format, QCalendar cal)
bool isValid() const const
void setMSecsSinceEpoch(qint64 msecs)
void setTime(QTime time)
void setTimeZone(const QTimeZone &toZone)
QTime time() const const
Qt::TimeSpec timeSpec() const const
qint64 toMSecsSinceEpoch() const const
QString toString(QStringView format, QCalendar cal) const const
QDateTime toUTC() const const
QString anchoredPattern(QStringView expression)
qsizetype count() const const
bool contains(QChar ch, Qt::CaseSensitivity cs) const const
bool endsWith(QChar c, Qt::CaseSensitivity cs) const const
bool isEmpty() const const
QString & remove(QChar ch, Qt::CaseSensitivity cs)
QString & replace(QChar before, QChar after, Qt::CaseSensitivity cs)
QString simplified() const const
QByteArray toUtf8() const const
QString trimmed() const const
LocalTime
bool isNull() const const
QTimeZone utc()
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Thu Jan 23 2025 19:01:16 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.