Syndication

rss2/document.cpp
1/*
2 This file is part of the syndication library
3 SPDX-FileCopyrightText: 2005 Frank Osterfeld <osterfeld@kde.org>
4
5 SPDX-License-Identifier: LGPL-2.0-or-later
6*/
7
8#include <rss2/category.h>
9#include <rss2/cloud.h>
10#include <rss2/document.h>
11#include <rss2/image.h>
12#include <rss2/item.h>
13#include <rss2/textinput.h>
14
15#include <constants.h>
16#include <documentvisitor.h>
17#include <tools.h>
18
19#include <QDomDocument>
20#include <QList>
21#include <QSet>
22#include <QString>
23
24#include <vector>
25
26namespace Syndication
27{
28namespace RSS2
29{
30class SYNDICATION_NO_EXPORT Document::DocumentPrivate
31{
32public:
33 DocumentPrivate()
34 : itemDescriptionIsCDATA(false)
35 , itemDescriptionContainsMarkup(false)
36 , itemDescGuessed(false)
37 , itemTitleIsCDATA(false)
38 , itemTitleContainsMarkup(false)
39 , itemTitlesGuessed(false)
40 {
41 }
42 mutable bool itemDescriptionIsCDATA;
43 mutable bool itemDescriptionContainsMarkup;
44 mutable bool itemDescGuessed;
45 mutable bool itemTitleIsCDATA;
46 mutable bool itemTitleContainsMarkup;
47 mutable bool itemTitlesGuessed;
48};
49
50Document::Document(const QDomElement &element)
51 : SpecificDocument()
52 , ElementWrapper(element)
53 , d(new DocumentPrivate)
54{
55}
56
57Document Document::fromXML(const QDomDocument &doc)
58{
59 QDomNode channelNode = doc.namedItem(QStringLiteral("rss")).namedItem(QStringLiteral("channel"));
60
61 return Document(channelNode.toElement());
62}
63
64Document::Document()
65 : SpecificDocument()
66 , ElementWrapper()
67 , d(new DocumentPrivate)
68{
69}
70
71Document::Document(const Document &other)
72 : SpecificDocument(other)
73 , ElementWrapper(other)
74{
75 d = other.d;
76}
77
78Document::~Document()
79{
80}
81
82Document &Document::operator=(const Document &other)
83{
84 ElementWrapper::operator=(other);
85 d = other.d;
86 return *this;
87}
88bool Document::isValid() const
89{
90 return !isNull();
91}
92
93QString Document::title() const
94{
95 return extractElementTextNS(QString(), QStringLiteral("title"));
96}
97
98QString Document::link() const
99{
100 return extractElementTextNS(QString(), QStringLiteral("link"));
101}
102
103QString Document::description() const
104{
105 const QString desc = extractElementTextNS(QString(), QStringLiteral("description"));
106 return normalize(desc);
107}
108
109QString Document::language() const
110{
111 const QString lang = extractElementTextNS(QString(), QStringLiteral("language"));
112
113 if (!lang.isNull()) {
114 return lang;
115 } else {
116 return extractElementTextNS(dublinCoreNamespace(), QStringLiteral("language"));
117 }
118}
119
120QString Document::copyright() const
121{
122 const QString rights = extractElementTextNS(QString(), QStringLiteral("copyright"));
123 if (!rights.isNull()) {
124 return rights;
125 } else {
126 // if <copyright> is not provided, use <dc:rights>
127 return extractElementTextNS(dublinCoreNamespace(), QStringLiteral("rights"));
128 }
129}
130
131QString Document::managingEditor() const
132{
133 return extractElementTextNS(QString(), QStringLiteral("managingEditor"));
134}
135
136QString Document::webMaster() const
137{
138 return extractElementTextNS(QString(), QStringLiteral("webMaster"));
139}
140
141time_t Document::pubDate() const
142{
143 QString str = extractElementTextNS(QString(), QStringLiteral("pubDate"));
144
145 if (!str.isNull()) {
146 return parseDate(str, RFCDate);
147 } else {
148 // if there is no pubDate, check for dc:date
149 str = extractElementTextNS(dublinCoreNamespace(), QStringLiteral("date"));
150 return parseDate(str, ISODate);
151 }
152}
153
154time_t Document::lastBuildDate() const
155{
156 const QString str = extractElementTextNS(QString(), QStringLiteral("lastBuildDate"));
157
158 return parseDate(str, RFCDate);
159}
160
161QList<Category> Document::categories() const
162{
163 const QList<QDomElement> catNodes = elementsByTagNameNS(QString(), QStringLiteral("category"));
164
165 QList<Category> categories;
166 categories.reserve(catNodes.count());
167
168 std::transform(catNodes.cbegin(), catNodes.cend(), std::back_inserter(categories), [](const QDomElement &element) {
169 return Category(element);
170 });
171
172 return categories;
173}
174
175QString Document::generator() const
176{
177 return extractElementTextNS(QString(), QStringLiteral("generator"));
178}
179
180QString Document::docs() const
181{
182 return extractElementTextNS(QString(), QStringLiteral("docs"));
183}
184
185Cloud Document::cloud() const
186{
187 return Cloud(firstElementByTagNameNS(QString(), QStringLiteral("cloud")));
188}
189
190int Document::ttl() const
191{
192 bool ok;
193 int c;
194
195 QString text = extractElementTextNS(QString(), QStringLiteral("ttl"));
196 c = text.toInt(&ok);
197 return ok ? c : 0;
198}
199
200Image Document::image() const
201{
202 return Image(firstElementByTagNameNS(QString(), QStringLiteral("image")));
203}
204
205TextInput Document::textInput() const
206{
207 TextInput ti = TextInput(firstElementByTagNameNS(QString(), QStringLiteral("textInput")));
208
209 if (!ti.isNull()) {
210 return ti;
211 }
212
213 // Netscape's version of RSS 0.91 has textinput, not textInput
214 return TextInput(firstElementByTagNameNS(QString(), QStringLiteral("textinput")));
215}
216
217QSet<int> Document::skipHours() const
218{
219 QSet<int> skipHours;
220 QDomElement skipHoursNode = firstElementByTagNameNS(QString(), QStringLiteral("skipHours"));
221 if (!skipHoursNode.isNull()) {
222 ElementWrapper skipHoursWrapper(skipHoursNode);
223 bool ok = false;
224 const QList<QDomElement> hours = skipHoursWrapper.elementsByTagNameNS(QString(), QStringLiteral("hour"));
225 for (const auto &element : hours) {
226 const int h = element.text().toInt(&ok);
227 if (ok) {
228 skipHours.insert(h);
229 }
230 }
231 }
232
233 return skipHours;
234}
235
236QSet<Document::DayOfWeek> Document::skipDays() const
237{
238 QSet<DayOfWeek> skipDays;
239 QDomElement skipDaysNode = firstElementByTagNameNS(QString(), QStringLiteral("skipDays"));
240 if (!skipDaysNode.isNull()) {
241 ElementWrapper skipDaysWrapper(skipDaysNode);
242 struct DayInfo {
244 DayOfWeek enumValue;
245 };
246 static const std::vector<DayInfo> weekDays = {
247 {QLatin1String("Monday"), Monday},
248 {QLatin1String("Tuesday"), Tuesday},
249 {QLatin1String("Wednesday"), Wednesday},
250 {QLatin1String("Thursday"), Thursday},
251 {QLatin1String("Friday"), Friday},
252 {QLatin1String("Saturday"), Saturday},
253 {QLatin1String("Sunday"), Sunday},
254 };
255
256 const QList<QDomElement> days = skipDaysWrapper.elementsByTagNameNS(QString(), QStringLiteral("day"));
257 for (const auto &element : days) {
258 const QString day = element.text();
259 auto it = std::find_if(weekDays.cbegin(), weekDays.cend(), [&day](const DayInfo &info) {
260 return info.name == day;
261 });
262 if (it != weekDays.cend()) {
263 skipDays.insert(it->enumValue);
264 }
265 }
266 }
267
268 return skipDays;
269}
270
271QList<Item> Document::items() const
272{
273 const QList<QDomElement> itemNodes = elementsByTagNameNS(QString(), QStringLiteral("item"));
274
275 QList<Item> items;
276 items.reserve(itemNodes.count());
277
278 DocumentPtr doccpy(new Document(*this));
279
280 std::transform(itemNodes.cbegin(), itemNodes.cend(), std::back_inserter(items), [&doccpy](const QDomElement &element) {
281 return Item(element, doccpy);
282 });
283
284 return items;
285}
286QList<QDomElement> Document::unhandledElements() const
287{
288 // TODO: do not hardcode this list here
289 static std::vector<ElementType> handled; // QVector would require a default ctor, and ElementType is too big for QList
290 if (handled.empty()) {
291 handled.reserve(22);
292 handled.push_back(ElementType(QStringLiteral("title")));
293 handled.push_back(ElementType(QStringLiteral("link")));
294 handled.push_back(ElementType(QStringLiteral("description")));
295 handled.push_back(ElementType(QStringLiteral("language")));
296 handled.push_back(ElementType(QStringLiteral("copyright")));
297 handled.push_back(ElementType(QStringLiteral("managingEditor")));
298 handled.push_back(ElementType(QStringLiteral("webMaster")));
299 handled.push_back(ElementType(QStringLiteral("pubDate")));
300 handled.push_back(ElementType(QStringLiteral("lastBuildDate")));
301 handled.push_back(ElementType(QStringLiteral("skipDays")));
302 handled.push_back(ElementType(QStringLiteral("skipHours")));
303 handled.push_back(ElementType(QStringLiteral("item")));
304 handled.push_back(ElementType(QStringLiteral("textinput")));
305 handled.push_back(ElementType(QStringLiteral("textInput")));
306 handled.push_back(ElementType(QStringLiteral("image")));
307 handled.push_back(ElementType(QStringLiteral("ttl")));
308 handled.push_back(ElementType(QStringLiteral("generator")));
309 handled.push_back(ElementType(QStringLiteral("docs")));
310 handled.push_back(ElementType(QStringLiteral("cloud")));
311 handled.push_back(ElementType(QStringLiteral("language"), dublinCoreNamespace()));
312 handled.push_back(ElementType(QStringLiteral("rights"), dublinCoreNamespace()));
313 handled.push_back(ElementType(QStringLiteral("date"), dublinCoreNamespace()));
314 }
315
316 QList<QDomElement> notHandled;
317
318 QDomNodeList children = element().childNodes();
319 const int numChildren = children.size();
320 for (int i = 0; i < numChildren; ++i) {
321 QDomElement el = children.at(i).toElement();
322 if (!el.isNull() //
323 && std::find(handled.cbegin(), handled.cend(), ElementType(el.localName(), el.namespaceURI())) == handled.cend()) {
324 notHandled.append(el);
325 }
326 }
327
328 return notHandled;
329}
330
331QString Document::debugInfo() const
332{
333 QString info;
334 info += QLatin1String("### Document: ###################\n");
335 if (!title().isNull()) {
336 info += QLatin1String("title: #") + title() + QLatin1String("#\n");
337 }
338 if (!description().isNull()) {
339 info += QLatin1String("description: #") + description() + QLatin1String("#\n");
340 }
341 if (!link().isNull()) {
342 info += QLatin1String("link: #") + link() + QLatin1String("#\n");
343 }
344 if (!language().isNull()) {
345 info += QLatin1String("language: #") + language() + QLatin1String("#\n");
346 }
347 if (!copyright().isNull()) {
348 info += QLatin1String("copyright: #") + copyright() + QLatin1String("#\n");
349 }
350 if (!managingEditor().isNull()) {
351 info += QLatin1String("managingEditor: #") + managingEditor() + QLatin1String("#\n");
352 }
353 if (!webMaster().isNull()) {
354 info += QLatin1String("webMaster: #") + webMaster() + QLatin1String("#\n");
355 }
356
357 QString dpubdate = dateTimeToString(pubDate());
358 if (!dpubdate.isNull()) {
359 info += QLatin1String("pubDate: #") + dpubdate + QLatin1String("#\n");
360 }
361
362 QString dlastbuilddate = dateTimeToString(lastBuildDate());
363 if (!dlastbuilddate.isNull()) {
364 info += QLatin1String("lastBuildDate: #") + dlastbuilddate + QLatin1String("#\n");
365 }
366
367 if (!textInput().isNull()) {
368 info += textInput().debugInfo();
369 }
370 if (!cloud().isNull()) {
371 info += cloud().debugInfo();
372 }
373 if (!image().isNull()) {
374 info += image().debugInfo();
375 }
376
377 const QList<Category> cats = categories();
378
379 for (const auto &c : cats) {
380 info += c.debugInfo();
381 }
382
383 const QList<Item> litems = items();
384 for (const auto &item : litems) {
385 info += item.debugInfo();
386 }
387 info += QLatin1String("### Document end ################\n");
388 return info;
389}
390
391void Document::getItemTitleFormatInfo(bool *isCDATA, bool *containsMarkup) const
392{
393 if (!d->itemTitlesGuessed) {
395 QList<Item> litems = items();
396
397 if (litems.isEmpty()) {
398 d->itemTitlesGuessed = true;
399 return;
400 }
401
402 QDomElement titleEl = (*litems.begin()).firstElementByTagNameNS(QString(), QStringLiteral("title"));
403 d->itemTitleIsCDATA = titleEl.firstChild().isCDATASection();
404
405 const int nmax = std::min<int>(litems.size(), 10); // we check a maximum of 10 items
406 int i = 0;
407
408 for (const auto &item : litems) {
409 if (i++ >= nmax) {
410 break;
411 }
412 titles += item.originalTitle();
413 }
414
415 d->itemTitleContainsMarkup = stringContainsMarkup(titles);
416 d->itemTitlesGuessed = true;
417 }
418
419 if (isCDATA != nullptr) {
420 *isCDATA = d->itemTitleIsCDATA;
421 }
422 if (containsMarkup != nullptr) {
423 *containsMarkup = d->itemTitleContainsMarkup;
424 }
425}
426
427void Document::getItemDescriptionFormatInfo(bool *isCDATA, bool *containsMarkup) const
428{
429 if (!d->itemDescGuessed) {
430 QString desc;
431 QList<Item> litems = items();
432
433 if (litems.isEmpty()) {
434 d->itemDescGuessed = true;
435 return;
436 }
437
438 QDomElement descEl = (*litems.begin()).firstElementByTagNameNS(QString(), QStringLiteral("description"));
439 d->itemDescriptionIsCDATA = descEl.firstChild().isCDATASection();
440
441 const int nmax = std::min<int>(litems.size(), 10); // we check a maximum of 10 items
442 int i = 0;
443
444 for (const auto &item : litems) {
445 if (i++ >= nmax) {
446 break;
447 }
448 desc += item.originalDescription();
449 }
450
451 d->itemDescriptionContainsMarkup = stringContainsMarkup(desc);
452 d->itemDescGuessed = true;
453 }
454
455 if (isCDATA != nullptr) {
456 *isCDATA = d->itemDescriptionIsCDATA;
457 }
458 if (containsMarkup != nullptr) {
459 *containsMarkup = d->itemDescriptionContainsMarkup;
460 }
461}
462
463bool Document::accept(DocumentVisitor *visitor)
464{
465 return visitor->visitRSS2Document(this);
466}
467
468} // namespace RSS2
469} // namespace Syndication
KCALUTILS_EXPORT QString dateTimeToString(const QDateTime &date, bool dateOnly=false, bool shortfmt=true)
KIOCORE_EXPORT CopyJob * link(const QList< QUrl > &src, const QUrl &destDir, JobFlags flags=DefaultFlags)
QString normalize(QStringView str)
QString name(StandardShortcut id)
KEDUVOCDOCUMENT_EXPORT QStringList titles(const QString &language=QString())
QDomNode firstChild() const const
bool isCDATASection() const const
bool isNull() const const
QString localName() const const
QDomNode namedItem(const QString &name) const const
QString namespaceURI() const const
QDomElement toElement() const const
int size() const const
void append(QList< T > &&value)
iterator begin()
const_iterator cbegin() const const
const_iterator cend() const const
qsizetype count() const const
bool isEmpty() const const
void reserve(qsizetype size)
qsizetype size() const const
const QObjectList & children() const const
iterator insert(const T &value)
bool isNull() const const
int toInt(bool *ok, int base) const const
DayOfWeek
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:14:15 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.