Syndication

rss2/document.cpp
1/*
2 This file is part of the syndication library
3 SPDX-FileCopyrightText: 2005 Frank Osterfeld <osterfeld@kde.org>
4
5 SPDX-License-Identifier: LGPL-2.0-or-later
6*/
7
8#include <rss2/category.h>
9#include <rss2/cloud.h>
10#include <rss2/document.h>
11#include <rss2/image.h>
12#include <rss2/item.h>
13#include <rss2/textinput.h>
14
15#include <constants.h>
16#include <documentvisitor.h>
17#include <tools.h>
18
19#include <QDomDocument>
20#include <QList>
21#include <QSet>
22#include <QString>
23
24#include <vector>
25
26namespace Syndication
27{
28namespace RSS2
29{
30class SYNDICATION_NO_EXPORT Document::DocumentPrivate
31{
32public:
33 DocumentPrivate()
34 : itemDescriptionIsCDATA(false)
35 , itemDescriptionContainsMarkup(false)
36 , itemDescGuessed(false)
37 , itemTitleIsCDATA(false)
38 , itemTitleContainsMarkup(false)
39 , itemTitlesGuessed(false)
40 {
41 }
42 mutable bool itemDescriptionIsCDATA;
43 mutable bool itemDescriptionContainsMarkup;
44 mutable bool itemDescGuessed;
45 mutable bool itemTitleIsCDATA;
46 mutable bool itemTitleContainsMarkup;
47 mutable bool itemTitlesGuessed;
48};
49
52 , ElementWrapper(element)
53 , d(new DocumentPrivate)
54{
55}
56
58{
59 QDomNode channelNode = doc.namedItem(QStringLiteral("rss")).namedItem(QStringLiteral("channel"));
60
61 return Document(channelNode.toElement());
62}
63
67 , d(new DocumentPrivate)
68{
69}
70
72 : SpecificDocument(other)
73 , ElementWrapper(other)
74{
75 d = other.d;
76}
77
81
83{
85 d = other.d;
86 return *this;
87}
89{
90 return !isNull();
91}
92
94{
95 return extractElementTextNS(QString(), QStringLiteral("title"));
96}
97
99{
100 return extractElementTextNS(QString(), QStringLiteral("link"));
101}
102
104{
105 const QString desc = extractElementTextNS(QString(), QStringLiteral("description"));
106 return normalize(desc);
107}
108
110{
111 const QString lang = extractElementTextNS(QString(), QStringLiteral("language"));
112
113 if (!lang.isNull()) {
114 return lang;
115 } else {
116 return extractElementTextNS(dublinCoreNamespace(), QStringLiteral("language"));
117 }
118}
119
121{
122 const QString rights = extractElementTextNS(QString(), QStringLiteral("copyright"));
123 if (!rights.isNull()) {
124 return rights;
125 } else {
126 // if <copyright> is not provided, use <dc:rights>
127 return extractElementTextNS(dublinCoreNamespace(), QStringLiteral("rights"));
128 }
129}
130
132{
133 return extractElementTextNS(QString(), QStringLiteral("managingEditor"));
134}
135
137{
138 return extractElementTextNS(QString(), QStringLiteral("webMaster"));
139}
140
141time_t Document::pubDate() const
142{
143 QString str = extractElementTextNS(QString(), QStringLiteral("pubDate"));
144
145 if (!str.isNull()) {
146 return parseDate(str, RFCDate);
147 } else {
148 // if there is no pubDate, check for dc:date
149 str = extractElementTextNS(dublinCoreNamespace(), QStringLiteral("date"));
150 return parseDate(str, ISODate);
151 }
152}
153
155{
156 const QString str = extractElementTextNS(QString(), QStringLiteral("lastBuildDate"));
157
158 return parseDate(str, RFCDate);
159}
160
162{
163 const QList<QDomElement> catNodes = elementsByTagNameNS(QString(), QStringLiteral("category"));
164
166 categories.reserve(catNodes.count());
167
168 std::transform(catNodes.cbegin(), catNodes.cend(), std::back_inserter(categories), [](const QDomElement &element) {
169 return Category(element);
170 });
171
172 return categories;
173}
174
176{
177 return extractElementTextNS(QString(), QStringLiteral("generator"));
178}
179
181{
182 return extractElementTextNS(QString(), QStringLiteral("docs"));
183}
184
186{
187 return Cloud(firstElementByTagNameNS(QString(), QStringLiteral("cloud")));
188}
189
190int Document::ttl() const
191{
192 bool ok;
193 int c;
194
195 QString text = extractElementTextNS(QString(), QStringLiteral("ttl"));
196 c = text.toInt(&ok);
197 return ok ? c : 0;
198}
199
201{
202 return Image(firstElementByTagNameNS(QString(), QStringLiteral("image")));
203}
204
206{
207 TextInput ti = TextInput(firstElementByTagNameNS(QString(), QStringLiteral("textInput")));
208
209 if (!ti.isNull()) {
210 return ti;
211 }
212
213 // Netscape's version of RSS 0.91 has textinput, not textInput
214 return TextInput(firstElementByTagNameNS(QString(), QStringLiteral("textinput")));
215}
216
218{
220 QDomElement skipHoursNode = firstElementByTagNameNS(QString(), QStringLiteral("skipHours"));
221 if (!skipHoursNode.isNull()) {
222 ElementWrapper skipHoursWrapper(skipHoursNode);
223 bool ok = false;
224 const QList<QDomElement> hours = skipHoursWrapper.elementsByTagNameNS(QString(), QStringLiteral("hour"));
225 for (const auto &element : hours) {
226 const int h = element.text().toInt(&ok);
227 if (ok) {
228 skipHours.insert(h);
229 }
230 }
231 }
232
233 return skipHours;
234}
235
237{
239 QDomElement skipDaysNode = firstElementByTagNameNS(QString(), QStringLiteral("skipDays"));
240 if (!skipDaysNode.isNull()) {
241 ElementWrapper skipDaysWrapper(skipDaysNode);
242 struct DayInfo {
243 QLatin1String name;
244 DayOfWeek enumValue;
245 };
246 static const std::vector<DayInfo> weekDays = {
247 {QLatin1String("Monday"), Monday},
248 {QLatin1String("Tuesday"), Tuesday},
249 {QLatin1String("Wednesday"), Wednesday},
250 {QLatin1String("Thursday"), Thursday},
251 {QLatin1String("Friday"), Friday},
252 {QLatin1String("Saturday"), Saturday},
253 {QLatin1String("Sunday"), Sunday},
254 };
255
256 const QList<QDomElement> days = skipDaysWrapper.elementsByTagNameNS(QString(), QStringLiteral("day"));
257 for (const auto &element : days) {
258 const QString day = element.text();
259 auto it = std::find_if(weekDays.cbegin(), weekDays.cend(), [&day](const DayInfo &info) {
260 return info.name == day;
261 });
262 if (it != weekDays.cend()) {
263 skipDays.insert(it->enumValue);
264 }
265 }
266 }
267
268 return skipDays;
269}
270
272{
273 const QList<QDomElement> itemNodes = elementsByTagNameNS(QString(), QStringLiteral("item"));
274
276 items.reserve(itemNodes.count());
277
278 DocumentPtr doccpy(new Document(*this));
279
280 std::transform(itemNodes.cbegin(), itemNodes.cend(), std::back_inserter(items), [&doccpy](const QDomElement &element) {
281 return Item(element, doccpy);
282 });
283
284 return items;
285}
287{
288 // TODO: do not hardcode this list here
289 static std::vector<ElementType> handled; // QVector would require a default ctor, and ElementType is too big for QList
290 if (handled.empty()) {
291 handled.reserve(22);
292 handled.push_back(ElementType(QStringLiteral("title")));
293 handled.push_back(ElementType(QStringLiteral("link")));
294 handled.push_back(ElementType(QStringLiteral("description")));
295 handled.push_back(ElementType(QStringLiteral("language")));
296 handled.push_back(ElementType(QStringLiteral("copyright")));
297 handled.push_back(ElementType(QStringLiteral("managingEditor")));
298 handled.push_back(ElementType(QStringLiteral("webMaster")));
299 handled.push_back(ElementType(QStringLiteral("pubDate")));
300 handled.push_back(ElementType(QStringLiteral("lastBuildDate")));
301 handled.push_back(ElementType(QStringLiteral("skipDays")));
302 handled.push_back(ElementType(QStringLiteral("skipHours")));
303 handled.push_back(ElementType(QStringLiteral("item")));
304 handled.push_back(ElementType(QStringLiteral("textinput")));
305 handled.push_back(ElementType(QStringLiteral("textInput")));
306 handled.push_back(ElementType(QStringLiteral("image")));
307 handled.push_back(ElementType(QStringLiteral("ttl")));
308 handled.push_back(ElementType(QStringLiteral("generator")));
309 handled.push_back(ElementType(QStringLiteral("docs")));
310 handled.push_back(ElementType(QStringLiteral("cloud")));
311 handled.push_back(ElementType(QStringLiteral("language"), dublinCoreNamespace()));
312 handled.push_back(ElementType(QStringLiteral("rights"), dublinCoreNamespace()));
313 handled.push_back(ElementType(QStringLiteral("date"), dublinCoreNamespace()));
314 }
315
316 QList<QDomElement> notHandled;
317
318 QDomNodeList children = element().childNodes();
319 const int numChildren = children.size();
320 for (int i = 0; i < numChildren; ++i) {
321 QDomElement el = children.at(i).toElement();
322 if (!el.isNull() //
323 && std::find(handled.cbegin(), handled.cend(), ElementType(el.localName(), el.namespaceURI())) == handled.cend()) {
324 notHandled.append(el);
325 }
326 }
327
328 return notHandled;
329}
330
332{
333 QString info;
334 info += QLatin1String("### Document: ###################\n");
335 if (!title().isNull()) {
336 info += QLatin1String("title: #") + title() + QLatin1String("#\n");
337 }
338 if (!description().isNull()) {
339 info += QLatin1String("description: #") + description() + QLatin1String("#\n");
340 }
341 if (!link().isNull()) {
342 info += QLatin1String("link: #") + link() + QLatin1String("#\n");
343 }
344 if (!language().isNull()) {
345 info += QLatin1String("language: #") + language() + QLatin1String("#\n");
346 }
347 if (!copyright().isNull()) {
348 info += QLatin1String("copyright: #") + copyright() + QLatin1String("#\n");
349 }
350 if (!managingEditor().isNull()) {
351 info += QLatin1String("managingEditor: #") + managingEditor() + QLatin1String("#\n");
352 }
353 if (!webMaster().isNull()) {
354 info += QLatin1String("webMaster: #") + webMaster() + QLatin1String("#\n");
355 }
356
357 QString dpubdate = dateTimeToString(pubDate());
358 if (!dpubdate.isNull()) {
359 info += QLatin1String("pubDate: #") + dpubdate + QLatin1String("#\n");
360 }
361
362 QString dlastbuilddate = dateTimeToString(lastBuildDate());
363 if (!dlastbuilddate.isNull()) {
364 info += QLatin1String("lastBuildDate: #") + dlastbuilddate + QLatin1String("#\n");
365 }
366
367 if (!textInput().isNull()) {
368 info += textInput().debugInfo();
369 }
370 if (!cloud().isNull()) {
371 info += cloud().debugInfo();
372 }
373 if (!image().isNull()) {
374 info += image().debugInfo();
375 }
376
377 const QList<Category> cats = categories();
378
379 for (const auto &c : cats) {
380 info += c.debugInfo();
381 }
382
383 const QList<Item> litems = items();
384 for (const auto &item : litems) {
385 info += item.debugInfo();
386 }
387 info += QLatin1String("### Document end ################\n");
388 return info;
389}
390
391void Document::getItemTitleFormatInfo(bool *isCDATA, bool *containsMarkup) const
392{
393 if (!d->itemTitlesGuessed) {
394 QString titles;
395 QList<Item> litems = items();
396
397 if (litems.isEmpty()) {
398 d->itemTitlesGuessed = true;
399 return;
400 }
401
402 QDomElement titleEl = (*litems.begin()).firstElementByTagNameNS(QString(), QStringLiteral("title"));
403 d->itemTitleIsCDATA = titleEl.firstChild().isCDATASection();
404
405 const int nmax = std::min<int>(litems.size(), 10); // we check a maximum of 10 items
406 int i = 0;
407
408 for (const auto &item : litems) {
409 if (i++ >= nmax) {
410 break;
411 }
412 titles += item.originalTitle();
413 }
414
415 d->itemTitleContainsMarkup = stringContainsMarkup(titles);
416 d->itemTitlesGuessed = true;
417 }
418
419 if (isCDATA != nullptr) {
420 *isCDATA = d->itemTitleIsCDATA;
421 }
422 if (containsMarkup != nullptr) {
423 *containsMarkup = d->itemTitleContainsMarkup;
424 }
425}
426
427void Document::getItemDescriptionFormatInfo(bool *isCDATA, bool *containsMarkup) const
428{
429 if (!d->itemDescGuessed) {
430 QString desc;
431 QList<Item> litems = items();
432
433 if (litems.isEmpty()) {
434 d->itemDescGuessed = true;
435 return;
436 }
437
438 QDomElement descEl = (*litems.begin()).firstElementByTagNameNS(QString(), QStringLiteral("description"));
439 d->itemDescriptionIsCDATA = descEl.firstChild().isCDATASection();
440
441 const int nmax = std::min<int>(litems.size(), 10); // we check a maximum of 10 items
442 int i = 0;
443
444 for (const auto &item : litems) {
445 if (i++ >= nmax) {
446 break;
447 }
448 desc += item.originalDescription();
449 }
450
451 d->itemDescriptionContainsMarkup = stringContainsMarkup(desc);
452 d->itemDescGuessed = true;
453 }
454
455 if (isCDATA != nullptr) {
456 *isCDATA = d->itemDescriptionIsCDATA;
457 }
458 if (containsMarkup != nullptr) {
459 *containsMarkup = d->itemDescriptionContainsMarkup;
460 }
461}
462
464{
465 return visitor->visitRSS2Document(this);
466}
467
468} // namespace RSS2
469} // namespace Syndication
Visitor interface, following the Visitor design pattern.
virtual bool visitRSS2Document(Syndication::RSS2::Document *document)
reimplement this method to handle RSS2-like (RSS 0.9x, 2.0) documents.
A wrapper for XML elements.
const QDomElement & element() const
returns the wrapped resource.
QString text() const
Returns the wrapped element's text or an empty string.
QDomElement firstElementByTagNameNS(const QString &nsURI, const QString &tagName) const
searches the direct children of the wrapped element for an element with a given namespace and tag name.
bool isNull() const
returns whether the wrapped element is a null element
QList< QDomElement > elementsByTagNameNS(const QString &nsURI, const QString &tagName) const
returns all child elements with tag name tagname and namespace URI nsURI.
ElementWrapper & operator=(const ElementWrapper &other)
Assigns another element wrapper to this one.
QString extractElementTextNS(const QString &namespaceURI, const QString &localName) const
extracts the text from a child element, respecting namespaces.
Cloud information for an RSS channel.
Definition cloud.h:46
QString debugInfo() const
Returns a description of the object for debugging purposes.
Definition cloud.cpp:57
document implementation, representing an RSS feed from the 0.91-0.94/2.0 family.
DayOfWeek
days of week, used for skip days
@ Wednesday
self-explanatory
QList< Item > items() const
the items contained in this document
QString debugInfo() const override
Returns a description of the object and its children for debugging purposes.
time_t lastBuildDate() const
The last time the content of the channel changed.
QString link() const
The URL to the HTML website corresponding to the channel.
QSet< int > skipHours() const
Contains a set of hours (from 0 to 23), time in GMT, when the channel is not updated.
bool accept(DocumentVisitor *visitor) override
Used by visitors for double dispatch.
static Document fromXML(const QDomDocument &document)
Parses an RSS2 document from an XML document.
time_t pubDate() const
The publication date for the content in the channel.
int ttl() const
ttl stands for time to live.
QString docs() const
A URL that points to the documentation for the format used in the RSS file.
Image image() const
Specifies a GIF, JPEG or PNG image that can be displayed with the channel.
TextInput textInput() const
Specifies a text input box that can be displayed with the channel.
QString description() const
Phrase or sentence describing the channel.
bool isValid() const override
returns whether this document is valid or not.
QString copyright() const
Copyright notice for content in the channel.
QString managingEditor() const
Email address for person responsible for editorial content.
QSet< DayOfWeek > skipDays() const
A set of week days where aggregators shouldn't read the channel.
Document & operator=(const Document &other)
assigns another document.
QList< QDomElement > unhandledElements() const
returns all child elements of this document not covered by this class.
~Document() override
destructor
Cloud cloud() const
Allows processes to register with a cloud to be notified of updates to the channel, implementing a lightweight publish-subscribe protocol for RSS feeds.
QString generator() const
A string indicating the program used to generate the channel.
QString webMaster() const
Email address for person responsible for technical issues relating to channel.
Document()
Default constructor, creates a null object, for which isNull() is true and isValid() is false.
QString title() const
The title of the channel.
QList< Category > categories() const
Specifies one or more categories that the channel belongs to.
An RSS2 image, used to describe feed logos.
Definition rss2/image.h:24
QString debugInfo() const
Returns a description of the object for debugging purposes.
"The purpose of the <textInput> element is something of a mystery.
QString debugInfo() const
Returns a description of the object for debugging purposes.
Document interface for format-specific feed documents as parsed from a document source (see DocumentSource).
QString text() const const
QDomNodeList childNodes() const const
QDomNode firstChild() const const
bool isCDATASection() const const
bool isNull() const const
QString localName() const const
QDomNode namedItem(const QString &name) const const
QString namespaceURI() const const
QDomElement toElement() const const
QDomNode at(int index) const const
int size() const const
void append(QList< T > &&value)
iterator begin()
const_iterator cbegin() const const
const_iterator cend() const const
qsizetype count() const const
bool isEmpty() const const
void reserve(qsizetype size)
qsizetype size() const const
bool isNull() const const
int toInt(bool *ok, int base) const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 12:01:30 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.