Syndication

rss2/document.cpp
1 /*
2  This file is part of the syndication library
3  SPDX-FileCopyrightText: 2005 Frank Osterfeld <[email protected]>
4 
5  SPDX-License-Identifier: LGPL-2.0-or-later
6 */
7 
8 #include <rss2/category.h>
9 #include <rss2/cloud.h>
10 #include <rss2/document.h>
11 #include <rss2/image.h>
12 #include <rss2/item.h>
13 #include <rss2/textinput.h>
14 
15 #include <constants.h>
16 #include <documentvisitor.h>
17 #include <tools.h>
18 
19 #include <QDomDocument>
20 #include <QList>
21 #include <QSet>
22 #include <QString>
23 
24 #include <vector>
25 
26 namespace Syndication
27 {
28 namespace RSS2
29 {
30 class SYNDICATION_NO_EXPORT Document::DocumentPrivate
31 {
32 public:
33  DocumentPrivate()
34  : itemDescriptionIsCDATA(false)
35  , itemDescriptionContainsMarkup(false)
36  , itemDescGuessed(false)
37  , itemTitleIsCDATA(false)
38  , itemTitleContainsMarkup(false)
39  , itemTitlesGuessed(false)
40  {
41  }
42  mutable bool itemDescriptionIsCDATA;
43  mutable bool itemDescriptionContainsMarkup;
44  mutable bool itemDescGuessed;
45  mutable bool itemTitleIsCDATA;
46  mutable bool itemTitleContainsMarkup;
47  mutable bool itemTitlesGuessed;
48 };
49 
50 Document::Document(const QDomElement &element)
51  : SpecificDocument()
52  , ElementWrapper(element)
53  , d(new DocumentPrivate)
54 {
55 }
56 
57 Document Document::fromXML(const QDomDocument &doc)
58 {
59  QDomNode channelNode = doc.namedItem(QStringLiteral("rss")).namedItem(QStringLiteral("channel"));
60 
61  return Document(channelNode.toElement());
62 }
63 
64 Document::Document()
65  : SpecificDocument()
66  , ElementWrapper()
67  , d(new DocumentPrivate)
68 {
69 }
70 
71 Document::Document(const Document &other)
72  : SpecificDocument(other)
73  , ElementWrapper(other)
74 {
75  d = other.d;
76 }
77 
78 Document::~Document()
79 {
80 }
81 
82 Document &Document::operator=(const Document &other)
83 {
84  ElementWrapper::operator=(other);
85  d = other.d;
86  return *this;
87 }
88 bool Document::isValid() const
89 {
90  return !isNull();
91 }
92 
93 QString Document::title() const
94 {
95  return extractElementTextNS(QString(), QStringLiteral("title"));
96 }
97 
98 QString Document::link() const
99 {
100  return extractElementTextNS(QString(), QStringLiteral("link"));
101 }
102 
103 QString Document::description() const
104 {
105  const QString desc = extractElementTextNS(QString(), QStringLiteral("description"));
106  return normalize(desc);
107 }
108 
109 QString Document::language() const
110 {
111  const QString lang = extractElementTextNS(QString(), QStringLiteral("language"));
112 
113  if (!lang.isNull()) {
114  return lang;
115  } else {
116  return extractElementTextNS(dublinCoreNamespace(), QStringLiteral("language"));
117  }
118 }
119 
120 QString Document::copyright() const
121 {
122  const QString rights = extractElementTextNS(QString(), QStringLiteral("copyright"));
123  if (!rights.isNull()) {
124  return rights;
125  } else {
126  // if <copyright> is not provided, use <dc:rights>
127  return extractElementTextNS(dublinCoreNamespace(), QStringLiteral("rights"));
128  }
129 }
130 
131 QString Document::managingEditor() const
132 {
133  return extractElementTextNS(QString(), QStringLiteral("managingEditor"));
134 }
135 
136 QString Document::webMaster() const
137 {
138  return extractElementTextNS(QString(), QStringLiteral("webMaster"));
139 }
140 
141 time_t Document::pubDate() const
142 {
143  QString str = extractElementTextNS(QString(), QStringLiteral("pubDate"));
144 
145  if (!str.isNull()) {
146  return parseDate(str, RFCDate);
147  } else {
148  // if there is no pubDate, check for dc:date
149  str = extractElementTextNS(dublinCoreNamespace(), QStringLiteral("date"));
150  return parseDate(str, ISODate);
151  }
152 }
153 
154 time_t Document::lastBuildDate() const
155 {
156  const QString str = extractElementTextNS(QString(), QStringLiteral("lastBuildDate"));
157 
158  return parseDate(str, RFCDate);
159 }
160 
161 QList<Category> Document::categories() const
162 {
163  const QList<QDomElement> catNodes = elementsByTagNameNS(QString(), QStringLiteral("category"));
164 
165  QList<Category> categories;
166  categories.reserve(catNodes.count());
167 
168  std::transform(catNodes.cbegin(), catNodes.cend(), std::back_inserter(categories), [](const QDomElement &element) {
169  return Category(element);
170  });
171 
172  return categories;
173 }
174 
175 QString Document::generator() const
176 {
177  return extractElementTextNS(QString(), QStringLiteral("generator"));
178 }
179 
180 QString Document::docs() const
181 {
182  return extractElementTextNS(QString(), QStringLiteral("docs"));
183 }
184 
185 Cloud Document::cloud() const
186 {
187  return Cloud(firstElementByTagNameNS(QString(), QStringLiteral("cloud")));
188 }
189 
190 int Document::ttl() const
191 {
192  bool ok;
193  int c;
194 
195  QString text = extractElementTextNS(QString(), QStringLiteral("ttl"));
196  c = text.toInt(&ok);
197  return ok ? c : 0;
198 }
199 
200 Image Document::image() const
201 {
202  return Image(firstElementByTagNameNS(QString(), QStringLiteral("image")));
203 }
204 
205 TextInput Document::textInput() const
206 {
207  TextInput ti = TextInput(firstElementByTagNameNS(QString(), QStringLiteral("textInput")));
208 
209  if (!ti.isNull()) {
210  return ti;
211  }
212 
213  // Netscape's version of RSS 0.91 has textinput, not textInput
214  return TextInput(firstElementByTagNameNS(QString(), QStringLiteral("textinput")));
215 }
216 
217 QSet<int> Document::skipHours() const
218 {
219  QSet<int> skipHours;
220  QDomElement skipHoursNode = firstElementByTagNameNS(QString(), QStringLiteral("skipHours"));
221  if (!skipHoursNode.isNull()) {
222  ElementWrapper skipHoursWrapper(skipHoursNode);
223  bool ok = false;
224  const QList<QDomElement> hours = skipHoursWrapper.elementsByTagNameNS(QString(), QStringLiteral("hour"));
225  for (const auto &element : hours) {
226  const int h = element.text().toInt(&ok);
227  if (ok) {
228  skipHours.insert(h);
229  }
230  }
231  }
232 
233  return skipHours;
234 }
235 
236 QSet<Document::DayOfWeek> Document::skipDays() const
237 {
238  QSet<DayOfWeek> skipDays;
239  QDomElement skipDaysNode = firstElementByTagNameNS(QString(), QStringLiteral("skipDays"));
240  if (!skipDaysNode.isNull()) {
241  ElementWrapper skipDaysWrapper(skipDaysNode);
242  struct DayInfo {
244  DayOfWeek enumValue;
245  };
246  static const std::vector<DayInfo> weekDays = {
247  {QLatin1String("Monday"), Monday},
248  {QLatin1String("Tuesday"), Tuesday},
249  {QLatin1String("Wednesday"), Wednesday},
250  {QLatin1String("Thursday"), Thursday},
251  {QLatin1String("Friday"), Friday},
252  {QLatin1String("Saturday"), Saturday},
253  {QLatin1String("Sunday"), Sunday},
254  };
255 
256  const QList<QDomElement> days = skipDaysWrapper.elementsByTagNameNS(QString(), QStringLiteral("day"));
257  for (const auto &element : days) {
258  const QString day = element.text();
259  auto it = std::find_if(weekDays.cbegin(), weekDays.cend(), [&day](const DayInfo &info) {
260  return info.name == day;
261  });
262  if (it != weekDays.cend()) {
263  skipDays.insert(it->enumValue);
264  }
265  }
266  }
267 
268  return skipDays;
269 }
270 
271 QList<Item> Document::items() const
272 {
273  const QList<QDomElement> itemNodes = elementsByTagNameNS(QString(), QStringLiteral("item"));
274 
275  QList<Item> items;
276  items.reserve(itemNodes.count());
277 
278  DocumentPtr doccpy(new Document(*this));
279 
280  std::transform(itemNodes.cbegin(), itemNodes.cend(), std::back_inserter(items), [&doccpy](const QDomElement &element) {
281  return Item(element, doccpy);
282  });
283 
284  return items;
285 }
286 QList<QDomElement> Document::unhandledElements() const
287 {
288  // TODO: do not hardcode this list here
289  static std::vector<ElementType> handled; // QVector would require a default ctor, and ElementType is too big for QList
290  if (handled.empty()) {
291  handled.reserve(22);
292  handled.push_back(ElementType(QStringLiteral("title")));
293  handled.push_back(ElementType(QStringLiteral("link")));
294  handled.push_back(ElementType(QStringLiteral("description")));
295  handled.push_back(ElementType(QStringLiteral("language")));
296  handled.push_back(ElementType(QStringLiteral("copyright")));
297  handled.push_back(ElementType(QStringLiteral("managingEditor")));
298  handled.push_back(ElementType(QStringLiteral("webMaster")));
299  handled.push_back(ElementType(QStringLiteral("pubDate")));
300  handled.push_back(ElementType(QStringLiteral("lastBuildDate")));
301  handled.push_back(ElementType(QStringLiteral("skipDays")));
302  handled.push_back(ElementType(QStringLiteral("skipHours")));
303  handled.push_back(ElementType(QStringLiteral("item")));
304  handled.push_back(ElementType(QStringLiteral("textinput")));
305  handled.push_back(ElementType(QStringLiteral("textInput")));
306  handled.push_back(ElementType(QStringLiteral("image")));
307  handled.push_back(ElementType(QStringLiteral("ttl")));
308  handled.push_back(ElementType(QStringLiteral("generator")));
309  handled.push_back(ElementType(QStringLiteral("docs")));
310  handled.push_back(ElementType(QStringLiteral("cloud")));
311  handled.push_back(ElementType(QStringLiteral("language"), dublinCoreNamespace()));
312  handled.push_back(ElementType(QStringLiteral("rights"), dublinCoreNamespace()));
313  handled.push_back(ElementType(QStringLiteral("date"), dublinCoreNamespace()));
314  }
315 
316  QList<QDomElement> notHandled;
317 
318  QDomNodeList children = element().childNodes();
319  const int numChildren = children.size();
320  for (int i = 0; i < numChildren; ++i) {
321  QDomElement el = children.at(i).toElement();
322  if (!el.isNull() //
323  && std::find(handled.cbegin(), handled.cend(), ElementType(el.localName(), el.namespaceURI())) == handled.cend()) {
324  notHandled.append(el);
325  }
326  }
327 
328  return notHandled;
329 }
330 
331 QString Document::debugInfo() const
332 {
333  QString info;
334  info += QLatin1String("### Document: ###################\n");
335  if (!title().isNull()) {
336  info += QLatin1String("title: #") + title() + QLatin1String("#\n");
337  }
338  if (!description().isNull()) {
339  info += QLatin1String("description: #") + description() + QLatin1String("#\n");
340  }
341  if (!link().isNull()) {
342  info += QLatin1String("link: #") + link() + QLatin1String("#\n");
343  }
344  if (!language().isNull()) {
345  info += QLatin1String("language: #") + language() + QLatin1String("#\n");
346  }
347  if (!copyright().isNull()) {
348  info += QLatin1String("copyright: #") + copyright() + QLatin1String("#\n");
349  }
350  if (!managingEditor().isNull()) {
351  info += QLatin1String("managingEditor: #") + managingEditor() + QLatin1String("#\n");
352  }
353  if (!webMaster().isNull()) {
354  info += QLatin1String("webMaster: #") + webMaster() + QLatin1String("#\n");
355  }
356 
357  QString dpubdate = dateTimeToString(pubDate());
358  if (!dpubdate.isNull()) {
359  info += QLatin1String("pubDate: #") + dpubdate + QLatin1String("#\n");
360  }
361 
362  QString dlastbuilddate = dateTimeToString(lastBuildDate());
363  if (!dlastbuilddate.isNull()) {
364  info += QLatin1String("lastBuildDate: #") + dlastbuilddate + QLatin1String("#\n");
365  }
366 
367  if (!textInput().isNull()) {
368  info += textInput().debugInfo();
369  }
370  if (!cloud().isNull()) {
371  info += cloud().debugInfo();
372  }
373  if (!image().isNull()) {
374  info += image().debugInfo();
375  }
376 
377  const QList<Category> cats = categories();
378 
379  for (const auto &c : cats) {
380  info += c.debugInfo();
381  }
382 
383  const QList<Item> litems = items();
384  for (const auto &item : litems) {
385  info += item.debugInfo();
386  }
387  info += QLatin1String("### Document end ################\n");
388  return info;
389 }
390 
391 void Document::getItemTitleFormatInfo(bool *isCDATA, bool *containsMarkup) const
392 {
393  if (!d->itemTitlesGuessed) {
394  QString titles;
395  QList<Item> litems = items();
396 
397  if (litems.isEmpty()) {
398  d->itemTitlesGuessed = true;
399  return;
400  }
401 
402  QDomElement titleEl = (*litems.begin()).firstElementByTagNameNS(QString(), QStringLiteral("title"));
403  d->itemTitleIsCDATA = titleEl.firstChild().isCDATASection();
404 
405  const int nmax = std::min<int>(litems.size(), 10); // we check a maximum of 10 items
406  int i = 0;
407 
408  for (const auto &item : litems) {
409  if (i++ >= nmax) {
410  break;
411  }
412  titles += item.originalTitle();
413  }
414 
415  d->itemTitleContainsMarkup = stringContainsMarkup(titles);
416  d->itemTitlesGuessed = true;
417  }
418 
419  if (isCDATA != nullptr) {
420  *isCDATA = d->itemTitleIsCDATA;
421  }
422  if (containsMarkup != nullptr) {
423  *containsMarkup = d->itemTitleContainsMarkup;
424  }
425 }
426 
427 void Document::getItemDescriptionFormatInfo(bool *isCDATA, bool *containsMarkup) const
428 {
429  if (!d->itemDescGuessed) {
430  QString desc;
431  QList<Item> litems = items();
432 
433  if (litems.isEmpty()) {
434  d->itemDescGuessed = true;
435  return;
436  }
437 
438  QDomElement descEl = (*litems.begin()).firstElementByTagNameNS(QString(), QStringLiteral("description"));
439  d->itemDescriptionIsCDATA = descEl.firstChild().isCDATASection();
440 
441  const int nmax = std::min<int>(litems.size(), 10); // we check a maximum of 10 items
442  int i = 0;
443 
444  for (const auto &item : litems) {
445  if (i++ >= nmax) {
446  break;
447  }
448  desc += item.originalDescription();
449  }
450 
451  d->itemDescriptionContainsMarkup = stringContainsMarkup(desc);
452  d->itemDescGuessed = true;
453  }
454 
455  if (isCDATA != nullptr) {
456  *isCDATA = d->itemDescriptionIsCDATA;
457  }
458  if (containsMarkup != nullptr) {
459  *containsMarkup = d->itemDescriptionContainsMarkup;
460  }
461 }
462 
463 bool Document::accept(DocumentVisitor *visitor)
464 {
465  return visitor->visitRSS2Document(this);
466 }
467 
468 } // namespace RSS2
469 } // namespace Syndication
void append(const T &value)
bool isNull() const const
QDomNode firstChild() const const
QDomElement toElement() const const
int count(const T &value) const const
bool isNull() const const
QString normalize(QStringView str)
QDomNode at(int index) const const
int size() const const
QString namespaceURI() const const
void reserve(int alloc)
KIOCORE_EXPORT CopyJob * link(const QList< QUrl > &src, const QUrl &destDir, JobFlags flags=DefaultFlags)
int size() const const
bool isCDATASection() const const
QString localName() const const
int toInt(bool *ok, int base) const const
bool isEmpty() const const
QList::const_iterator cend() const const
KCALUTILS_EXPORT QString dateTimeToString(const QDateTime &date, bool dateOnly=false, bool shortfmt=true)
QList::const_iterator cbegin() const const
const char * name(StandardAction id)
QSet::iterator insert(const T &value)
QList::iterator begin()
DayOfWeek
QDomNode namedItem(const QString &name) const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Thu Nov 30 2023 03:51:25 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.