Syndication

loaderutil.cpp
1 /*
2  This file is part of the syndication library
3  SPDX-FileCopyrightText: 2019 Laurent Montel <[email protected]>
4 
5  SPDX-License-Identifier: LGPL-2.0-or-later
6 */
7 
8 #include "loaderutil_p.h"
9 #include <QDebug>
10 #include <QRegularExpression>
11 
12 //#define DEBUG_PARSING_FEED
13 #ifdef DEBUG_PARSING_FEED
14 #include <QFile>
15 #include <QTextStream>
16 #endif
17 QUrl Syndication::LoaderUtil::parseFeed(const QByteArray &data, const QUrl &url)
18 {
19 #ifdef DEBUG_PARSING_FEED
20  qDebug() << " QUrl Syndication::LoaderUtil::parseFeed(const QByteArray &data, const QUrl &url)";
21  QFile headerFile(QStringLiteral("/tmp/bb.txt"));
22  headerFile.open(QIODevice::WriteOnly | QIODevice::Text);
23  QTextStream outHeaderStream(&headerFile);
24  outHeaderStream << data;
25  headerFile.close();
26 #endif
27  QUrl discoveredFeedURL;
28  QString str = QString::fromLatin1(data.constData()).simplified();
29  QString s2;
30  // QTextStream ts( &str, QIODevice::WriteOnly );
31  // ts << data.data();
32 
33  // "<[\\s]link[^>]*rel[\\s]=[\\s]\\\"[\\s]alternate[\\s]\\\"[^>]*>"
34  // "type[\\s]=[\\s]\\\"application/rss+xml\\\""
35  // "href[\\s]=[\\s]\\\"application/rss+xml\\\""
36 
37  QRegularExpression rx(QStringLiteral("(?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[^sAa]*"
38  "[\\s]*type[^=]*=\"application/rss\\+xml\"[^s][^s](?:[^>]*)"
39  "[\\s]*[\\s]*[^s]*(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\\s]*)"),
42  if ((match = rx.match(str)).hasMatch()) {
43  s2 = match.captured(1);
44  } else {
45  const QRegularExpression rx2(QStringLiteral("(?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)"
46  "[\\s]*[^s][^s](?:[^>]*)(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\\s]*)"),
48  if ((match = rx2.match(str)).hasMatch()) {
49  s2 = match.captured(1);
50  } else {
51  // does not support Atom/RSS autodiscovery.. try finding feeds by brute force....
52  QStringList feeds;
53  QString host = url.host();
54  rx.setPattern(QStringLiteral("(?:<A )[^H]*(?:HREF)[^=]*=[^A-Z0-9-_~,./]*([^'\">\\s]*)"));
55  QRegularExpressionMatchIterator iter = rx.globalMatch(str);
56  while (iter.hasNext()) {
57  match = iter.next();
58  s2 = match.captured(1);
59  if (s2.endsWith(QLatin1String(".rdf")) //
60  || s2.endsWith(QLatin1String(".rss")) //
61  || s2.endsWith(QLatin1String(".xml"))) {
62  feeds.append(s2);
63  }
64  }
65 
66  // Prefer feeds on same host
67  auto it = std::find_if(feeds.cbegin(), feeds.cend(), [&host](const QString &s) {
68  return QUrl(s).host() == host;
69  });
70  if (it != feeds.cend()) {
71  s2 = *it;
72  }
73  }
74  }
75 
76  if (s2.isNull()) {
77  return discoveredFeedURL;
78  }
79 
80  if (QUrl(s2).isRelative()) {
81  if (s2.startsWith(QLatin1String("//"))) {
82  s2.prepend(url.scheme() + QLatin1Char(':'));
83  discoveredFeedURL = QUrl(s2);
84  } else if (s2.startsWith(QLatin1Char('/'))) {
85  discoveredFeedURL = url;
86  discoveredFeedURL.setPath(s2);
87  } else {
88  discoveredFeedURL = url;
89  discoveredFeedURL.setPath(discoveredFeedURL.path() + QLatin1Char('/') + s2);
90  }
91  } else {
92  discoveredFeedURL = QUrl(s2);
93  }
94 
95  return discoveredFeedURL;
96 }
void append(const T &value)
bool endsWith(const QString &s, Qt::CaseSensitivity cs) const
bool isNull() const
QString scheme() const
QString & prepend(QChar ch)
QRegularExpressionMatch next()
QString simplified() const
QList::const_iterator cend() const
bool startsWith(const QString &s, Qt::CaseSensitivity cs) const
ScriptableExtension * host() const
QString host(QUrl::ComponentFormattingOptions options) const
QString path(QUrl::ComponentFormattingOptions options) const
QString fromLatin1(const char *str, int size)
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
void setPath(const QString &path, QUrl::ParsingMode mode)
QList::const_iterator cbegin() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Mon May 8 2023 03:57:11 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.