Syndication

loaderutil.cpp
1/*
2 This file is part of the syndication library
3 SPDX-FileCopyrightText: 2019 Laurent Montel <montel@kde.org>
4
5 SPDX-License-Identifier: LGPL-2.0-or-later
6*/
7
8#include "loaderutil_p.h"
9#include <QDebug>
10#include <QRegularExpression>
11
12//#define DEBUG_PARSING_FEED
13#ifdef DEBUG_PARSING_FEED
14#include <QFile>
15#include <QTextStream>
16#endif
17QUrl Syndication::LoaderUtil::parseFeed(const QByteArray &data, const QUrl &url)
18{
19#ifdef DEBUG_PARSING_FEED
20 qDebug() << " QUrl Syndication::LoaderUtil::parseFeed(const QByteArray &data, const QUrl &url)";
21 QFile headerFile(QStringLiteral("/tmp/bb.txt"));
22 headerFile.open(QIODevice::WriteOnly | QIODevice::Text);
23 QTextStream outHeaderStream(&headerFile);
24 outHeaderStream << data;
25 headerFile.close();
26#endif
27 QUrl discoveredFeedURL;
28 QString str = QString::fromLatin1(data.constData()).simplified();
29 QString s2;
30 // QTextStream ts( &str, QIODevice::WriteOnly );
31 // ts << data.data();
32
33 // "<[\\s]link[^>]*rel[\\s]=[\\s]\\\"[\\s]alternate[\\s]\\\"[^>]*>"
34 // "type[\\s]=[\\s]\\\"application/rss+xml\\\""
35 // "href[\\s]=[\\s]\\\"application/rss+xml\\\""
36
37 QRegularExpression rx(QStringLiteral("(?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[^sAa]*"
38 "[\\s]*type[^=]*=\"application/rss\\+xml\"[^s][^s](?:[^>]*)"
39 "[\\s]*[\\s]*[^s]*(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\\s]*)"),
42 if ((match = rx.match(str)).hasMatch()) {
43 s2 = match.captured(1);
44 } else {
45 const QRegularExpression rx2(QStringLiteral("(?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)"
46 "[\\s]*[^s][^s](?:[^>]*)(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\\s]*)"),
48 if ((match = rx2.match(str)).hasMatch()) {
49 s2 = match.captured(1);
50 } else {
51 // does not support Atom/RSS autodiscovery.. try finding feeds by brute force....
52 QStringList feeds;
53 QString host = url.host();
54 rx.setPattern(QStringLiteral("(?:<A )[^H]*(?:HREF)[^=]*=[^A-Z0-9-_~,./]*([^'\">\\s]*)"));
55 QRegularExpressionMatchIterator iter = rx.globalMatch(str);
56 while (iter.hasNext()) {
57 match = iter.next();
58 s2 = match.captured(1);
59 if (s2.endsWith(QLatin1String(".rdf")) //
60 || s2.endsWith(QLatin1String(".rss")) //
61 || s2.endsWith(QLatin1String(".xml"))) {
62 feeds.append(s2);
63 }
64 }
65
66 // Prefer feeds on same host
67 auto it = std::find_if(feeds.cbegin(), feeds.cend(), [&host](const QString &s) {
68 return QUrl(s).host() == host;
69 });
70 if (it != feeds.cend()) {
71 s2 = *it;
72 }
73 }
74 }
75
76 if (s2.isNull()) {
77 return discoveredFeedURL;
78 }
79
80 if (QUrl(s2).isRelative()) {
81 if (s2.startsWith(QLatin1String("//"))) {
82 s2.prepend(url.scheme() + QLatin1Char(':'));
83 discoveredFeedURL = QUrl(s2);
84 } else if (s2.startsWith(QLatin1Char('/'))) {
85 discoveredFeedURL = url;
86 discoveredFeedURL.setPath(s2);
87 } else {
88 discoveredFeedURL = url;
89 discoveredFeedURL.setPath(discoveredFeedURL.path() + QLatin1Char('/') + s2);
90 }
91 } else {
92 discoveredFeedURL = QUrl(s2);
93 }
94
95 return discoveredFeedURL;
96}
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
void append(QList< T > &&value)
const_iterator cbegin() const
const_iterator cend() const
QRegularExpressionMatch next()
bool endsWith(QChar c, Qt::CaseSensitivity cs) const
QString fromLatin1(QByteArrayView str)
bool isNull() const
QString & prepend(QChar ch)
QString simplified() const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const
QString host(ComponentFormattingOptions options) const
QString path(ComponentFormattingOptions options) const
QString scheme() const
void setPath(const QString &path, ParsingMode mode)
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:14:15 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.