Messagelib

urlhashing.cpp
1/*
2 SPDX-FileCopyrightText: 2016-2025 Laurent Montel <montel@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "urlhashing.h"
8#include <QCryptographicHash>
9#include <QDebug>
10
11using namespace Qt::Literals;
12using namespace WebEngineViewer;
13
14UrlHashing::UrlHashing(const QUrl &url)
15 : mUrl(url)
16{
17}
18
// Destructor, explicitly defaulted out of line.
19UrlHashing::~UrlHashing() = default;
20
// Produces a canonical string form of `url`.
//
// An empty URL yields an empty string. Otherwise the path is normalized
// (an empty path becomes "/", raw tab/CR/LF characters are removed and
// repeated leading slashes are collapsed to a single one), every "%25"
// sequence in the encoded bytes is replaced by "%", and the bytes are
// returned converted from Latin-1.
//
// `url` is taken by value, so the caller's QUrl is not modified.
//
// NOTE(review): in this listing the embedded source line numbers jump from
// 62 to 65, so the declaration of `urlEncoded` is not visible here. It is
// presumably initialized from url.toEncoded(...) - confirm against the real file.
21QString UrlHashing::canonicalizeUrl(QUrl url)
22{
23 if (url.isEmpty()) {
24 return {};
25 }
// Work on a copy of the path; it is written back with setPath() below.
26 QString path = url.path();
27 if (url.path().isEmpty()) {
28 url.setPath(QStringLiteral("/"));
29 } else {
30 // First, remove tab (0x09), CR (0x0d), and LF (0x0a) characters from the URL. Do not remove escape sequences for these characters (e.g. '%0a').
31 path.remove(QLatin1Char('\t'));
32 path.remove(QLatin1Char('\r'));
33 path.remove(QLatin1Char('\n'));
34
35 // remove repeated leading slashes
// Dropping one character per iteration leaves exactly one leading '/'.
36 while (path.startsWith("//"_L1)) {
37 path.removeAt(0);
38 }
39
40 // In the URL, percent-escape all characters that are <= ASCII 32, >= 127, "#", or "%". The escapes should use uppercase hex characters.
41 // TODO
42
43 url.setPath(path);
44 }
45 // Remove all leading and trailing dots.
// The host clean-up below is compiled out (#if 0), so hosts are currently left untouched.
// NOTE(review): if this is ever re-enabled, the trailing-dot loop starts at
// hostname.length(), which is one past the last valid index for at(); source
// line 55 (the body of the inner `if`) is also missing from this listing.
46#if 0
47 QString hostname = url.host();
48 qDebug() << " hostname" << hostname;
49 while (!hostname.isEmpty() && hostname.at(0) == QLatin1Char('.')) {
50 hostname.remove(0, 1);
51 }
52 qDebug() << "BEFORE hostname" << hostname;
53 for (int i = hostname.length(); i >= 0; --i) {
54 if (hostname.at(i) == QLatin1Char('.')) {
56 } else {
57 break;
58 }
59 }
60 qDebug() << "AFTER hostname" << hostname;
61 mUrl.setHost(hostname);
62#endif
65 // qDebug() << "BEFORE urlEncoded" <<urlEncoded;
// Turn every "%25" in the encoded bytes back into a bare "%".
66 urlEncoded.replace(QByteArrayLiteral("%25"), QByteArrayLiteral("%"));
67 // qDebug() << "AFTER urlEncoded" <<urlEncoded;
68 return QString::fromLatin1(urlEncoded);
69}
70
71QStringList UrlHashing::generatePathsToCheck(const QString &str, const QString &query)
72{
73 QStringList pathToCheck;
74 if (str.isEmpty()) {
75 return pathToCheck;
76 }
77 const int strLength(str.length());
78 for (int i = 0; i < strLength; ++i) {
79 // We check 5 element => 4 here and host if necessary
80 if (pathToCheck.count() == 4) {
81 break;
82 }
83 if (str.at(i) == QLatin1Char('/')) {
84 if (i == 0) {
85 pathToCheck << QStringLiteral("/");
86 } else {
87 pathToCheck << str.left(i + 1);
88 }
89 }
90 }
91 if (!pathToCheck.isEmpty() && pathToCheck.at(pathToCheck.count() - 1) != str) {
92 pathToCheck << str;
93 }
94 if (!query.isEmpty()) {
95 pathToCheck << str + QLatin1Char('?') + query;
96 }
97 return pathToCheck;
98}
99
100QStringList UrlHashing::generateHostsToCheck(const QString &str)
101{
102 QStringList hostToCheck;
103 if (str.isEmpty()) {
104 return hostToCheck;
105 }
106 const int strLength(str.length());
107 bool lastElement = true;
108 for (int i = (strLength - 1); i > 0; --i) {
109 // We need to check just 5 element => 4 splits hosts + current host
110 if (hostToCheck.count() == 4) {
111 break;
112 }
113 if (str.at(i) == QLatin1Char('.')) {
114 if (lastElement) {
115 lastElement = false;
116 } else {
117 hostToCheck << str.right(strLength - i - 1);
118 }
119 }
120 }
121 hostToCheck << str;
122 return hostToCheck;
123}
124
// Builds the hash table for mUrl.
//
// If mUrl is valid, it is canonicalized, re-parsed as a QUrl, and split into
// host and path candidates. For every host/path combination an entry is
// inserted whose key is `ba` and whose value is a copy of `ba` truncated to
// its first 4 bytes. An invalid mUrl yields an empty hash.
//
// NOTE(review): in this listing the embedded source line numbers jump from
// 136 to 138, so the declaration of `ba` is not visible here. It is presumably
// a cryptographic digest of `str` - confirm against the real file.
125QHash<QByteArray, QByteArray> UrlHashing::hashList() const
126{
127 QHash<QByteArray, QByteArray> lst;
128 if (mUrl.isValid()) {
129 const QString result = WebEngineViewer::UrlHashing::canonicalizeUrl(mUrl);
// Re-parse the canonical string so host, path and query can be read separately.
130 const QUrl url(result);
131 const QStringList hosts = WebEngineViewer::UrlHashing::generateHostsToCheck(url.host());
132 const QStringList paths = WebEngineViewer::UrlHashing::generatePathsToCheck(url.path(), url.query());
133
// Every host candidate is combined with every path candidate.
134 for (const QString &host : hosts) {
135 for (const QString &path : paths) {
136 const QString str = host + path;
// Keep the full value as key and its 4-byte prefix as value.
138 QByteArray baShort = ba;
139 baShort.truncate(4);
140 lst.insert(ba, baShort);
141 // qDebug() << " ba " << ba.toBase64();
142 }
143 }
144 }
145 return lst;
146}
KSERVICE_EXPORT KService::List query(FilterFunc filterFunc)
QString path(const QString &relativePath)
NETWORKMANAGERQT_EXPORT QString hostname()
QByteArray & replace(QByteArrayView before, QByteArrayView after)
void truncate(qsizetype pos)
QByteArray hash(QByteArrayView data, Algorithm method)
iterator insert(const Key &key, const T &value)
const_reference at(qsizetype i) const
qsizetype count() const
bool isEmpty() const
const QChar at(qsizetype position) const
QString fromLatin1(QByteArrayView str)
bool isEmpty() const
QString left(qsizetype n) const
qsizetype length() const
QString & remove(QChar ch, Qt::CaseSensitivity cs)
QString & removeAt(qsizetype pos)
QString right(qsizetype n) const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const
QByteArray toLatin1() const
EncodeUnicode
RemoveFragment
QString host(ComponentFormattingOptions options) const
bool isEmpty() const
QString path(ComponentFormattingOptions options) const
QString query(ComponentFormattingOptions options) const
void setPath(const QString &path, ParsingMode mode)
QByteArray toEncoded(FormattingOptions options) const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Mar 7 2025 11:48:21 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.