Messagelib

urlhashing.cpp
1 /*
2  SPDX-FileCopyrightText: 2016-2023 Laurent Montel <[email protected]>
3 
4  SPDX-License-Identifier: LGPL-2.0-or-later
5 */
6 
7 #include "urlhashing.h"
8 #include <QCryptographicHash>
9 #include <QDebug>
10 
11 using namespace WebEngineViewer;
12 
13 UrlHashing::UrlHashing(const QUrl &url)
14  : mUrl(url)
15 {
16 }
17 
// Destructor: defined out of line here, using the compiler-generated implementation.
18 UrlHashing::~UrlHashing() = default;
19 
// Returns a canonical string form of `url`, or an empty QString when the URL is empty.
//
// Visible steps:
//  - an empty path is replaced by "/";
//  - otherwise raw tab, CR and LF characters are removed from the path
//    (the percent-escaping step is still a TODO);
//  - every "%25" in `urlEncoded` is replaced by "%" before the bytes are
//    converted to a QString with fromLatin1().
//
// NOTE(review): the declaration of `urlEncoded` is not visible in this listing
// (the embedded line numbering jumps from 56 to 59), so how the URL is encoded
// cannot be confirmed from here.
20 QString UrlHashing::canonicalizeUrl(QUrl url)
21 {
22  if (url.isEmpty()) {
23  return {};
24  }
25  QString path = url.path();
26  if (url.path().isEmpty()) {
    // No path at all: use the root path.
27  url.setPath(QStringLiteral("/"));
28  } else {
29  // First, remove tab (0x09), CR (0x0d), and LF (0x0a) characters from the URL. Do not remove escape sequences for these characters (e.g. '%0a').
30  path.remove(QLatin1Char('\t'));
31  path.remove(QLatin1Char('\r'));
32  path.remove(QLatin1Char('\n'));
33 
34  // In the URL, percent-escape all characters that are <= ASCII 32, >= 127, "#", or "%". The escapes should use uppercase hex characters.
35  // TODO
36 
    // Write the cleaned path back into the URL.
37  url.setPath(path);
38  }
39  // Remove all leading and trailing dots.
    // The host clean-up below is compiled out (#if 0).
    // NOTE(review): before re-enabling it, check the backward loop (it starts at
    // hostname.length(), which looks like one past the last valid index for at())
    // and the write to mUrl, whereas the rest of this function works on `url`.
40 #if 0
41  QString hostname = url.host();
42  qDebug() << " hostname" << hostname;
43  while (!hostname.isEmpty() && hostname.at(0) == QLatin1Char('.')) {
44  hostname.remove(0, 1);
45  }
46  qDebug() << "BEFORE hostname" << hostname;
47  for (int i = hostname.length(); i >= 0; --i) {
48  if (hostname.at(i) == QLatin1Char('.')) {
49  hostname.remove(i);
50  } else {
51  break;
52  }
53  }
54  qDebug() << "AFTER hostname" << hostname;
55  mUrl.setHost(hostname);
56 #endif
59  // qDebug() << "BEFORE urlEncoded" <<urlEncoded;
    // Turn escaped percent signs ("%25") back into a bare "%".
60  urlEncoded.replace(QByteArrayLiteral("%25"), QByteArrayLiteral("%"));
61  // qDebug() << "AFTER urlEncoded" <<urlEncoded;
62  return QString::fromLatin1(urlEncoded);
63 }
64 
64 
65 QStringList UrlHashing::generatePathsToCheck(const QString &str, const QString &query)
66 {
67  QStringList pathToCheck;
68  if (str.isEmpty()) {
69  return pathToCheck;
70  }
71  const int strLength(str.length());
72  for (int i = 0; i < strLength; ++i) {
73  // We check 5 element => 4 here and host if necessary
74  if (pathToCheck.count() == 4) {
75  break;
76  }
77  if (str.at(i) == QLatin1Char('/')) {
78  if (i == 0) {
79  pathToCheck << QStringLiteral("/");
80  } else {
81  pathToCheck << str.left(i + 1);
82  }
83  }
84  }
85  if (!pathToCheck.isEmpty() && pathToCheck.at(pathToCheck.count() - 1) != str) {
86  pathToCheck << str;
87  }
88  if (!query.isEmpty()) {
89  pathToCheck << str + QLatin1Char('?') + query;
90  }
91  return pathToCheck;
92 }
93 
94 QStringList UrlHashing::generateHostsToCheck(const QString &str)
95 {
96  QStringList hostToCheck;
97  if (str.isEmpty()) {
98  return hostToCheck;
99  }
100  const int strLength(str.length());
101  bool lastElement = true;
102  for (int i = (strLength - 1); i > 0; --i) {
103  // We need to check just 5 element => 4 splits hosts + current host
104  if (hostToCheck.count() == 4) {
105  break;
106  }
107  if (str.at(i) == QLatin1Char('.')) {
108  if (lastElement) {
109  lastElement = false;
110  } else {
111  hostToCheck << str.right(strLength - i - 1);
112  }
113  }
114  }
115  hostToCheck << str;
116  return hostToCheck;
117 }
118 
// Builds the hash table for mUrl.
//
// When mUrl is valid, the URL is canonicalized, re-parsed into a QUrl, and split
// into host candidates and path candidates. For every host/path combination the
// string host + path is formed, and `ba` is inserted into `lst` as key with a
// copy of `ba` truncated to 4 bytes as value. `lst` is returned in every case.
//
// NOTE(review): the declarations of `lst` and `ba` are not visible in this
// listing (the embedded line numbering skips 121 and 131). Presumably `ba` is a
// cryptographic hash of `str` (QCryptographicHash is included by this file);
// TODO confirm against the complete source.
119 QHash<QByteArray, QByteArray> UrlHashing::hashList() const
120 {
122  if (mUrl.isValid()) {
123  const QString result = WebEngineViewer::UrlHashing::canonicalizeUrl(mUrl);
124  const QUrl url(result);
125  const QStringList hosts = WebEngineViewer::UrlHashing::generateHostsToCheck(url.host());
126  const QStringList paths = WebEngineViewer::UrlHashing::generatePathsToCheck(url.path(), url.query());
127 
    // Every host candidate is combined with every path candidate.
128  for (const QString &host : hosts) {
129  for (const QString &path : paths) {
130  const QString str = host + path;
    // Value stored for each entry: the first 4 bytes of `ba`.
132  QByteArray baShort = ba;
133  baShort.truncate(4);
134  lst.insert(ba, baShort);
135  // qDebug() << " ba " << ba.toBase64();
136  }
137  }
138  }
139  return lst;
140 }
QByteArray toEncoded(QUrl::FormattingOptions options) const
int count(const T &value) const
QString query(QUrl::ComponentFormattingOptions options) const
QByteArray toLatin1() const
QHash::iterator insert(const Key &key, const T &value)
KSERVICE_EXPORT KService::List query(FilterFunc filterFunc)
NETWORKMANAGERQT_EXPORT QString hostname()
bool isEmpty() const
RemoveFragment
EncodeUnicode
bool isEmpty() const
int length() const
const T & at(int i) const
bool isEmpty() const
QByteArray & replace(int pos, int len, const char *after)
QString & remove(int position, int n)
ScriptableExtension * host() const
QByteArray hash(const QByteArray &data, QCryptographicHash::Algorithm method)
QString host(QUrl::ComponentFormattingOptions options) const
QString path(QUrl::ComponentFormattingOptions options) const
QString path(const QString &relativePath)
QString left(int n) const
QString right(int n) const
QString fromLatin1(const char *str, int size)
void setPath(const QString &path, QUrl::ParsingMode mode)
const QChar at(int position) const
void truncate(int pos)
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Wed Mar 22 2023 04:07:15 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.