Messagelib

scamdetectionwebengine.cpp
1 /*
2  SPDX-FileCopyrightText: 2016-2022 Laurent Montel <[email protected]>
3 
4  SPDX-License-Identifier: LGPL-2.0-or-later
5 
6 */
7 #include "scamdetectionwebengine.h"
8 #include "MessageViewer/ScamCheckShortUrl"
9 #include "scamcheckshorturlmanager.h"
10 #include "scamdetectiondetailsdialog.h"
11 #include "settings/messageviewersettings.h"
12 #include "webengineviewer/webenginescript.h"
13 #include <WebEngineViewer/WebEngineManageScript>
14 
15 #include <KLocalizedString>
16 
17 #include <QPointer>
18 #include <QRegularExpression>
19 #include <QWebEnginePage>
20 
21 using namespace MessageViewer;
22 
23 template<typename Arg, typename R, typename C>
24 struct InvokeWrapper {
25  QPointer<R> receiver;
26  void (C::*memberFunction)(Arg);
27  void operator()(Arg result)
28  {
29  if (receiver) {
30  (receiver->*memberFunction)(result);
31  }
32  }
33 };
34 
35 template<typename Arg, typename R, typename C>
36 
37 InvokeWrapper<Arg, R, C> invoke(R *receiver, void (C::*memberFunction)(Arg))
38 {
39  InvokeWrapper<Arg, R, C> wrapper = {receiver, memberFunction};
40  return wrapper;
41 }
42 
43 static QString addWarningColor(const QString &url)
44 {
45  const QString error = QStringLiteral("<font color=#FF0000>%1</font>").arg(url);
46  return error;
47 }
48 
49 class MessageViewer::ScamDetectionWebEnginePrivate
50 {
51 public:
52  ScamDetectionWebEnginePrivate() = default;
53 
54  QString mDetails;
56 };
57 
58 ScamDetectionWebEngine::ScamDetectionWebEngine(QObject *parent)
59  : QObject(parent)
60  , d(new MessageViewer::ScamDetectionWebEnginePrivate)
61 {
62 }
63 
// Defaulted here, after ScamDetectionWebEnginePrivate has been fully defined in this file.
// NOTE(review): presumably kept out of line so the d-pointer declared in the header can
// destroy the private object — confirm against the header.
64 ScamDetectionWebEngine::~ScamDetectionWebEngine() = default;
65 
66 void ScamDetectionWebEngine::scanPage(QWebEnginePage *page)
67 {
68  if (MessageViewer::MessageViewerSettings::self()->scamDetectionEnabled()) {
69  page->runJavaScript(WebEngineViewer::WebEngineScript::findAllAnchorsAndForms(),
70  WebEngineViewer::WebEngineManageScript::scriptWordId(),
71  invoke(this, &ScamDetectionWebEngine::handleScanPage));
72  }
73 }
74 
75 void ScamDetectionWebEngine::handleScanPage(const QVariant &result)
76 {
77  bool foundScam = false;
78 
79  d->mDetails.clear();
80  const QVariantList resultList = result.toList();
81  if (resultList.count() != 1) {
82  Q_EMIT resultScanDetection(foundScam);
83  return;
84  }
85  static const QRegularExpression ip4regExp(QStringLiteral("\\b[0-9]{1,3}\\.[0-9]{1,3}(?:\\.[0-9]{0,3})?(?:\\.[0-9]{0,3})?"));
86  const QVariantMap mapResult = resultList.at(0).toMap();
87  const QList<QVariant> lst = mapResult.value(QStringLiteral("anchors")).toList();
88  for (const QVariant &var : lst) {
89  QMap<QString, QVariant> mapVariant = var.toMap();
90  // qDebug()<<" mapVariant"<<mapVariant;
91 
92  // 1) detect if title has a url and title != href
93  const QString title = mapVariant.value(QStringLiteral("title")).toString();
94  const QString href = mapVariant.value(QStringLiteral("src")).toString();
95  const QUrl url(href);
96  if (!title.isEmpty()) {
97  if (title.startsWith(QLatin1String("http:")) || title.startsWith(QLatin1String("https:")) || title.startsWith(QLatin1String("www."))) {
98  if (title.startsWith(QLatin1String("www."))) {
99  const QString completUrl = url.scheme() + QLatin1String("://") + title;
100  if (completUrl != href && href != (completUrl + QLatin1Char('/'))) {
101  foundScam = true;
102  }
103  } else {
104  if (href != title) {
105  // http://www.kde.org == http://www.kde.org/
106  if (href != (title + QLatin1Char('/'))) {
107  foundScam = true;
108  }
109  }
110  }
111  if (foundScam) {
112  d->mDetails += QLatin1String("<li>")
113  + i18n("This email contains a link which reads as '%1' in the text, but actually points to '%2'. This is often the case in scam emails "
114  "to mislead the recipient",
115  addWarningColor(title),
116  addWarningColor(href))
117  + QLatin1String("</li>");
118  }
119  }
120  }
121  if (!foundScam) {
122  // 2) detect if url href has ip and not server name.
123  const QString hostname = url.host();
124  if (hostname.contains(ip4regExp) && !hostname.contains(QLatin1String("127.0.0.1"))) { // hostname
125  d->mDetails += QLatin1String("<li>")
126  + i18n("This email contains a link which points to a numerical IP address (%1) instead of a typical textual website address. This is often "
127  "the case in scam emails.",
128  addWarningColor(hostname))
129  + QLatin1String("</li>");
130  foundScam = true;
131  } else if (hostname.contains(QLatin1Char('%'))) { // Hexa value for ip
132  d->mDetails += QLatin1String("<li>")
133  + i18n("This email contains a link which points to a hexadecimal IP address (%1) instead of a typical textual website address. This is "
134  "often the case in scam emails.",
135  addWarningColor(hostname))
136  + QLatin1String("</li>");
137  foundScam = true;
138  } else if (url.toString().contains(QLatin1String("url?q="))) { // 4) redirect url.
139  d->mDetails += QLatin1String("<li>") + i18n("This email contains a link (%1) which has a redirection", addWarningColor(url.toString()))
140  + QLatin1String("</li>");
141  foundScam = true;
142  } else if ((url.toString().count(QStringLiteral("http://")) > 1)
143  || (url.toString().count(QStringLiteral("https://")) > 1)) { // 5) more that 1 http in url.
144  if (!url.toString().contains(QLatin1String("kmail:showAuditLog"))) {
145  d->mDetails += QLatin1String("<li>")
146  + i18n("This email contains a link (%1) which contains multiple http://. This is often the case in scam emails.",
147  addWarningColor(url.toString()))
148  + QLatin1String("</li>");
149  foundScam = true;
150  }
151  }
152  }
153  // Check shortUrl
154  if (!foundScam) {
155  if (ScamCheckShortUrl::isShortUrl(url)) {
156  d->mDetails += QLatin1String("<li>")
157  + i18n("This email contains a shorturl (%1). It can redirect to another server.", addWarningColor(url.toString())) + QLatin1String("</li>");
158  foundScam = true;
159  }
160  }
161  if (!foundScam) {
162  QUrl displayUrl = QUrl(mapVariant.value(QStringLiteral("text")).toString());
163  // Special case if https + port 443 it will return url without port
164  QString text = (displayUrl.port() == 443 && displayUrl.scheme() == QLatin1String("https"))
167  if (text.endsWith(QLatin1String("%22"))) {
168  text.chop(3);
169  }
170  const QUrl normalizedHrefUrl = QUrl(href);
171  QString normalizedHref = normalizedHrefUrl.toDisplayString(QUrl::StripTrailingSlash | QUrl::NormalizePathSegments);
172  if (text != normalizedHref) {
173  if (normalizedHref.contains(QStringLiteral("%5C"))) {
174  normalizedHref.replace(QStringLiteral("%5C"), QStringLiteral("/"));
175  }
176  }
177  if (normalizedHref.endsWith(QLatin1String("%22"))) {
178  normalizedHref.chop(3);
179  }
180  // qDebug() << "text " << text << " href "<<href << " normalizedHref " << normalizedHref;
181 
182  if (!text.isEmpty()) {
183  if (text.startsWith(QLatin1String("http:/")) || text.startsWith(QLatin1String("https:/"))) {
184  if (text != normalizedHref) {
185  if (normalizedHref != (text + QLatin1Char('/'))) {
186  if (normalizedHref.toHtmlEscaped() != text) {
187  if (QString::fromUtf8(QUrl(text).toEncoded()) != normalizedHref) {
188  if (QUrl(normalizedHref).toDisplayString() != text) {
189  const bool qurlqueryequal = displayUrl.query() == normalizedHrefUrl.query();
190  const QString displayUrlWithoutQuery =
192  const QString hrefUrlWithoutQuery =
194  // qDebug() << "displayUrlWithoutQuery " << displayUrlWithoutQuery << " hrefUrlWithoutQuery " << hrefUrlWithoutQuery <<
195  // " text " << text;
196  if (qurlqueryequal && (displayUrlWithoutQuery + QLatin1Char('/') != hrefUrlWithoutQuery)) {
197  d->mDetails += QLatin1String("<li>")
198  + i18n("This email contains a link which reads as '%1' in the text, but actually points to '%2'. This is often "
199  "the case in scam emails to mislead the recipient",
200  addWarningColor(text),
201  addWarningColor(normalizedHref))
202  + QLatin1String("</li>");
203  foundScam = true;
204  }
205  }
206  }
207  }
208  }
209  }
210  }
211  }
212  }
213  }
214  if (mapResult.value(QStringLiteral("forms")).toInt() > 0) {
215  d->mDetails += QLatin1String("<li></b>") + i18n("Message contains form element. This is often the case in scam emails.") + QLatin1String("</b></li>");
216  foundScam = true;
217  }
218  if (foundScam) {
219  d->mDetails.prepend(QLatin1String("<b>") + i18n("Details:") + QLatin1String("</b><ul>"));
220  d->mDetails += QLatin1String("</ul>");
221  Q_EMIT messageMayBeAScam();
222  }
223  Q_EMIT resultScanDetection(foundScam);
224 }
225 
226 void ScamDetectionWebEngine::showDetails()
227 {
228  if (!d->mDetailsDialog) {
229  d->mDetailsDialog = new MessageViewer::ScamDetectionDetailsDialog;
230  }
231  d->mDetailsDialog->setDetails(d->mDetails);
232  d->mDetailsDialog->show();
233 }
bool endsWith(const QString &s, Qt::CaseSensitivity cs) const const
QString fromUtf8(const char *str, int size)
QString toHtmlEscaped() const const
Q_EMITQ_EMIT
QString scheme() const const
const T value(const Key &key, const T &defaultValue) const const
void chop(int n)
QString query(QUrl::ComponentFormattingOptions options) const const
void error(QWidget *parent, const QString &text, const QString &caption=QString(), Options options=Notify)
NETWORKMANAGERQT_EXPORT QString hostname()
QString i18n(const char *text, const TYPE &arg...)
constexpr bool isEmpty() const
StripTrailingSlash
char * toString(const T &value)
bool isEmpty() const const
QString toDisplayString(QUrl::FormattingOptions options) const const
QString & replace(int position, int n, QChar after)
bool startsWith(const QString &s, Qt::CaseSensitivity cs) const const
int count() const const
int port(int defaultPort) const const
QList< QVariant > toList() const const
bool contains(QChar ch, Qt::CaseSensitivity cs) const const
T value(int i) const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2022 The KDE developers.
Generated on Wed May 25 2022 03:55:40 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.