Messagelib

scamdetectionwebengine.cpp
1 /*
2  SPDX-FileCopyrightText: 2016-2023 Laurent Montel <[email protected]>
3 
4  SPDX-License-Identifier: LGPL-2.0-or-later
5 
6 */
7 #include "scamdetectionwebengine.h"
8 #include "MessageViewer/ScamCheckShortUrl"
9 #include "scamdetectiondetailsdialog.h"
10 #include "settings/messageviewersettings.h"
11 #include "webengineviewer/webenginescript.h"
12 #include <WebEngineViewer/WebEngineManageScript>
13 
14 #include <KLocalizedString>
15 
16 #include <QPointer>
17 #include <QRegularExpression>
18 #include <QWebEnginePage>
19 
20 using namespace MessageViewer;
21 
22 template<typename Arg, typename R, typename C>
23 struct InvokeWrapper {
24  QPointer<R> receiver;
25  void (C::*memberFunction)(Arg);
26  void operator()(Arg result)
27  {
28  if (receiver) {
29  (receiver->*memberFunction)(result);
30  }
31  }
32 };
33 
34 template<typename Arg, typename R, typename C>
35 
36 InvokeWrapper<Arg, R, C> invoke(R *receiver, void (C::*memberFunction)(Arg))
37 {
38  InvokeWrapper<Arg, R, C> wrapper = {receiver, memberFunction};
39  return wrapper;
40 }
41 
42 static QString addWarningColor(const QString &url)
43 {
44  const QString error = QStringLiteral("<font color=#FF0000>%1</font>").arg(url);
45  return error;
46 }
47 
48 class MessageViewer::ScamDetectionWebEnginePrivate
49 {
50 public:
51  ScamDetectionWebEnginePrivate() = default;
52 
53  QString mDetails;
55 };
56 
57 ScamDetectionWebEngine::ScamDetectionWebEngine(QObject *parent)
58  : QObject(parent)
59  , d(new MessageViewer::ScamDetectionWebEnginePrivate)
60 {
61 }
62 
63 ScamDetectionWebEngine::~ScamDetectionWebEngine() = default;
64 
65 void ScamDetectionWebEngine::scanPage(QWebEnginePage *page)
66 {
67  if (MessageViewer::MessageViewerSettings::self()->scamDetectionEnabled()) {
68  page->runJavaScript(WebEngineViewer::WebEngineScript::findAllAnchorsAndForms(),
69  WebEngineViewer::WebEngineManageScript::scriptWordId(),
70  invoke(this, &ScamDetectionWebEngine::handleScanPage));
71  }
72 }
73 
74 void ScamDetectionWebEngine::handleScanPage(const QVariant &result)
75 {
76  bool foundScam = false;
77 
78  d->mDetails.clear();
79  const QVariantList resultList = result.toList();
80  if (resultList.count() != 1) {
81  Q_EMIT resultScanDetection(foundScam);
82  return;
83  }
84  static const QRegularExpression ip4regExp(QStringLiteral("\\b[0-9]{1,3}\\.[0-9]{1,3}(?:\\.[0-9]{0,3})?(?:\\.[0-9]{0,3})?"));
85  const QVariantMap mapResult = resultList.at(0).toMap();
86  const QList<QVariant> lst = mapResult.value(QStringLiteral("anchors")).toList();
87  for (const QVariant &var : lst) {
88  QMap<QString, QVariant> mapVariant = var.toMap();
89  // qDebug()<<" mapVariant"<<mapVariant;
90 
91  // 1) detect if title has a url and title != href
92  const QString title = mapVariant.value(QStringLiteral("title")).toString();
93  QString href = mapVariant.value(QStringLiteral("src")).toString();
94  if (!QUrl(href).toString().contains(QLatin1String("kmail:showAuditLog"))) {
95  href = href.toLower();
96  }
97  const QUrl url(href);
98  if (!title.isEmpty()) {
99  if (title.startsWith(QLatin1String("http:")) || title.startsWith(QLatin1String("https:")) || title.startsWith(QLatin1String("www."))) {
100  if (title.startsWith(QLatin1String("www."))) {
101  const QString completUrl = url.scheme() + QLatin1String("://") + title;
102  if (completUrl != href && href != (completUrl + QLatin1Char('/'))) {
103  foundScam = true;
104  }
105  } else {
106  if (href != title) {
107  // http://www.kde.org == http://www.kde.org/
108  if (href != (title + QLatin1Char('/'))) {
109  foundScam = true;
110  }
111  }
112  }
113  if (foundScam) {
114  d->mDetails += QLatin1String("<li>")
115  + i18n("This email contains a link which reads as '%1' in the text, but actually points to '%2'. This is often the case in scam emails "
116  "to mislead the recipient",
117  addWarningColor(title),
118  addWarningColor(href))
119  + QLatin1String("</li>");
120  }
121  }
122  }
123  if (!foundScam) {
124  // 2) detect if url href has ip and not server name.
125  const QString hostname = url.host();
126  if (hostname.contains(ip4regExp) && !hostname.contains(QLatin1String("127.0.0.1"))) { // hostname
127  d->mDetails += QLatin1String("<li>")
128  + i18n("This email contains a link which points to a numerical IP address (%1) instead of a typical textual website address. This is often "
129  "the case in scam emails.",
130  addWarningColor(hostname))
131  + QLatin1String("</li>");
132  foundScam = true;
133  } else if (hostname.contains(QLatin1Char('%'))) { // Hexa value for ip
134  d->mDetails += QLatin1String("<li>")
135  + i18n("This email contains a link which points to a hexadecimal IP address (%1) instead of a typical textual website address. This is "
136  "often the case in scam emails.",
137  addWarningColor(hostname))
138  + QLatin1String("</li>");
139  foundScam = true;
140  } else if (url.toString().contains(QLatin1String("url?q="))) { // 4) redirect url.
141  d->mDetails += QLatin1String("<li>") + i18n("This email contains a link (%1) which has a redirection", addWarningColor(url.toString()))
142  + QLatin1String("</li>");
143  foundScam = true;
144  } else if ((url.toString().count(QStringLiteral("http://")) > 1)
145  || (url.toString().count(QStringLiteral("https://")) > 1)) { // 5) more that 1 http in url.
146  if (!url.toString().contains(QLatin1String("kmail:showAuditLog"))) {
147  d->mDetails += QLatin1String("<li>")
148  + i18n("This email contains a link (%1) which contains multiple http://. This is often the case in scam emails.",
149  addWarningColor(url.toString()))
150  + QLatin1String("</li>");
151  foundScam = true;
152  }
153  }
154  }
155  // Check shortUrl
156  if (!foundScam) {
157  if (ScamCheckShortUrl::isShortUrl(url)) {
158  d->mDetails += QLatin1String("<li>")
159  + i18n("This email contains a shorturl (%1). It can redirect to another server.", addWarningColor(url.toString())) + QLatin1String("</li>");
160  foundScam = true;
161  }
162  }
163  if (!foundScam) {
164  QUrl displayUrl = QUrl(mapVariant.value(QStringLiteral("text")).toString());
165  // Special case if https + port 443 it will return url without port
166  QString text = (displayUrl.port() == 443 && displayUrl.scheme() == QLatin1String("https"))
169  if (text.endsWith(QLatin1String("%22"))) {
170  text.chop(3);
171  }
172  const QUrl normalizedHrefUrl = QUrl(href.toLower());
173  QString normalizedHref = normalizedHrefUrl.toDisplayString(QUrl::StripTrailingSlash | QUrl::NormalizePathSegments);
174  if (text != normalizedHref) {
175  if (normalizedHref.contains(QStringLiteral("%5C"))) {
176  normalizedHref.replace(QStringLiteral("%5C"), QStringLiteral("/"));
177  }
178  }
179  if (normalizedHref.endsWith(QLatin1String("%22"))) {
180  normalizedHref.chop(3);
181  }
182  // qDebug() << "text " << text << " href "<<href << " normalizedHref " << normalizedHref;
183 
184  if (!text.isEmpty()) {
185  if (text.startsWith(QLatin1String("http:/")) || text.startsWith(QLatin1String("https:/"))) {
186  if (text.toLower() != normalizedHref.toLower()) {
187  if (text != normalizedHref) {
188  if (normalizedHref != (text + QLatin1Char('/'))) {
189  if (normalizedHref.toHtmlEscaped() != text) {
190  if (QString::fromUtf8(QUrl(text).toEncoded()) != normalizedHref) {
191  if (QUrl(normalizedHref).toDisplayString() != text) {
192  const bool qurlqueryequal = displayUrl.query() == normalizedHrefUrl.query();
193  const QString displayUrlWithoutQuery =
195  const QString hrefUrlWithoutQuery =
197  // qDebug() << "displayUrlWithoutQuery " << displayUrlWithoutQuery << " hrefUrlWithoutQuery " <<
198  // hrefUrlWithoutQuery << " text " << text;
199  if (qurlqueryequal && (displayUrlWithoutQuery + QLatin1Char('/') != hrefUrlWithoutQuery)) {
200  d->mDetails += QLatin1String("<li>")
201  + i18n("This email contains a link which reads as '%1' in the text, but actually points to '%2'. This is "
202  "often "
203  "the case in scam emails to mislead the recipient",
204  addWarningColor(text),
205  addWarningColor(normalizedHref))
206  + QLatin1String("</li>");
207  foundScam = true;
208  }
209  }
210  }
211  }
212  }
213  }
214  }
215  }
216  }
217  }
218  }
219  if (mapResult.value(QStringLiteral("forms")).toInt() > 0) {
220  d->mDetails += QLatin1String("<li></b>") + i18n("Message contains form element. This is often the case in scam emails.") + QLatin1String("</b></li>");
221  foundScam = true;
222  }
223  if (foundScam) {
224  d->mDetails.prepend(QLatin1String("<b>") + i18n("Details:") + QLatin1String("</b><ul>"));
225  d->mDetails += QLatin1String("</ul>");
226  Q_EMIT messageMayBeAScam();
227  }
228  Q_EMIT resultScanDetection(foundScam);
229 }
230 
231 void ScamDetectionWebEngine::showDetails()
232 {
233  if (!d->mDetailsDialog) {
234  d->mDetailsDialog = new MessageViewer::ScamDetectionDetailsDialog;
235  }
236  d->mDetailsDialog->setDetails(d->mDetails);
237  d->mDetailsDialog->show();
238 }
239 
240 #include "moc_scamdetectionwebengine.cpp"
bool endsWith(const QString &s, Qt::CaseSensitivity cs) const
QString fromUtf8(const char *str, int size)
QString toHtmlEscaped() const
Q_EMIT
QString scheme() const
const T value(const Key &key, const T &defaultValue) const
void chop(int n)
QString query(QUrl::ComponentFormattingOptions options) const
NETWORKMANAGERQT_EXPORT QString hostname()
QString i18n(const char *text, const TYPE &arg...)
constexpr bool isEmpty() const
StripTrailingSlash
bool isEmpty() const
QString toDisplayString(QUrl::FormattingOptions options) const
void error(QWidget *parent, const QString &text, const QString &title, const KGuiItem &buttonOk, Options options=Notify)
QString & replace(int position, int n, QChar after)
bool startsWith(const QString &s, Qt::CaseSensitivity cs) const
QString toLower() const
int count() const
int port(int defaultPort) const
QList< QVariant > toList() const
bool contains(QChar ch, Qt::CaseSensitivity cs) const
T value(int i) const
char * toString(const EngineQuery &query)
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Sun Dec 3 2023 03:57:07 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.