KCoreAddons

kemoticonsparser.cpp
1/*
2 SPDX-FileCopyrightText: 2002-2008 The Kopete developers <kopete-devel@kde.org>
3 SPDX-FileCopyrightText: 2008 Carlo Segato <brandon.ml@gmail.com>
4 SPDX-FileCopyrightText: 2002-2003 Stefan Gehn <metz@gehn.net>
5 SPDX-FileCopyrightText: 2005 Engin AYDOGAN <engin@bzzzt.biz>
6
7 SPDX-License-Identifier: LGPL-2.1-or-later
8*/
9
10#include "kemoticonsparser_p.h"
11
12#include <QDebug>
13#include <QString>
14
15#include <cstring>
16
/// One row of the emoticon lookup table.
struct Emoticon {
    /// Textual emoticon, HTML entity-encoded (e.g. "&gt;:(" for ">:(").
    const char *match;
    /// UTF-8 encoded emoji that is substituted for @c match.
    const char *replacement;
};

// ### keep sorted by first column and HTML entity-encoded!
// The table is searched with std::lower_bound(), so the rows must be in strictly
// ascending byte order of `match` (a prefix sorts before its extensions, e.g.
// ";-(" before ";-(!)"). The static_assert below enforces this at compile time.
// clang-format off
static constexpr const Emoticon emoticons_map[] = {
    {"&gt;-(", "😠"},
    {"&gt;:(", "😠"},
    {"&gt;:)", "😈"},
    {"&gt;:-(", "😠"},
    {"&gt;w&lt;", "😟"},
    {"&lt;-.-&gt;", "😴"},
    {"&lt;3", "♥️"},
    {"&lt;]:o){", "🤡"},
    {"&lt;|:^0|", "🤡"},
    {"()-()", "🤓"},
    {"(-_o)zzZ", "😴"},
    {"(:|", "🥱"},
    {"(@_@)", "😕"},
    {"(c:&gt;*", "🤡"},
    {"({)", "🤗"},
    {"(})", "🤗"},
    {"*&lt;:^)", "🤡"},
    {"*&lt;:o)", "🤡"},
    {"*:o)", "🤡"},
    {"*:oB", "🤡"},
    {"*:oP", "🤡"},
    {"+o(", "🤢"},
    {",':(", "😕"},
    {"-_-", "😴"},
    {"-_-+", "😠"},
    {"-o-o-", "🤓"},
    {"/00\\", "😟"},
    {"0:)", "😇"},
    {"0:-)", "😇"},
    {"0;)", "😇"},
    {"0=)", "😇"},
    {"3:)", "😈"},
    {"8)", "😎"},
    {"8-)", "😎"},
    {"8:::(", "😭"},
    {":\"-(", "😢"},
    {":'(", "😢"},
    {":'-(", "😢"},
    {":'D", "😆"},
    {":(", "🙁"},
    {":((", "😢"},
    {":)", "🙂"},
    {":))", "😆"},
    {":*", "😗"},
    {":*(", "😢"},
    {":*)", "😗"},
    {":-$", "😯"},
    {":-&amp;", "🤢"},
    {":-&gt;", "☺️"},
    {":-&gt;&gt;", "☺️"},
    {":-(", "🙁"},
    {":-)", "🙂"},
    {":-))", "😀"},
    {":-)*", "😗"},
    {":-*", "😗"},
    {":-/", "😕"},
    {":-@", "😠"},
    {":-D", "😀"},
    {":-O", "😮"},
    {":-P", "😛"},
    {":-Q", "😕"},
    {":-S", "😕"},
    {":-X", "🤫"},
    {":-[", "😯"},
    {":-o", "😮"},
    {":-p", "😛"},
    {":-s", "😕"},
    {":-t", "😛"},
    {":-x", "🤫"},
    {":-|", "😐"},
    {":-||", "😠"},
    {":/", "🫤"},
    {":@", "😠"},
    {":C", "☹️"},
    {":D", "😀"},
    {":O", "😮"},
    {":P", "😛"},
    {":S", "😕"},
    {":X", "🤫"},
    {":\\", "🫤"},
    {":_(", "😢"},
    {":c", "☹️"},
    {":o", "😮"},
    {":o)", "🤡"},
    {":p", "😛"},
    {":s", "😕"},
    {":x", "🤫"},
    {":|))", "😀"},
    {";(", "😢"},
    {";)", "😉"},
    {";-(", "😢"},
    {";-(!)", "😗"},
    {";-)", "😉"},
    {";_;", "😢"},
    {"= #", "😗"},
    {"='(", "😢"},
    {"=(", "🙁"},
    {"=[", "🙁"},
    {"=^D", "😆"},
    {"B-)", "😎"},
    {"D:", "🙁"},
    {"D=", "🙁"},
    {"O-)", "😇"},
    {"O.o", "🤔"},
    {"O.o?", "🤔"},
    {"O:)", "😇"},
    {"O:-)", "😇"},
    {"O;", "😇"},
    {"T.T", "🙁"},
    {"T_T", "😭"},
    {"X-(", "😠"},
    {"Y_Y", "🙁"},
    {"Z_Z", "😴"},
    {"\\o-o/", "🤓"},
    {"\\~/", "🤓"},
    {"]:-&gt;", "😈"},
    {"^j^", "😇"},
    {"i_i", "😭"},
    {"t.t", "🙁"},
    {"y_y", "🙁"},
    {"|-O", "🥱"},
    {"}:-)", "😈"},
};
// clang-format on

/// Compile-time check that emoticons_map is strictly ascending by `match`
/// (bytes compared as unsigned values), i.e. sorted and free of duplicates.
static constexpr bool emoticonsMapIsSorted()
{
    for (std::size_t i = 1; i < sizeof(emoticons_map) / sizeof(emoticons_map[0]); ++i) {
        const char *lhs = emoticons_map[i - 1].match;
        const char *rhs = emoticons_map[i].match;
        // Skip the common prefix of the two neighbouring keys.
        while (*lhs != '\0' && *lhs == *rhs) {
            ++lhs;
            ++rhs;
        }
        // The first differing byte decides; equal strings (both at '\0') are rejected too.
        if (static_cast<unsigned char>(*lhs) >= static_cast<unsigned char>(*rhs)) {
            return false;
        }
    }
    return true;
}
static_assert(emoticonsMapIsSorted(), "emoticons_map must be sorted by its first column");
148
149static const Emoticon *findEmoticon(QStringView s)
150{
151 auto it = std::lower_bound(std::begin(emoticons_map), std::end(emoticons_map), s, [](const auto &emoticon, auto s) {
152 return QLatin1String(emoticon.match) < s;
153 });
154 if (it != std::end(emoticons_map) && s.startsWith(QLatin1String((*it).match))) {
155 return it;
156 }
157 // if we don't have an exact match but a prefix, that will be in the item before the one returned by lower_bound
158 if (it != std::begin(emoticons_map)) {
159 it = std::prev(it);
160 if (s.startsWith(QLatin1String((*it).match))) {
161 return it;
162 }
163 }
164 return nullptr;
165}
166
167QString KEmoticonsParser::parseEmoticons(const QString &message)
168{
169 QString result;
170
171 /* previous char, in the firs iteration assume that it is space since we want
172 * to let emoticons at the beginning, the very first previous QChar must be a space. */
173 QChar p = QLatin1Char(' ');
174
175 int pos = 0;
176 int previousPos = 0;
177
178 bool inHTMLTag = false;
179 bool inHTMLLink = false;
180 bool inHTMLEntity = false;
181
182 for (; pos < message.length(); ++pos) {
183 const QChar c = message[pos];
184
185 if (!inHTMLTag) { // Are we already in an HTML tag ?
186 if (c == QLatin1Char('<')) { // If not check if are going into one
187 inHTMLTag = true; // If we are, change the state to inHTML
188 p = c;
189 continue;
190 }
191 } else { // We are already in a HTML tag
192 if (c == QLatin1Char('>')) { // Check if it ends
193 inHTMLTag = false; // If so, change the state
194
195 if (p == QLatin1Char('a')) {
196 inHTMLLink = false;
197 }
198 } else if (c == QLatin1Char('a') && p == QLatin1Char('<')) { // check if we just entered an anchor tag
199 inHTMLLink = true; // don't put smileys in urls
200 }
201 p = c;
202 continue;
203 }
204
205 if (!inHTMLEntity) { // are we
206 if (c == QLatin1Char('&')) {
207 inHTMLEntity = true;
208 }
209 }
210
211 if (inHTMLLink) { // i can't think of any situation where a link address might need emoticons
212 p = c;
213 continue;
214 }
215
216 if (!p.isSpace() && p != QLatin1Char('>')) { // '>' may mark the end of an html tag
217 p = c;
218 continue;
219 } /* strict requires space before the emoticon */
220
221 const auto emoticon = findEmoticon(QStringView(message).mid(pos));
222 if (emoticon) {
223 bool found = true;
224 /* check if the character after this match is space or end of string*/
225 const int matchLen = std::strlen(emoticon->match);
226 if (message.length() > pos + matchLen) {
227 const QChar n = message[pos + matchLen];
228 //<br/> marks the end of a line
229 if (n != QLatin1Char('<') && !n.isSpace() && !n.isNull() && n != QLatin1Char('&')) {
230 found = false;
231 }
232 }
233
234 if (found) {
235 result += QStringView(message).mid(previousPos, pos - previousPos);
236 result += QString::fromUtf8(emoticon->replacement);
237
238 /* Skip the matched emoticon's matchText */
239 pos += matchLen - 1;
240 previousPos = pos + 1;
241 } else {
242 if (inHTMLEntity) {
243 // If we are in an HTML entity such as &gt;
244 const int htmlEnd = message.indexOf(QLatin1Char(';'), pos);
245 // Search for where it ends
246 if (htmlEnd == -1) {
247 // Apparently this HTML entity isn't ended, something is wrong, try skip the '&'
248 // and continue
249 // qCDebug(KEMOTICONS_CORE) << "Broken HTML entity, trying to recover.";
250 inHTMLEntity = false;
251 pos++;
252 } else {
253 pos = htmlEnd;
254 inHTMLEntity = false;
255 }
256 }
257 }
258 } /* else no emoticons begin with this character, so don't do anything */
259 p = c;
260 }
261
262 if (result.isEmpty()) {
263 return message;
264 }
265 if (previousPos < message.length()) {
266 result += QStringView(message).mid(previousPos);
267 }
268 return result;
269}
bool isNull() const const
bool isSpace(char32_t ucs4)
QString fromUtf8(QByteArrayView str)
qsizetype indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const const
bool isEmpty() const const
qsizetype length() const const
QStringView mid(qsizetype start, qsizetype length) const const
bool startsWith(QChar ch) const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Fri Jun 7 2024 12:01:00 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.