KCodecs

kcharsets.cpp
1/*
2 This file is part of the KDE libraries
3
4 SPDX-FileCopyrightText: 1999 Lars Knoll <knoll@kde.org>
5 SPDX-FileCopyrightText: 2001, 2003, 2004, 2005, 2006 Nicolas GOUTTE <goutte@kde.org>
6 SPDX-FileCopyrightText: 2007 Nick Shaforostoff <shafff@ukr.net>
7
8 SPDX-License-Identifier: LGPL-2.0-or-later
9*/
10#include "kcharsets.h"
11#include "kcharsets_p.h"
12#include "kcodecs_debug.h"
13
14#include <QHash>
15
16#include <algorithm>
17#include <assert.h>
18
19/*
20 * The encoding names (like "ISO 8859-1") in this list are user-visible,
21 * and should be mostly uppercase.
22 * Generate with generate_string_table.pl (located in kde-dev-scripts),
23 * input data:
24ISO 8859-1
25i18n:Western European
26ISO 8859-15
27i18n:Western European
28ISO 8859-14
29i18n:Western European
30cp 1252
31i18n:Western European
32IBM850
33i18n:Western European
34ISO 8859-2
35i18n:Central European
36ISO 8859-3
37i18n:Central European
38ISO 8859-4
39i18n:Baltic
40ISO 8859-13
41i18n:Baltic
42ISO 8859-16
43i18n:South-Eastern Europe
44cp 1250
45i18n:Central European
46cp 1254
47i18n:Turkish
48cp 1257
49i18n:Baltic
50KOI8-R
51i18n:Cyrillic
52ISO 8859-5
53i18n:Cyrillic
54cp 1251
55i18n:Cyrillic
56KOI8-U
57i18n:Cyrillic
58IBM866
59i18n:Cyrillic
60Big5
61i18n:Chinese Traditional
62Big5-HKSCS
63i18n:Chinese Traditional
64GB18030
65i18n:Chinese Simplified
66GBK
67i18n:Chinese Simplified
68GB2312
69i18n:Chinese Simplified
70EUC-KR
71i18n:Korean
72windows-949
73i18n:Korean
74sjis
75i18n:Japanese
76ISO-2022-JP
77i18n:Japanese
78EUC-JP
79i18n:Japanese
80ISO 8859-7
81i18n:Greek
82cp 1253
83i18n:Greek
84ISO 8859-6
85i18n:Arabic
86cp 1256
87i18n:Arabic
88ISO 8859-8
89i18n:Hebrew
90ISO 8859-8-I
91i18n:Hebrew
92cp 1255
93i18n:Hebrew
94ISO 8859-9
95i18n:Turkish
96TIS620
97i18n:Thai
98ISO 8859-11
99i18n:Thai
100UTF-8
101i18n:Unicode
102UTF-16
103i18n:Unicode
104utf7
105i18n:Unicode
106ucs2
107i18n:Unicode
108ISO 10646-UCS-2
109i18n:Unicode
110windows-1258
111i18n:Other
112IBM874
113i18n:Other
114TSCII
115i18n:Other
116 */
117/*
118 * Notes about the table:
119 *
120 * - The following entries were disabled and removed from the table:
121ibm852
122i18n:Central European
123pt 154
124i18n:Cyrillic // ### TODO "PT 154" seems to have been removed from Qt
125 *
126 * - ISO 8859-11 is the deprecated name of TIS-620
127 * - utf7 is not in Qt
128 * - UTF-16 is duplicated as "ucs2" and "ISO 10646-UCS-2"
129 * - windows-1258: TODO
130 * - IBM874: TODO
131 * - TSCII: TODO
132 */
133
134/*
135 * This redefines the QT_TRANSLATE_NOOP3 macro provided by Qt to indicate that
136 * statically initialised text should be translated so that it expands to just
137 * the string that should be translated, making it possible to use it in the
138 * single string construct below.
139 */
140#undef QT_TRANSLATE_NOOP3
141#define QT_TRANSLATE_NOOP3(a, b, c) b
142
143/*
144 * THE FOLLOWING CODE IS GENERATED. PLEASE DO NOT EDIT BY HAND.
145 * The script used was generate_string_table.pl which can be found in kde-dev-scripts.
146 * It was then edited to use QT_TRANSLATE_NOOP3 instead of I18N_NOOP.
147 */
148
// Pool of NUL-terminated strings: encoding names interleaved with the
// language/script descriptions they belong to. Entries are addressed by byte
// offset through language_for_encoding_indices; a description is stored once,
// directly after the first encoding that uses it.
// QT_TRANSLATE_NOOP3 is redefined above to expand to its second argument only,
// so each macro use contributes just the plain description string here.
149static const char language_for_encoding_string[] =
150 "ISO 8859-1\0"
151 QT_TRANSLATE_NOOP3("KCharsets", "Western European", "@item Text character set")"\0"
152 "ISO 8859-15\0"
153 "ISO 8859-14\0"
154 "cp 1252\0"
155 "IBM850\0"
156 "ISO 8859-2\0"
157 QT_TRANSLATE_NOOP3("KCharsets", "Central European", "@item Text character set")"\0"
158 "ISO 8859-3\0"
159 "ISO 8859-4\0"
160 QT_TRANSLATE_NOOP3("KCharsets", "Baltic", "@item Text character set")"\0"
161 "ISO 8859-13\0"
162 "ISO 8859-16\0"
163 QT_TRANSLATE_NOOP3("KCharsets", "South-Eastern Europe", "@item Text character set")"\0"
164 "cp 1250\0"
165 "cp 1254\0"
166 QT_TRANSLATE_NOOP3("KCharsets", "Turkish", "@item Text character set")"\0"
167 "cp 1257\0"
168 "KOI8-R\0"
169 QT_TRANSLATE_NOOP3("KCharsets", "Cyrillic", "@item Text character set")"\0"
170 "ISO 8859-5\0"
171 "cp 1251\0"
172 "KOI8-U\0"
173 "IBM866\0"
174 "Big5\0"
175 QT_TRANSLATE_NOOP3("KCharsets", "Chinese Traditional", "@item Text character set")"\0"
176 "Big5-HKSCS\0"
177 "GB18030\0"
178 QT_TRANSLATE_NOOP3("KCharsets", "Chinese Simplified", "@item Text character set")"\0"
179 "GBK\0"
180 "GB2312\0"
181 "EUC-KR\0"
182 QT_TRANSLATE_NOOP3("KCharsets", "Korean", "@item Text character set")"\0"
183 "windows-949\0"
184 "sjis\0"
185 QT_TRANSLATE_NOOP3("KCharsets", "Japanese", "@item Text character set")"\0"
186 "ISO-2022-JP\0"
187 "EUC-JP\0"
188 "ISO 8859-7\0"
189 QT_TRANSLATE_NOOP3("KCharsets", "Greek", "@item Text character set")"\0"
190 "cp 1253\0"
191 "ISO 8859-6\0"
192 QT_TRANSLATE_NOOP3("KCharsets", "Arabic", "@item Text character set")"\0"
193 "cp 1256\0"
194 "ISO 8859-8\0"
195 QT_TRANSLATE_NOOP3("KCharsets", "Hebrew", "@item Text character set")"\0"
196 "ISO 8859-8-I\0"
197 "cp 1255\0"
198 "ISO 8859-9\0"
199 "TIS620\0"
200 QT_TRANSLATE_NOOP3("KCharsets", "Thai", "@item Text character set")"\0"
201 "ISO 8859-11\0"
202 "UTF-8\0"
203 QT_TRANSLATE_NOOP3("KCharsets", "Unicode", "@item Text character set")"\0"
204 "UTF-16\0"
205 "utf7\0"
206 "ucs2\0"
207 "ISO 10646-UCS-2\0"
208 "windows-1258\0"
209 QT_TRANSLATE_NOOP3("KCharsets", "Other", "@item Text character set")"\0"
210 "IBM874\0"
211 "TSCII\0"
212 "\0";
213
// Flat list of offset pairs into language_for_encoding_string:
// element 2k is the offset of an encoding name, element 2k+1 the offset of
// the description for that encoding. The list is terminated by a single -1,
// which is what the readers in this file test for while stepping by two.
214static const int language_for_encoding_indices[] = {
215 0, 11, 28, 11, 40, 11, 52, 11, 60, 11, 67, 78, 95, 78, 106, 117, 124, 117, 136, 148, 169, 78, 177, 185, 193, 117, 201, 208, 217, 208, 228,
216 208, 236, 208, 243, 208, 250, 255, 275, 255, 286, 294, 313, 294, 317, 294, 324, 331, 338, 331, 350, 355, 364, 355, 376, 355, 383, 394, 400, 394, 408, 419,
217 426, 419, 434, 445, 452, 445, 465, 445, 473, 185, 484, 491, 496, 491, 508, 514, 522, 514, 529, 514, 534, 514, 539, 514, 555, 568, 574, 568, 581, 568, -1};
218
219/*
220 * GENERATED CODE ENDS HERE
221 */
222
223struct KCharsetsSingletonPrivate {
224 KCharsets instance;
225};
226
227Q_GLOBAL_STATIC(KCharsetsSingletonPrivate, globalCharsets)
228
229// search an array of items index/data, find first matching index
230// and return data, or return 0
231static inline const char *kcharsets_array_search(const char *start, const int *indices, const char *entry)
232{
233 for (int i = 0; indices[i] != -1; i += 2) {
234 if (qstrcmp(start + indices[i], entry) == 0) {
235 return start + indices[i + 1];
236 }
237 }
238 return nullptr;
239}
240
241// --------------------------------------------------------------------------
242
244 : d(new KCharsetsPrivate)
245{
246}
247
248KCharsets::~KCharsets() = default;
249
// Entity table used for name lookup. It is searched with std::lower_bound, so
// it MUST stay sorted by the byte-wise (strncmp) order of the names: digits
// sort before upper-case letters, which sort before lower-case letters
// (e.g. "AMP" < "Aacute", "sup" < "sup1" < "supe"). The static_assert below
// the table rejects any out-of-order or duplicate entry at compile time.
constexpr inline auto MAX_CODE_SIZE = 8;

struct Entity {
    // Builds an entry from a string literal and its code point. The name is
    // stored zero-padded; a name of exactly MAX_CODE_SIZE characters is stored
    // without a terminating NUL, which is why all comparisons on `name` are
    // bounded by MAX_CODE_SIZE.
    template<std::size_t N>
    constexpr Entity(const char (&n)[N], uint32_t c)
        : name{}
        , code(c)
    {
        static_assert(N >= 1 && N - 1 <= static_cast<std::size_t>(MAX_CODE_SIZE), "entity name does not fit into Entity::name");
        for (std::size_t i = 0; i + 1 < N; ++i) {
            name[i] = n[i];
        }
    }

    char name[MAX_CODE_SIZE];
    uint32_t code;
};
static constexpr Entity entities[] = {
    {"AElig", 0x00c6},  {"AMP", 38},        {"Aacute", 0x00c1}, {"Acirc", 0x00c2},    {"Agrave", 0x00c0},   {"Alpha", 0x0391},   {"Aring", 0x00c5},
    {"Atilde", 0x00c3}, {"Auml", 0x00c4},   {"Beta", 0x0392},   {"Ccaron", 0x010c},   {"Ccedil", 0x00c7},   {"Chi", 0x03a7},     {"Dagger", 0x2021},
    {"Dcaron", 0x010e}, {"Delta", 0x0394},  {"ETH", 0x00d0},    {"Eacute", 0x00c9},   {"Eague", 0x00c9},    {"Ecaron", 0x011a},  {"Ecirc", 0x00ca},
    {"Egrave", 0x00c8}, {"Epsilon", 0x0395}, {"Eta", 0x0397},   {"Euml", 0x00cb},     {"GT", 62},           {"Gamma", 0x0393},   {"Iacute", 0x00cd},
    {"Icirc", 0x00ce},  {"Igrave", 0x00cc}, {"Iota", 0x0399},   {"Iuml", 0x00cf},     {"Kappa", 0x039a},    {"LT", 60},          {"Lambda", 0x039b},
    {"Mu", 0x039c},     {"Ncaron", 0x0147}, {"Ntilde", 0x00d1}, {"Nu", 0x039d},       {"OElig", 0x0152},    {"Oacute", 0x00d3},  {"Ocirc", 0x00d4},
    {"Ograve", 0x00d2}, {"Omega", 0x03a9},  {"Omicron", 0x039f}, {"Oslash", 0x00d8},  {"Otilde", 0x00d5},   {"Ouml", 0x00d6},    {"Phi", 0x03a6},
    {"Pi", 0x03a0},     {"Prime", 0x2033},  {"Psi", 0x03a8},    {"QUOT", 34},         {"Rcaron", 0x0158},   {"Rho", 0x03a1},     {"Scaron", 0x0160},
    {"Sigma", 0x03a3},  {"THORN", 0x00de},  {"Tau", 0x03a4},    {"Tcaron", 0x0164},   {"Theta", 0x0398},    {"Uacute", 0x00da},  {"Ucirc", 0x00db},
    {"Ugrave", 0x00d9}, {"Upsilon", 0x03a5}, {"Uring", 0x016e}, {"Uuml", 0x00dc},     {"Xi", 0x039e},       {"Yacute", 0x00dd},  {"Yuml", 0x0178},
    {"Zcaron", 0x017d}, {"Zeta", 0x0396},   {"aacute", 0x00e1}, {"acirc", 0x00e2},    {"acute", 0x00b4},    {"aelig", 0x00e6},   {"agrave", 0x00e0},
    {"alefsym", 0x2135}, {"alpha", 0x03b1}, {"amp", 38},        {"and", 0x2227},      {"ang", 0x2220},      {"apos", 0x0027},    {"aring", 0x00e5},
    {"asymp", 0x2248},  {"atilde", 0x00e3}, {"auml", 0x00e4},   {"bdquo", 0x201e},    {"beta", 0x03b2},     {"brvbar", 0x00a6},  {"bull", 0x2022},
    {"cap", 0x2229},    {"ccaron", 0x010d}, {"ccedil", 0x00e7}, {"cedil", 0x00b8},    {"cent", 0x00a2},     {"chi", 0x03c7},     {"circ", 0x02c6},
    {"clubs", 0x2663},  {"cong", 0x2245},   {"copy", 0x00a9},   {"crarr", 0x21b5},    {"cup", 0x222a},      {"curren", 0x00a4},  {"dArr", 0x21d3},
    {"dagger", 0x2020}, {"darr", 0x2193},   {"dcaron", 0x10f},  {"deg", 0x00b0},      {"delta", 0x03b4},    {"diams", 0x2666},   {"divide", 0x00f7},
    {"dol", 0x0024},    {"dollar", 0x0024}, {"eacute", 0x00e9}, {"eague", 0x00e9},    {"ecaron", 0x011b},   {"ecirc", 0x00ea},   {"egrave", 0x00e8},
    {"emdash", 0x2014}, {"empty", 0x2205},  {"emsp", 0x2003},   {"endash", 0x2013},   {"ensp", 0x2002},     {"epsilon", 0x03b5}, {"equiv", 0x2261},
    {"eta", 0x03b7},    {"eth", 0x00f0},    {"euml", 0x00eb},   {"euro", 0x20ac},     {"exist", 0x2203},    {"fnof", 0x0192},    {"forall", 0x2200},
    {"frac12", 0x00bd}, {"frac14", 0x00bc}, {"frac34", 0x00be}, {"frasl", 0x2044},    {"gamma", 0x03b3},    {"ge", 0x2265},      {"gt", 62},
    {"hArr", 0x21d4},   {"harr", 0x2194},   {"hearts", 0x2665}, {"hellip", 0x2026},   {"iacute", 0x00ed},   {"icirc", 0x00ee},   {"iexcl", 0x00a1},
    {"igrave", 0x00ec}, {"image", 0x2111},  {"infin", 0x221e},  {"int", 0x222b},      {"iota", 0x03b9},     {"iquest", 0x00bf},  {"isin", 0x2208},
    {"iuml", 0x00ef},   {"kappa", 0x03ba},  {"lArr", 0x21d0},   {"lambda", 0x03bb},   {"lang", 0x2329},     {"laquo", 0x00ab},   {"larr", 0x2190},
    {"lceil", 0x2308},  {"ldquo", 0x201c},  {"le", 0x2264},     {"lfloor", 0x230a},   {"lowast", 0x2217},   {"loz", 0x25ca},     {"lrm", 0x200e},
    {"lsaquo", 0x2039}, {"lsquo", 0x2018},  {"lt", 60},         {"macr", 0x00af},     {"mdash", 0x2014},    {"micro", 0x00b5},   {"middot", 0x00b7},
    {"minus", 0x2212},  {"mu", 0x03bc},     {"nabla", 0x2207},  {"nbsp", 0x00a0},     {"ncaron", 0x0148},   {"ndash", 0x2013},   {"ne", 0x2260},
    {"ni", 0x220b},     {"not", 0x00ac},    {"notin", 0x2209},  {"nsub", 0x2284},     {"ntilde", 0x00f1},   {"nu", 0x03bd},      {"oacute", 0x00f3},
    {"ocirc", 0x00f4},  {"oelig", 0x0153},  {"ograve", 0x00f2}, {"oline", 0x203e},    {"omega", 0x03c9},    {"omicron", 0x03bf}, {"oplus", 0x2295},
    {"or", 0x2228},     {"ordf", 0x00aa},   {"ordm", 0x00ba},   {"oslash", 0x00f8},   {"otilde", 0x00f5},   {"otimes", 0x2297},  {"ouml", 0x00f6},
    {"para", 0x00b6},   {"part", 0x2202},   {"percnt", 0x0025}, {"permil", 0x2030},   {"perp", 0x22a5},     {"phi", 0x03c6},     {"pi", 0x03c0},
    {"piv", 0x03d6},    {"plusmn", 0x00b1}, {"pound", 0x00a3},  {"prime", 0x2032},    {"prod", 0x220f},     {"prop", 0x221d},    {"psi", 0x03c8},
    {"quot", 34},       {"rArr", 0x21d2},   {"radic", 0x221a},  {"rang", 0x232a},     {"raquo", 0x00bb},    {"rarr", 0x2192},    {"rcaron", 0x0159},
    {"rceil", 0x2309},  {"rdquo", 0x201d},  {"real", 0x211c},   {"reg", 0x00ae},      {"rfloor", 0x230b},   {"rho", 0x03c1},     {"rlm", 0x200f},
    {"rsaquo", 0x203a}, {"rsquo", 0x2019},  {"sbquo", 0x201a},  {"scaron", 0x0161},   {"sdot", 0x22c5},     {"sect", 0x00a7},    {"shy", 0x00ad},
    {"sigma", 0x03c3},  {"sigmaf", 0x03c2}, {"sim", 0x223c},    {"spades", 0x2660},   {"sub", 0x2282},      {"sube", 0x2286},    {"sum", 0x2211},
    {"sup", 0x2283},    {"sup1", 0x00b9},   {"sup2", 0x00b2},   {"sup3", 0x00b3},     {"supe", 0x2287},     {"supl", 0x00b9},    {"szlig", 0x00df},
    {"tau", 0x03c4},    {"tcaron", 0x0165}, {"there4", 0x2234}, {"theta", 0x03b8},    {"thetasym", 0x03d1}, {"thinsp", 0x2009},  {"thorn", 0x00fe},
    {"tilde", 0x02dc},  {"times", 0x00d7},  {"trade", 0x2122},  {"uArr", 0x21d1},     {"uacute", 0x00fa},   {"uarr", 0x2191},    {"ucirc", 0x00fb},
    {"ugrave", 0x00f9}, {"uml", 0x00a8},    {"upsih", 0x03d2},  {"upsilon", 0x03c5},  {"uring", 0x016f},    {"uuml", 0x00fc},    {"weierp", 0x2118},
    {"xi", 0x03be},     {"yacute", 0x00fd}, {"yen", 0x00a5},    {"yuml", 0x00ff},     {"zcaron", 0x017e},   {"zeta", 0x03b6},    {"zwj", 0x200d},
    {"zwnj", 0x200c}};

// Compile-time equivalent of strncmp(lhs.name, rhs.name, MAX_CODE_SIZE) < 0.
[[maybe_unused]] static constexpr bool entityNamePrecedes(const Entity &lhs, const Entity &rhs)
{
    for (int i = 0; i < MAX_CODE_SIZE; ++i) {
        const unsigned char l = static_cast<unsigned char>(lhs.name[i]);
        const unsigned char r = static_cast<unsigned char>(rhs.name[i]);
        if (l != r) {
            return l < r;
        }
        if (l == 0) {
            break; // both names ended: they are equal
        }
    }
    return false;
}

// True when every entry sorts strictly before its successor.
[[maybe_unused]] static constexpr bool entitiesStrictlySorted()
{
    constexpr std::size_t count = sizeof(entities) / sizeof(entities[0]);
    for (std::size_t i = 1; i < count; ++i) {
        if (!entityNamePrecedes(entities[i - 1], entities[i])) {
            return false;
        }
    }
    return true;
}

static_assert(entitiesStrictlySorted(), "entities[] must be sorted in strncmp order without duplicates; binary search depends on it");
311
312[[nodiscard]] static bool operator<(const Entity &lhs, const QByteArray &rhs)
313{
314 return std::strncmp(lhs.name, rhs.constData(), MAX_CODE_SIZE) < 0;
315}
316
318{
319 QChar res = QChar::Null;
320
321 if (str.isEmpty()) {
322 return QChar::Null;
323 }
324
325 int pos = 0;
326 if (str[pos] == QLatin1Char('&')) {
327 pos++;
328 }
329
330 // Check for '&#000' or '&#x0000' sequence
331 if (str[pos] == QLatin1Char('#') && str.length() - pos > 1) {
332 bool ok;
333 pos++;
334 if (str[pos] == QLatin1Char('x') || str[pos] == QLatin1Char('X')) {
335 pos++;
336 // '&#x0000', hexadecimal character reference
337 const auto tmp = str.mid(pos);
338 res = QChar(tmp.toInt(&ok, 16));
339 } else {
340 // '&#0000', decimal character reference
341 const auto tmp = str.mid(pos);
342 res = QChar(tmp.toInt(&ok, 10));
343 }
344 if (ok) {
345 return res;
346 } else {
347 return QChar::Null;
348 }
349 }
350
351 const QByteArray raw(str.toLatin1());
352 const auto e = std::lower_bound(std::begin(entities), std::end(entities), raw);
353
354 if (e == std::end(entities) || raw.size() > MAX_CODE_SIZE || std::strncmp(e->name, raw.constData(), MAX_CODE_SIZE) != 0) {
355 return QChar::Null;
356 }
357
358 return QChar(e->code);
359}
360
362{
363 // entities are never longer than 8 chars... we start from
364 // that length and work backwards...
365 len = 8;
366 while (len > 0) {
367 const auto tmp = str.left(len);
368 QChar res = fromEntity(tmp);
369 if (res != QChar::Null) {
370 return res;
371 }
372 len--;
373 }
374 return QChar::Null;
375}
376
378{
379 return QString::asprintf("&#0x%x;", ch.unicode());
380}
381
383{
384 QString text = input;
385 const QChar *p = text.unicode();
386 const QChar *end = p + text.length();
387 const QChar *ampersand = nullptr;
388 bool scanForSemicolon = false;
389
390 for (; p < end; ++p) {
391 const QChar ch = *p;
392
393 if (ch == QLatin1Char('&')) {
394 ampersand = p;
395 scanForSemicolon = true;
396 continue;
397 }
398
399 if (ch != QLatin1Char(';') || scanForSemicolon == false) {
400 continue;
401 }
402
403 assert(ampersand);
404
405 scanForSemicolon = false;
406
407 const QChar *entityBegin = ampersand + 1;
408
409 const uint entityLength = p - entityBegin;
410 if (entityLength == 0) {
411 continue;
412 }
413
414 const QChar entityValue = KCharsets::fromEntity(QStringView(entityBegin, entityLength));
415 if (entityValue.isNull()) {
416 continue;
417 }
418
419 const uint ampersandPos = ampersand - text.unicode();
420
421 text[(int)ampersandPos] = entityValue;
422 text.remove(ampersandPos + 1, entityLength + 1);
423 p = text.unicode() + ampersandPos;
424 end = text.unicode() + text.length();
425 ampersand = nullptr;
426 }
427
428 return text;
429}
430
432{
433 QStringList available;
434 for (const int *p = language_for_encoding_indices; *p != -1; p += 2) {
435 available.append(QString::fromUtf8(language_for_encoding_string + *p));
436 }
437 available.sort();
438 return available;
439}
440
442{
443 const char *lang = kcharsets_array_search(language_for_encoding_string, language_for_encoding_indices, encoding.toUtf8().data());
444 if (lang) {
445 return tr("%1 ( %2 )", "@item %1 character set, %2 encoding").arg(tr(lang, "@item Text character set"), encoding);
446 } else {
447 return tr("Other encoding (%1)", "@item").arg(encoding);
448 }
449}
450
451QString KCharsets::encodingForName(const QString &descriptiveName) const
452{
453 const int left = descriptiveName.lastIndexOf(QLatin1Char('('));
454
455 if (left < 0) { // No parenthesis, so assume it is a normal encoding name
456 return descriptiveName.trimmed();
457 }
458
459 QString name(descriptiveName.mid(left + 1));
460
461 const int right = name.lastIndexOf(QLatin1Char(')'));
462
463 if (right < 0) {
464 return name;
465 }
466
467 return name.left(right).trimmed();
468}
469
471{
472 QStringList encodings;
473 for (const int *p = language_for_encoding_indices; *p != -1; p += 2) {
474 const QString name = QString::fromUtf8(language_for_encoding_string + p[0]);
475 const QString description = tr(language_for_encoding_string + p[1], "@item Text character set");
476 encodings.append(tr("%1 ( %2 )", "@item Text encoding: %1 character set, %2 encoding").arg(description, name));
477 }
478 encodings.sort();
479 return encodings;
480}
481
483{
484 if (!d->encodingsByScript.isEmpty()) {
485 return d->encodingsByScript;
486 }
487 int i;
488 for (const int *p = language_for_encoding_indices; *p != -1; p += 2) {
489 const QString name = QString::fromUtf8(language_for_encoding_string + p[0]);
490 const QString description = tr(language_for_encoding_string + p[1], "@item Text character set");
491
492 for (i = 0; i < d->encodingsByScript.size(); ++i) {
493 if (d->encodingsByScript.at(i).at(0) == description) {
494 d->encodingsByScript[i].append(name);
495 break;
496 }
497 }
498
499 if (i == d->encodingsByScript.size()) {
500 d->encodingsByScript.append(QStringList() << description << name);
501 }
502 }
503 return d->encodingsByScript;
504}
505
507{
508 return &globalCharsets()->instance;
509}
QStringList descriptiveEncodingNames() const
Lists the available encoding names together with a more descriptive language.
QList< QStringList > encodingsByScript() const
Lists the available encoding names grouped by script (or language that uses them).
~KCharsets()
Destructor.
QString encodingForName(const QString &descriptiveName) const
Returns the encoding for a string obtained with descriptiveEncodingNames().
QString descriptionForEncoding(QStringView encoding) const
Returns a long description for an encoding name.
KCharsets()
Protected constructor.
static QChar fromEntity(QStringView str)
Converts an entity to a character.
QStringList availableEncodingNames() const
Lists all available encodings as names.
static QString resolveEntities(const QString &text)
Scans the given string for entities (like &amp;) and resolves them using fromEntity.
static QString toEntity(const QChar &ch)
Converts a QChar to an entity.
static KCharsets * charsets()
The global charset manager.
Q_SCRIPTABLE QString start(QString train="")
const char * constData() const const
char * data()
qsizetype size() const const
bool isNull() const const
char16_t & unicode()
void append(QList< T > &&value)
QString asprintf(const char *cformat,...)
QString fromUtf8(QByteArrayView str)
qsizetype lastIndexOf(QChar ch, Qt::CaseSensitivity cs) const const
QString left(qsizetype n) const const
qsizetype length() const const
QString mid(qsizetype position, qsizetype n) const const
QString & remove(QChar ch, Qt::CaseSensitivity cs)
QString trimmed() const const
const QChar * unicode() const const
void sort(Qt::CaseSensitivity cs)
QStringView left(qsizetype length) const const
QStringView mid(qsizetype start, qsizetype length) const const
bool isEmpty() const const
qsizetype length() const const
QByteArray toLatin1() const const
QByteArray toUtf8() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Feb 21 2025 11:52:18 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.