• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdelibs API Reference
  • KDE Home
  • Contact Us
 

KDECore

  • sources
  • kde-4.14
  • kdelibs
  • kdecore
  • localization
kcharsets.cpp
Go to the documentation of this file.
1 /* This file is part of the KDE libraries
2  Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3  Copyright (C) 2001, 2003, 2004, 2005, 2006 Nicolas GOUTTE <goutte@kde.org>
4  Copyright (C) 2007 Nick Shaforostoff <shafff@ukr.net>
5 
6  This library is free software; you can redistribute it and/or
7  modify it under the terms of the GNU Library General Public
8  License as published by the Free Software Foundation; either
9  version 2 of the License, or (at your option) any later version.
10 
11  This library is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  Library General Public License for more details.
15 
16  You should have received a copy of the GNU Library General Public License
17  along with this library; see the file COPYING.LIB. If not, write to
18  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  Boston, MA 02110-1301, USA.
20 */
21 #include "kcharsets.h"
22 
23 #include "kfilterdev.h"
24 #include "kentities.cc"
25 
26 #include "kconfig.h"
27 #include "kdebug.h"
28 #include "kglobal.h"
29 #include "klocale.h"
30 
31 #include <QtCore/QDir>
32 #include <QtCore/QRegExp>
33 #include <QtCore/QCharRef>
34 #include <QtCore/QMutableStringListIterator>
35 #include <QtCore/QTextCodec>
36 
37 #include <assert.h>
38 #include <QHash>
39 
40 /*
41  * ### FIXME KDE4: the name of the encodings should mostly be uppercase
42  * The names of this list are user-visible
43  * Generate with generate_string_table.pl, input data:
44 ISO 8859-1
45 i18n:Western European
46 ISO 8859-15
47 i18n:Western European
48 ISO 8859-14
49 i18n:Western European
50 cp 1252
51 i18n:Western European
52 IBM850
53 i18n:Western European
54 ISO 8859-2
55 i18n:Central European
56 ISO 8859-3
57 i18n:Central European
58 ISO 8859-4
59 i18n:Baltic
60 ISO 8859-13
61 i18n:Baltic
62 ISO 8859-16
63 i18n:South-Eastern Europe
64 cp 1250
65 i18n:Central European
66 cp 1254
67 i18n:Turkish
68 cp 1257
69 i18n:Baltic
70 KOI8-R
71 i18n:Cyrillic
72 ISO 8859-5
73 i18n:Cyrillic
74 cp 1251
75 i18n:Cyrillic
76 KOI8-U
77 i18n:Cyrillic
78 IBM866
79 i18n:Cyrillic
80 Big5
81 i18n:Chinese Traditional
82 Big5-HKSCS
83 i18n:Chinese Traditional
84 GB18030
85 i18n:Chinese Simplified
86 GBK
87 i18n:Chinese Simplified
88 GB2312
89 i18n:Chinese Simplified
90 EUC-KR
91 i18n:Korean
92 cp 949
93 i18n:Korean
94 sjis
95 i18n:Japanese
96 jis7
97 i18n:Japanese
98 EUC-JP
99 i18n:Japanese
100 ISO 8859-7
101 i18n:Greek
102 cp 1253
103 i18n:Greek
104 ISO 8859-6
105 i18n:Arabic
106 cp 1256
107 i18n:Arabic
108 ISO 8859-8
109 i18n:Hebrew
110 ISO 8859-8-I
111 i18n:Hebrew
112 cp 1255
113 i18n:Hebrew
114 ISO 8859-9
115 i18n:Turkish
116 TIS620
117 i18n:Thai
118 ISO 8859-11
119 i18n:Thai
120 UTF-8
121 i18n:Unicode
122 UTF-16
123 i18n:Unicode
124 utf7
125 i18n:Unicode
126 ucs2
127 i18n:Unicode
128 ISO 10646-UCS-2
129 i18n:Unicode
130 winsami2
131 i18n:Northern Saami
132 windows-1258
133 i18n:Other
134 IBM874
135 i18n:Other
136 TSCII
137 i18n:Other
138  */
139 /*
140  * Notes about the table:
141  *
142  * - The following entries were disabled and removed from the table:
143 ibm852
144 i18n:Central European
145 pt 154
146 i18n:Cyrillic // ### TODO "PT 154" seems to have been removed from Qt
147  *
148  * - ISO 8859-11 is the deprecated name of TIS-620
149  * - utf7 is not in Qt
150  * - UTF-16 is duplicated as "ucs2" and "ISO 10646-UCS-2"
151  * - windows-1258: TODO
152  * - IBM874: TODO
153  * - TSCII: TODO
154  */
155 static const char language_for_encoding_string[] = // Pool of NUL-separated strings: user-visible encoding names, with each description (script/language group) stored once, right after the first encoding that uses it. Entries are addressed by byte offset through language_for_encoding_indices.
156  "ISO 8859-1\0"
157  I18N_NOOP2("@item Text character set", "Western European")"\0"
158  "ISO 8859-15\0"
159  "ISO 8859-14\0"
160  "cp 1252\0"
161  "IBM850\0"
162  "ISO 8859-2\0"
163  I18N_NOOP2("@item Text character set", "Central European")"\0"
164  "ISO 8859-3\0"
165  "ISO 8859-4\0"
166  I18N_NOOP2("@item Text character set", "Baltic")"\0"
167  "ISO 8859-13\0"
168  "ISO 8859-16\0"
169  I18N_NOOP2("@item Text character set", "South-Eastern Europe")"\0"
170  "cp 1250\0"
171  "cp 1254\0"
172  I18N_NOOP2("@item Text character set", "Turkish")"\0"
173  "cp 1257\0"
174  "KOI8-R\0"
175  I18N_NOOP2("@item Text character set", "Cyrillic")"\0"
176  "ISO 8859-5\0"
177  "cp 1251\0"
178  "KOI8-U\0"
179  "IBM866\0"
180  "Big5\0"
181  I18N_NOOP2("@item Text character set", "Chinese Traditional")"\0"
182  "Big5-HKSCS\0"
183  "GB18030\0"
184  I18N_NOOP2("@item Text character set", "Chinese Simplified")"\0"
185  "GBK\0"
186  "GB2312\0"
187  "EUC-KR\0"
188  I18N_NOOP2("@item Text character set", "Korean")"\0"
189  "cp 949\0"
190  "sjis\0"
191  I18N_NOOP2("@item Text character set", "Japanese")"\0"
192  "jis7\0"
193  "EUC-JP\0"
194  "ISO 8859-7\0"
195  I18N_NOOP2("@item Text character set", "Greek")"\0"
196  "cp 1253\0"
197  "ISO 8859-6\0"
198  I18N_NOOP2("@item Text character set", "Arabic")"\0"
199  "cp 1256\0"
200  "ISO 8859-8\0"
201  I18N_NOOP2("@item Text character set", "Hebrew")"\0"
202  "ISO 8859-8-I\0"
203  "cp 1255\0"
204  "ISO 8859-9\0"
205  "TIS620\0"
206  I18N_NOOP2("@item Text character set", "Thai")"\0"
207  "ISO 8859-11\0"
208  "UTF-8\0"
209  I18N_NOOP2("@item Text character set", "Unicode")"\0"
210  "UTF-16\0"
211  "utf7\0"
212  "ucs2\0"
213  "ISO 10646-UCS-2\0"
214  "winsami2\0"
215  I18N_NOOP2("@item Text character set", "Northern Saami")"\0"
216  "windows-1258\0"
217  I18N_NOOP2("@item Text character set", "Other")"\0"
218  "IBM874\0"
219  "TSCII\0"
220  "\0";
221 
222 static const int language_for_encoding_indices[] = { // Byte offsets into language_for_encoding_string, stored as (encoding name, description) pairs in the order of the input data listed above.
223  0, 11, 28, 11, 40, 11, 52, 11,
224  60, 11, 67, 78, 95, 78, 106, 117,
225  124, 117, 136, 148, 169, 78, 177, 185,
226  193, 117, 201, 208, 217, 208, 228, 208,
227  236, 208, 243, 208, 250, 255, 275, 255,
228  286, 294, 313, 294, 317, 294, 324, 331,
229  338, 331, 345, 350, 359, 350, 364, 350,
230  371, 382, 388, 382, 396, 407, 414, 407,
231  422, 433, 440, 433, 453, 433, 461, 185,
232  472, 479, 484, 479, 496, 502, 510, 502,
233  517, 502, 522, 502, 527, 502, 543, 552,
234  567, 580, 586, 580, 593, 580, -1 // the -1 in a name position ends the list
235 };
236 
237 /*
238  * defines some different names for codecs that are built into Qt.
239  * The names in this list must be lower-case.
240  * input data for generate_string_table.pl:
241 iso-ir-111
242 koi8-r
243 koi unified
244 koi8-r
245 us-ascii
246 iso 8859-1
247 usascii
248 iso 8859-1
249 ascii
250 iso 8859-1
251 unicode-1-1-utf-7
252 utf-7
253 ucs2
254 iso-10646-ucs-2
255 iso10646-1
256 iso-10646-ucs-2
257 gb18030.2000-1
258 gb18030
259 gb18030.2000-0
260 gb18030
261 gbk-0
262 gbk
263 gb2312
264 gbk
265 gb2312.1980-0
266 gbk
267 big5-0
268 big5
269 euc-kr
270 euckr
271 cp949
272 cp 949
273 euc-jp
274 eucjp
275 jisx0201.1976-0
276 eucjp
277 jisx0208.1983-0
278 eucjp
279 jisx0208.1990-0
280 eucjp
281 jisx0208.1997-0
282 eucjp
283 jisx0212.1990-0
284 eucjp
285 jisx0213.2000-1
286 eucjp
287 jisx0213.2000-2
288 eucjp
289 shift_jis
290 sjis
291 shift-jis
292 sjis
293 sjis
294 sjis
295 iso-2022-jp
296 jis7
297 windows850
298 ibm850
299 windows866
300 ibm866
301 windows-850
302 ibm850
303 windows-866
304 ibm866
305 cp-10000
306 apple roman
307 thai-tis620
308 iso 8859-11
309 windows-874
310 ibm874
311 windows874
312 ibm874
313 cp-874
314 ibm874
315 ksc5601.1987-0
316 euckr
317 ks_c_5601-1987
318 euckr
319 mac-roman
320 apple roman
321 macintosh
322 apple roman
323 mac
324 apple roman
325 csiso2022jp
326 iso-2022-jp
327 */
328 /*
329  * Notes about the table:
330  * - using ISO-8859-1 for ASCII is only an approximation (as you cannot test if a character is part of the set)
331  * - utf7 is not in Qt
332  * - UTF-16 is duplicated as "ucs2" and "ISO 10646-UCS-2"
333  * - sjis: appears on the table for x-sjis
334  * - jis7: ISO-2022-JP is now the default name in Qt4
335  * - cp-874: is it really needed?
336  * - mac-roman: appears on the table for x-mac-roman
337  * - csiso2022jp: See bug #77243
338  */
339 static const char builtin_string[] = // Pool of NUL-separated lower-case strings: alternative codec names and the codec names they are mapped to. Each distinct string is stored once and addressed by byte offset through builtin_indices.
340  "iso-ir-111\0"
341  "koi8-r\0"
342  "koi unified\0"
343  "us-ascii\0"
344  "iso 8859-1\0"
345  "usascii\0"
346  "ascii\0"
347  "unicode-1-1-utf-7\0"
348  "utf-7\0"
349  "ucs2\0"
350  "iso-10646-ucs-2\0"
351  "iso10646-1\0"
352  "gb18030.2000-1\0"
353  "gb18030\0"
354  "gb18030.2000-0\0"
355  "gbk-0\0"
356  "gbk\0"
357  "gb2312\0"
358  "gb2312.1980-0\0"
359  "big5-0\0"
360  "big5\0"
361  "euc-kr\0"
362  "euckr\0"
363  "cp949\0"
364  "cp 949\0"
365  "euc-jp\0"
366  "eucjp\0"
367  "jisx0201.1976-0\0"
368  "jisx0208.1983-0\0"
369  "jisx0208.1990-0\0"
370  "jisx0208.1997-0\0"
371  "jisx0212.1990-0\0"
372  "jisx0213.2000-1\0"
373  "jisx0213.2000-2\0"
374  "shift_jis\0"
375  "sjis\0"
376  "shift-jis\0"
377  "iso-2022-jp\0"
378  "jis7\0"
379  "windows850\0"
380  "ibm850\0"
381  "windows866\0"
382  "ibm866\0"
383  "windows-850\0"
384  "windows-866\0"
385  "cp-10000\0"
386  "apple roman\0"
387  "thai-tis620\0"
388  "iso 8859-11\0"
389  "windows-874\0"
390  "ibm874\0"
391  "windows874\0"
392  "cp-874\0"
393  "ksc5601.1987-0\0"
394  "ks_c_5601-1987\0"
395  "mac-roman\0"
396  "macintosh\0"
397  "mac\0"
398  "csiso2022jp\0"
399  "\0";
400 
401 static const int builtin_indices[] = { // Byte offsets into builtin_string, stored as (alternative name, codec name to use instead) pairs in the order of the input data listed above.
402  0, 11, 18, 11, 30, 39, 50, 39,
403  58, 39, 64, 82, 88, 93, 109, 93,
404  120, 135, 143, 135, 158, 164, 168, 164,
405  175, 164, 189, 196, 201, 208, 214, 220,
406  227, 234, 240, 234, 256, 234, 272, 234,
407  288, 234, 304, 234, 320, 234, 336, 234,
408  352, 362, 367, 362, 362, 362, 377, 389,
409  394, 405, 412, 423, 430, 405, 442, 423,
410  454, 463, 475, 487, 499, 511, 518, 511,
411  529, 511, 536, 208, 551, 208, 566, 463,
412  576, 463, 586, 463, 590, 377, -1 // the -1 in a name position ends the list
413 };
414 
415 /*
416  * some last resort hints in case the charmap file couldn't be found.
417  * This gives at least a partial conversion and helps making things readable.
418  *
419  * the name used as input here is already converted to the more canonical
420  * name as defined in the aliases array.
421  *
422  * Input data:
423 cp1250
424 iso-8859-2
425 koi8-r
426 iso-8859-5
427 koi8-u
428 koi8-r
429 pt 154
430 windows-1251
431 paratype-154
432 windows-1251
433 pt-154
434 windows-1251
435  */
436 /* Notes:
437  * - KDE had always "CP 1251" as best fallback to PT 154. As Qt does not offer this encoding anymore, the codepage 1251 is used as fallback.
438  */
439 static const char conversion_hints_string[] = // Pool of NUL-separated strings: encoding names and the fallback encoding to try for each. Each distinct string is stored once and addressed by byte offset through conversion_hints_indices.
440  "cp1250\0"
441  "iso-8859-2\0"
442  "koi8-r\0"
443  "iso-8859-5\0"
444  "koi8-u\0"
445  "pt 154\0"
446  "windows-1251\0"
447  "paratype-154\0"
448  "pt-154\0"
449  "\0";
450 
451 static const int conversion_hints_indices[] = { // Byte offsets into conversion_hints_string, stored as (encoding name, fallback encoding) pairs in the order of the input data listed above.
452  0, 7, 18, 25, 36, 18, 43, 50,
453  63, 50, 76, 50, -1 // the -1 in a name position ends the list
454 };
455 
456 // search an array of items index/data, find first matching index
457 // and return data, or return 0
458 static inline
459 const char *kcharsets_array_search(const char *start, const int *indices, const char *entry)
460 {
461  for (int i = 0; indices[i] != -1; i += 2)
462  if (qstrcmp(start + indices[i], entry) == 0)
463  return start + indices[i + 1];
464  return 0;
465 }
466 
467 
468 class KCharsetsPrivate
469 {
470 public:
471  KCharsetsPrivate(KCharsets* _kc)
472  {
473  kc = _kc;
474  codecForNameDict.reserve( 43 );
475  }
476  // Hash for the encoding names (sensitive case)
477  QHash<QByteArray,QTextCodec*> codecForNameDict;
478  KCharsets* kc;
479 
480  //Cache list so QStrings can be implicitly shared
481  QList<QStringList> encodingsByScript;
482 };
483 
484 // --------------------------------------------------------------------------
485 
486 KCharsets::KCharsets()
487  :d(new KCharsetsPrivate(this))
488 {
489 }
490 
491 KCharsets::~KCharsets()
492 {
493  delete d;
494 }
495 
496 QChar KCharsets::fromEntity(const QString &str)
497 {
498  QChar res = QChar::Null;
499 
500  if ( str.isEmpty() )
501  return QChar::Null;
502 
503  int pos = 0;
504  if(str[pos] == QLatin1Char('&')) pos++;
505 
506  // Check for '&#000' or '&#x0000' sequence
507  if (str[pos] == QLatin1Char('#') && str.length()-pos > 1) {
508  bool ok;
509  pos++;
510  if (str[pos] == QLatin1Char('x') || str[pos] == QLatin1Char('X')) {
511  pos++;
512  // '&#x0000', hexadecimal character reference
513  const QString tmp( str.mid( pos ) );
514  res = tmp.toInt(&ok, 16);
515  } else {
516  // '&#0000', decimal character reference
517  const QString tmp( str.mid( pos ) );
518  res = tmp.toInt(&ok, 10);
519  }
520  if ( ok )
521  return res;
522  else
523  return QChar::Null;
524  }
525 
526  const QByteArray raw ( str.toLatin1() );
527  const entity *e = Perfect_Hash::kde_findEntity( raw, raw.length() );
528 
529  if(!e)
530  {
531  //kDebug( 0 ) << "unknown entity " << str <<", len = " << str.length();
532  return QChar::Null;
533  }
534  //kDebug() << "got entity " << str << " = " << e->code;
535 
536  return QChar(e->code);
537 }
538 
539 QChar KCharsets::fromEntity(const QString &str, int &len)
540 {
541  // entities are never longer than 8 chars... we start from
542  // that length and work backwards...
543  len = 8;
544  while(len > 0)
545  {
546  QString tmp = str.left(len);
547  QChar res = fromEntity(tmp);
548  if( res != QChar::Null ) return res;
549  len--;
550  }
551  return QChar::Null;
552 }
553 
554 
555 QString KCharsets::toEntity(const QChar &ch)
556 {
557  QString ent;
558  ent.sprintf("&#0x%x;", ch.unicode());
559  return ent;
560 }
561 
562 QString KCharsets::resolveEntities( const QString &input )
563 {
564  QString text = input;
565  const QChar *p = text.unicode();
566  const QChar *end = p + text.length();
567  const QChar *ampersand = 0;
568  bool scanForSemicolon = false;
569 
570  for ( ; p < end; ++p ) {
571  const QChar ch = *p;
572 
573  if ( ch == QLatin1Char('&') ) {
574  ampersand = p;
575  scanForSemicolon = true;
576  continue;
577  }
578 
579  if ( ch != QLatin1Char(';') || scanForSemicolon == false )
580  continue;
581 
582  assert( ampersand );
583 
584  scanForSemicolon = false;
585 
586  const QChar *entityBegin = ampersand + 1;
587 
588  const uint entityLength = p - entityBegin;
589  if ( entityLength == 0 )
590  continue;
591 
592  const QChar entityValue = KCharsets::fromEntity( QString( entityBegin, entityLength ) );
593  if ( entityValue.isNull() )
594  continue;
595 
596  const uint ampersandPos = ampersand - text.unicode();
597 
598  text[ (int)ampersandPos ] = entityValue;
599  text.remove( ampersandPos + 1, entityLength + 1 );
600  p = text.unicode() + ampersandPos;
601  end = text.unicode() + text.length();
602  ampersand = 0;
603  }
604 
605  return text;
606 }
607 
608 QStringList KCharsets::availableEncodingNames() const
609 {
610  QStringList available;
611  for ( const int *p = language_for_encoding_indices; *p != -1; p += 2)
612  available.append( QString::fromUtf8( language_for_encoding_string + *p ) );
613  available.sort();
614  return available;
615 }
616 
617 #ifndef KDE_NO_DEPRECATED
618 QString KCharsets::languageForEncoding( const QString &encoding ) const
619 {
620  const char* lang = kcharsets_array_search( (const char*)language_for_encoding_string,
621  language_for_encoding_indices,
622  encoding.toUtf8().constData() );
623  if ( lang )
624  return i18nc( "@item Text character set", lang );
625  else
626  return i18nc( "@item Text character set", "Other" );
627 }
628 #endif
629 
630 QString KCharsets::descriptionForEncoding( const QString& encoding ) const
631 {
632  const char* lang = kcharsets_array_search( language_for_encoding_string,
633  language_for_encoding_indices,
634  encoding.toUtf8() );
635  if ( lang )
636  return i18nc( "@item %1 character set, %2 encoding", "%1 ( %2 )",
637  i18nc( "@item Text character set", lang ), encoding );
638  else
639  return i18nc( "@item", "Other encoding (%1)", encoding );
640 }
641 
642 QString KCharsets::encodingForName( const QString &descriptiveName ) const
643 {
644  const int left = descriptiveName.lastIndexOf( QLatin1Char('(') );
645 
646  if (left<0) // No parenthesis, so assume it is a normal encoding name
647  return descriptiveName.trimmed();
648 
649  QString name(descriptiveName.mid(left+1));
650 
651  const int right = name.lastIndexOf( QLatin1Char(')') );
652 
653  if (right<0)
654  return name;
655 
656  return name.left(right).trimmed();
657 }
658 
659 QStringList KCharsets::descriptiveEncodingNames() const
660 {
661  QStringList encodings;
662  for ( const int *p = language_for_encoding_indices; *p != -1; p += 2) {
663  const QString name = QString::fromUtf8( language_for_encoding_string + p[0] );
664  const QString description = i18nc( "@item Text character set", language_for_encoding_string + p[1] );
665  encodings.append( i18nc( "@item Text encoding: %1 character set, %2 encoding", "%1 ( %2 )",
666  description, name ) );
667  }
668  encodings.sort();
669  return encodings;
670 }
671 
672 QList<QStringList> KCharsets::encodingsByScript() const
673 {
674  if (!d->encodingsByScript.isEmpty())
675  return d->encodingsByScript;
676  int i;
677  for ( const int *p = language_for_encoding_indices; *p != -1; p += 2) {
678  const QString name = QString::fromUtf8( language_for_encoding_string + p[0] );
679  const QString description = i18nc("@item Text character set", language_for_encoding_string + p[1] );
680 
681  for (i=0; i<d->encodingsByScript.size(); ++i) {
682  if (d->encodingsByScript.at(i).at(0) == description) {
683  d->encodingsByScript[i].append(name);
684  break;
685  }
686  }
687 
688  if (i==d->encodingsByScript.size()) {
689  d->encodingsByScript.append(QStringList() << description << name);
690  }
691 
692  }
693  return d->encodingsByScript;
694 }
695 
696 QTextCodec* KCharsets::codecForName(const QString &n) const
697 {
698  if ( n == QLatin1String("gb2312") || n == QLatin1String("gbk") )
699  return QTextCodec::codecForName( "gb18030" );
700  const QByteArray name( n.toLatin1() );
701  QTextCodec* codec = codecForNameOrNull( name );
702  if ( codec )
703  return codec;
704  else
705  return QTextCodec::codecForName( "iso-8859-1" );
706 }
707 
708 QTextCodec* KCharsets::codecForName(const QString &n, bool &ok) const
709 {
710  if (n == QLatin1String("gb2312") || n == QLatin1String("gbk")) {
711  ok = true;
712  return QTextCodec::codecForName( "gb18030" );
713  }
714  const QByteArray name( n.toLatin1() );
715  QTextCodec* codec = codecForNameOrNull( name );
716  if ( codec )
717  {
718  ok = true;
719  return codec;
720  }
721  else
722  {
723  ok = false;
724  return QTextCodec::codecForName( "iso-8859-1" );
725  }
726 }
727 
728 QTextCodec *KCharsets::codecForNameOrNull( const QByteArray& n ) const
729 {
730  QTextCodec* codec = 0;
731 
732  if (n.isEmpty()) {
733  // No name, assume locale (KDE's, not Qt's)
734  const QByteArray locale = "->locale<-";
735  if ( d->codecForNameDict.contains( locale ) )
736  return d->codecForNameDict.value( locale );
737  codec = KGlobal::locale()->codecForEncoding();
738  d->codecForNameDict.insert("->locale<-", codec);
739  return codec;
740  }
741  // For a non-empty name, lookup the "dictionnary", in a case-sensitive way.
742  else if ( d->codecForNameDict.contains( n ) ) {
743  return d->codecForNameDict.value( n );
744  }
745 
746  // If the name is not in the hash table, call directly QTextCoded::codecForName.
747  // We assume that QTextCodec is smarter and more maintained than this code.
748  codec = QTextCodec::codecForName( n );
749  if ( codec ) {
750  d->codecForNameDict.insert( n, codec );
751  return codec;
752  }
753 
754  // We have had no luck with QTextCodec::codecForName, so we must now process the name, so that QTextCodec::codecForName could work with it.
755 
756  QByteArray name = n.toLower();
757  bool changed = false;
758  if (name.endsWith("_charset")) { // krazy:exclude=strings
759  name.chop( 8 );
760  changed = true;
761  }
762  if ( name.startsWith( "x-" ) ) { // krazy:exclude=strings
763  name.remove( 0, 2 ); // remove x- at start
764  changed = true;
765  }
766 
767  if (name.isEmpty()) {
768  // We have no name anymore, therefore the name is invalid.
769  return 0;
770  }
771 
772  // We only need to check changed names.
773  if ( changed ) {
774  codec = QTextCodec::codecForName(name);
775  if (codec) {
776  d->codecForNameDict.insert( n, codec );
777  return codec;
778  }
779  changed = false;
780  }
781 
782  // these codecs are built into Qt, but the name given for the codec is different,
783  // so QTextCodec did not recognize it.
784  QByteArray cname = kcharsets_array_search( builtin_string, builtin_indices, name);
785 
786  if(!cname.isEmpty())
787  codec = QTextCodec::codecForName(cname);
788 
789  if (codec)
790  {
791  d->codecForNameDict.insert( n, codec );
792  return codec;
793  }
794 
795  // this also failed, the last resort is now to take some compatibility charmap
796  // ### TODO: while emergency conversions might be useful at read, it is not sure if they should be done if the application plans to write.
797  cname = kcharsets_array_search( conversion_hints_string, conversion_hints_indices, name );
798 
799  if (!cname.isEmpty()) {
800  codec = QTextCodec::codecForName(cname);
801  if (codec) {
802  d->codecForNameDict.insert( n, codec );
803  return codec;
804  }
805  }
806 
807  // we could not assign a codec, therefore return NULL
808  return 0;
809 }
KCharsets::languageForEncoding
QString languageForEncoding(const QString &encoding) const
Returns the language the encoding is used for.
Definition: kcharsets.cpp:618
KCharsets::codecForName
QTextCodec * codecForName(const QString &name) const
Provided for compatibility.
Definition: kcharsets.cpp:696
builtin_string
static const char builtin_string[]
Definition: kcharsets.cpp:339
kcharsets.h
KCharsets::descriptiveEncodingNames
QStringList descriptiveEncodingNames() const
Lists the available encoding names together with a more descriptive language.
Definition: kcharsets.cpp:659
kdebug.h
KCharsets
Charset font and encoder/decoder handling.
Definition: kcharsets.h:46
kfilterdev.h
QByteArray::toLower
QByteArray toLower() const
QByteArray
I18N_NOOP2
#define I18N_NOOP2(comment, x)
If the string is too ambiguous to be translated well to a non-english language, use this instead of I...
Definition: klocalizedstring.h:72
QChar
kconfig.h
QByteArray::chop
void chop(int n)
KCharsets::KCharsets
KCharsets()
Protected constructor.
Definition: kcharsets.cpp:486
QByteArray::isEmpty
bool isEmpty() const
QByteArray::startsWith
bool startsWith(const QByteArray &ba) const
KCharsets::~KCharsets
virtual ~KCharsets()
Destructor.
Definition: kcharsets.cpp:491
language_for_encoding_indices
static const int language_for_encoding_indices[]
Definition: kcharsets.cpp:222
QString::remove
QString & remove(int position, int n)
klocale.h
i18nc
QString i18nc(const char *ctxt, const char *text)
Returns a localized version of a string and a context.
Definition: klocalizedstring.h:797
QString::lastIndexOf
int lastIndexOf(QChar ch, int from, Qt::CaseSensitivity cs) const
KCharsets::encodingsByScript
QList< QStringList > encodingsByScript() const
Lists the available encoding names grouped by script (or language that uses them).
Definition: kcharsets.cpp:672
builtin_indices
static const int builtin_indices[]
Definition: kcharsets.cpp:401
kglobal.h
QList::append
void append(const T &value)
QString::fromUtf8
QString fromUtf8(const char *str, int size)
KCharsets::availableEncodingNames
QStringList availableEncodingNames() const
Lists all available encodings as names.
Definition: kcharsets.cpp:608
KCharsets::descriptionForEncoding
QString descriptionForEncoding(const QString &encoding) const
Returns a long description for an encoding name.
Definition: kcharsets.cpp:630
KLocale::codecForEncoding
QTextCodec * codecForEncoding() const
Returns the user's preferred encoding.
Definition: klocale.cpp:650
QHash< QByteArray, QTextCodec * >
QString::isEmpty
bool isEmpty() const
QString::trimmed
QString trimmed() const
QByteArray::constData
const char * constData() const
KCharsets::toEntity
static QString toEntity(const QChar &ch)
Converts a QChar to an entity.
Definition: kcharsets.cpp:555
KCharsets::encodingForName
QString encodingForName(const QString &descriptiveName) const
Returns the encoding for a string obtained with descriptiveEncodingNames().
Definition: kcharsets.cpp:642
QString
QList< QStringList >
QChar::unicode
ushort unicode() const
QTextCodec
conversion_hints_indices
static const int conversion_hints_indices[]
Definition: kcharsets.cpp:451
QStringList
QChar::isNull
bool isNull() const
QLatin1Char
KGlobal::locale
KLocale * locale()
Returns the global locale object.
Definition: kglobal.cpp:170
QString::unicode
const QChar * unicode() const
QString::toLatin1
QByteArray toLatin1() const
KCharsets::resolveEntities
static QString resolveEntities(const QString &text)
Scans the given string for entities (like &amp;) and resolves them using fromEntity.
Definition: kcharsets.cpp:562
QString::mid
QString mid(int position, int n) const
KCharsets::fromEntity
static QChar fromEntity(const QString &str)
Converts an entity to a character.
Definition: kcharsets.cpp:496
QLatin1String
kcharsets_array_search
static const char * kcharsets_array_search(const char *start, const int *indices, const char *entry)
Definition: kcharsets.cpp:459
QString::sprintf
QString & sprintf(const char *cformat,...)
QTextCodec::codecForName
QTextCodec * codecForName(const QByteArray &name)
QString::length
int length() const
language_for_encoding_string
static const char language_for_encoding_string[]
Definition: kcharsets.cpp:155
QString::left
QString left(int n) const
QStringList::sort
void sort()
conversion_hints_string
static const char conversion_hints_string[]
Definition: kcharsets.cpp:439
QByteArray::remove
QByteArray & remove(int pos, int len)
QByteArray::endsWith
bool endsWith(const QByteArray &ba) const
kentities.cc
QString::toUtf8
QByteArray toUtf8() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Mon Jun 22 2020 13:22:10 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDECore

Skip menu "KDECore"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs API Reference

Skip menu "kdelibs API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal