• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdelibs API Reference
  • KDE Home
  • Contact Us
 

KDECore

  • sources
  • kde-4.12
  • kdelibs
  • kdecore
  • localization
kcharsets.cpp
Go to the documentation of this file.
1 /* This file is part of the KDE libraries
2  Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3  Copyright (C) 2001, 2003, 2004, 2005, 2006 Nicolas GOUTTE <goutte@kde.org>
4  Copyright (C) 2007 Nick Shaforostoff <shafff@ukr.net>
5 
6  This library is free software; you can redistribute it and/or
7  modify it under the terms of the GNU Library General Public
8  License as published by the Free Software Foundation; either
9  version 2 of the License, or (at your option) any later version.
10 
11  This library is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  Library General Public License for more details.
15 
16  You should have received a copy of the GNU Library General Public License
17  along with this library; see the file COPYING.LIB. If not, write to
18  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  Boston, MA 02110-1301, USA.
20 */
21 #include "kcharsets.h"
22 
23 #include "kfilterdev.h"
24 #include "kentities.c"
25 
26 #include "kconfig.h"
27 #include "kdebug.h"
28 #include "kglobal.h"
29 #include "klocale.h"
30 
31 #include <QtCore/QDir>
32 #include <QtCore/QRegExp>
33 #include <QtCore/QCharRef>
34 #include <QtCore/QMutableStringListIterator>
35 #include <QtCore/QTextCodec>
36 
37 #include <assert.h>
38 #include <QHash>
39 
40 /*
41  * ### FIXME KDE4: the name of the encodings should mostly be uppercase
42  * The names of this list are user-visible
43  * Generate with generate_string_table.pl, input data:
44 ISO 8859-1
45 i18n:Western European
46 ISO 8859-15
47 i18n:Western European
48 ISO 8859-14
49 i18n:Western European
50 cp 1252
51 i18n:Western European
52 IBM850
53 i18n:Western European
54 ISO 8859-2
55 i18n:Central European
56 ISO 8859-3
57 i18n:Central European
58 ISO 8859-4
59 i18n:Baltic
60 ISO 8859-13
61 i18n:Baltic
62 ISO 8859-16
63 i18n:South-Eastern Europe
64 cp 1250
65 i18n:Central European
66 cp 1254
67 i18n:Turkish
68 cp 1257
69 i18n:Baltic
70 KOI8-R
71 i18n:Cyrillic
72 ISO 8859-5
73 i18n:Cyrillic
74 cp 1251
75 i18n:Cyrillic
76 KOI8-U
77 i18n:Cyrillic
78 IBM866
79 i18n:Cyrillic
80 Big5
81 i18n:Chinese Traditional
82 Big5-HKSCS
83 i18n:Chinese Traditional
84 GB18030
85 i18n:Chinese Simplified
86 GBK
87 i18n:Chinese Simplified
88 GB2312
89 i18n:Chinese Simplified
90 EUC-KR
91 i18n:Korean
92 sjis
93 i18n:Japanese
94 jis7
95 i18n:Japanese
96 EUC-JP
97 i18n:Japanese
98 ISO 8859-7
99 i18n:Greek
100 cp 1253
101 i18n:Greek
102 ISO 8859-6
103 i18n:Arabic
104 cp 1256
105 i18n:Arabic
106 ISO 8859-8
107 i18n:Hebrew
108 ISO 8859-8-I
109 i18n:Hebrew
110 cp 1255
111 i18n:Hebrew
112 ISO 8859-9
113 i18n:Turkish
114 TIS620
115 i18n:Thai
116 ISO 8859-11
117 i18n:Thai
118 UTF-8
119 i18n:Unicode
120 UTF-16
121 i18n:Unicode
122 utf7
123 i18n:Unicode
124 ucs2
125 i18n:Unicode
126 ISO 10646-UCS-2
127 i18n:Unicode
128 winsami2
129 i18n:Northern Saami
130 windows-1258
131 i18n:Other
132 IBM874
133 i18n:Other
134 TSCII
135 i18n:Other
136  */
137 /*
138  * Notes about the table:
139  *
140  * - The following entries were disabled and removed from the table:
141 ibm852
142 i18n:Central European
143 pt 154
144 i18n:Cyrillic // ### TODO "PT 154" seems to have been removed from Qt
145  *
146  * - ISO 8859-11 is the deprecated name of TIS-620
147  * - utf7 is not in Qt
148  * - UTF-16 is duplicated as "ucs2" and "ISO 10646-UCS-2"
149  * - windows-1258: TODO
150  * - IBM874: TODO
151  * - TSCII: TODO
152  */
153 static const char language_for_encoding_string[] =
154  "ISO 8859-1\0"
155  I18N_NOOP2("@item Text character set", "Western European")"\0"
156  "ISO 8859-15\0"
157  "ISO 8859-14\0"
158  "cp 1252\0"
159  "IBM850\0"
160  "ISO 8859-2\0"
161  I18N_NOOP2("@item Text character set", "Central European")"\0"
162  "ISO 8859-3\0"
163  "ISO 8859-4\0"
164  I18N_NOOP2("@item Text character set", "Baltic")"\0"
165  "ISO 8859-13\0"
166  "ISO 8859-16\0"
167  I18N_NOOP2("@item Text character set", "South-Eastern Europe")"\0"
168  "cp 1250\0"
169  "cp 1254\0"
170  I18N_NOOP2("@item Text character set", "Turkish")"\0"
171  "cp 1257\0"
172  "KOI8-R\0"
173  I18N_NOOP2("@item Text character set", "Cyrillic")"\0"
174  "ISO 8859-5\0"
175  "cp 1251\0"
176  "KOI8-U\0"
177  "IBM866\0"
178  "Big5\0"
179  I18N_NOOP2("@item Text character set", "Chinese Traditional")"\0"
180  "Big5-HKSCS\0"
181  "GB18030\0"
182  I18N_NOOP2("@item Text character set", "Chinese Simplified")"\0"
183  "GBK\0"
184  "GB2312\0"
185  "EUC-KR\0"
186  I18N_NOOP2("@item Text character set", "Korean")"\0"
187  "sjis\0"
188  I18N_NOOP2("@item Text character set", "Japanese")"\0"
189  "jis7\0"
190  "EUC-JP\0"
191  "ISO 8859-7\0"
192  I18N_NOOP2("@item Text character set", "Greek")"\0"
193  "cp 1253\0"
194  "ISO 8859-6\0"
195  I18N_NOOP2("@item Text character set", "Arabic")"\0"
196  "cp 1256\0"
197  "ISO 8859-8\0"
198  I18N_NOOP2("@item Text character set", "Hebrew")"\0"
199  "ISO 8859-8-I\0"
200  "cp 1255\0"
201  "ISO 8859-9\0"
202  "TIS620\0"
203  I18N_NOOP2("@item Text character set", "Thai")"\0"
204  "ISO 8859-11\0"
205  "UTF-8\0"
206  I18N_NOOP2("@item Text character set", "Unicode")"\0"
207  "UTF-16\0"
208  "utf7\0"
209  "ucs2\0"
210  "ISO 10646-UCS-2\0"
211  "winsami2\0"
212  I18N_NOOP2("@item Text character set", "Northern Saami")"\0"
213  "windows-1258\0"
214  I18N_NOOP2("@item Text character set", "Other")"\0"
215  "IBM874\0"
216  "TSCII\0"
217  "\0";
218 
/*
 * Flat list of (encoding name offset, character set name offset) pairs into
 * language_for_encoding_string, terminated by a single -1.
 * The trailing comments show the strings the offsets point at; a character
 * set name is spelled out only where it changes from the previous line.
 */
static const int language_for_encoding_indices[] = {
      0,  11,   // ISO 8859-1       -> Western European
     28,  11,   // ISO 8859-15
     40,  11,   // ISO 8859-14
     52,  11,   // cp 1252
     60,  11,   // IBM850
     67,  78,   // ISO 8859-2       -> Central European
     95,  78,   // ISO 8859-3
    106, 117,   // ISO 8859-4       -> Baltic
    124, 117,   // ISO 8859-13
    136, 148,   // ISO 8859-16      -> South-Eastern Europe
    169,  78,   // cp 1250          -> Central European
    177, 185,   // cp 1254          -> Turkish
    193, 117,   // cp 1257          -> Baltic
    201, 208,   // KOI8-R           -> Cyrillic
    217, 208,   // ISO 8859-5
    228, 208,   // cp 1251
    236, 208,   // KOI8-U
    243, 208,   // IBM866
    250, 255,   // Big5             -> Chinese Traditional
    275, 255,   // Big5-HKSCS
    286, 294,   // GB18030          -> Chinese Simplified
    313, 294,   // GBK
    317, 294,   // GB2312
    324, 331,   // EUC-KR           -> Korean
    338, 343,   // sjis             -> Japanese
    352, 343,   // jis7
    357, 343,   // EUC-JP
    364, 375,   // ISO 8859-7       -> Greek
    381, 375,   // cp 1253
    389, 400,   // ISO 8859-6       -> Arabic
    407, 400,   // cp 1256
    415, 426,   // ISO 8859-8       -> Hebrew
    433, 426,   // ISO 8859-8-I
    446, 426,   // cp 1255
    454, 185,   // ISO 8859-9       -> Turkish
    465, 472,   // TIS620           -> Thai
    477, 472,   // ISO 8859-11
    489, 495,   // UTF-8            -> Unicode
    503, 495,   // UTF-16
    510, 495,   // utf7
    515, 495,   // ucs2
    520, 495,   // ISO 10646-UCS-2
    536, 545,   // winsami2         -> Northern Saami
    560, 573,   // windows-1258     -> Other
    579, 573,   // IBM874
    586, 573,   // TSCII
    -1          // end marker
};
233 
234 /*
235  * defines some different names for codecs that are built into Qt.
236  * The names in this list must be lower-case.
237  * input data for generate_string_table.pl:
238 iso-ir-111
239 koi8-r
240 koi unified
241 koi8-r
242 us-ascii
243 iso 8859-1
244 usascii
245 iso 8859-1
246 ascii
247 iso 8859-1
248 unicode-1-1-utf-7
249 utf-7
250 ucs2
251 iso-10646-ucs-2
252 iso10646-1
253 iso-10646-ucs-2
254 gb18030.2000-1
255 gb18030
256 gb18030.2000-0
257 gb18030
258 gbk-0
259 gbk
260 gb2312
261 gbk
262 gb2312.1980-0
263 gbk
264 big5-0
265 big5
266 euc-kr
267 euckr
268 euc-jp
269 eucjp
270 jisx0201.1976-0
271 eucjp
272 jisx0208.1983-0
273 eucjp
274 jisx0208.1990-0
275 eucjp
276 jisx0208.1997-0
277 eucjp
278 jisx0212.1990-0
279 eucjp
280 jisx0213.2000-1
281 eucjp
282 jisx0213.2000-2
283 eucjp
284 shift_jis
285 sjis
286 shift-jis
287 sjis
288 sjis
289 sjis
290 iso-2022-jp
291 jis7
292 windows850
293 ibm850
294 windows866
295 ibm866
296 windows-850
297 ibm850
298 windows-866
299 ibm866
300 cp-10000
301 apple roman
302 thai-tis620
303 iso 8859-11
304 windows-874
305 ibm874
306 windows874
307 ibm874
308 cp-874
309 ibm874
310 ksc5601.1987-0
311 euckr
312 ks_c_5601-1987
313 euckr
314 mac-roman
315 apple roman
316 macintosh
317 apple roman
318 mac
319 apple roman
320 csiso2022jp
321 iso-2022-jp
322 */
323 /*
324  * Notes about the table:
325  * - using ISO-8859-1 for ASCII is only an approximation (as you cannot test if a character is part of the set)
326  * - utf7 is not in Qt
327  * - UTF-16 is duplicated as "ucs2" and "ISO 10646-UCS-2"
328  * - sjis: appears on the table for x-sjis
329  * - jis7: ISO-2022-JP is now the default name in Qt4
330  * - cp-874: is it really needed?
331  * - mac-roman: appears on the table for x-mac-roman
332  * - csiso2022jp: See bug #77243
333  */
/*
 * String pool for the alias table of codecs that are built into Qt.
 * Every entry is NUL-terminated and lower-case; builtin_indices refers to
 * entries by the byte offset shown in front of each one.  An entry may be
 * used as an alias, as a canonical name, or as both.
 */
static const char builtin_string[] =
    /*   0 */ "iso-ir-111\0"
    /*  11 */ "koi8-r\0"
    /*  18 */ "koi unified\0"
    /*  30 */ "us-ascii\0"
    /*  39 */ "iso 8859-1\0"
    /*  50 */ "usascii\0"
    /*  58 */ "ascii\0"
    /*  64 */ "unicode-1-1-utf-7\0"
    /*  82 */ "utf-7\0"
    /*  88 */ "ucs2\0"
    /*  93 */ "iso-10646-ucs-2\0"
    /* 109 */ "iso10646-1\0"
    /* 120 */ "gb18030.2000-1\0"
    /* 135 */ "gb18030\0"
    /* 143 */ "gb18030.2000-0\0"
    /* 158 */ "gbk-0\0"
    /* 164 */ "gbk\0"
    /* 168 */ "gb2312\0"
    /* 175 */ "gb2312.1980-0\0"
    /* 189 */ "big5-0\0"
    /* 196 */ "big5\0"
    /* 201 */ "euc-kr\0"
    /* 208 */ "euckr\0"
    /* 214 */ "euc-jp\0"
    /* 221 */ "eucjp\0"
    /* 227 */ "jisx0201.1976-0\0"
    /* 243 */ "jisx0208.1983-0\0"
    /* 259 */ "jisx0208.1990-0\0"
    /* 275 */ "jisx0208.1997-0\0"
    /* 291 */ "jisx0212.1990-0\0"
    /* 307 */ "jisx0213.2000-1\0"
    /* 323 */ "jisx0213.2000-2\0"
    /* 339 */ "shift_jis\0"
    /* 349 */ "sjis\0"
    /* 354 */ "shift-jis\0"
    /* 364 */ "iso-2022-jp\0"
    /* 376 */ "jis7\0"
    /* 381 */ "windows850\0"
    /* 392 */ "ibm850\0"
    /* 399 */ "windows866\0"
    /* 410 */ "ibm866\0"
    /* 417 */ "windows-850\0"
    /* 429 */ "windows-866\0"
    /* 441 */ "cp-10000\0"
    /* 450 */ "apple roman\0"
    /* 462 */ "thai-tis620\0"
    /* 474 */ "iso 8859-11\0"
    /* 486 */ "windows-874\0"
    /* 498 */ "ibm874\0"
    /* 505 */ "windows874\0"
    /* 516 */ "cp-874\0"
    /* 523 */ "ksc5601.1987-0\0"
    /* 538 */ "ks_c_5601-1987\0"
    /* 553 */ "mac-roman\0"
    /* 563 */ "macintosh\0"
    /* 573 */ "mac\0"
    /* 577 */ "csiso2022jp\0"
    /* 589 */ "\0";
393 
/*
 * Flat list of (alias offset, canonical name offset) pairs into
 * builtin_string, terminated by a single -1.
 * The trailing comments show the strings the offsets point at.
 */
static const int builtin_indices[] = {
      0,  11,   // iso-ir-111        -> koi8-r
     18,  11,   // koi unified       -> koi8-r
     30,  39,   // us-ascii          -> iso 8859-1
     50,  39,   // usascii           -> iso 8859-1
     58,  39,   // ascii             -> iso 8859-1
     64,  82,   // unicode-1-1-utf-7 -> utf-7
     88,  93,   // ucs2              -> iso-10646-ucs-2
    109,  93,   // iso10646-1        -> iso-10646-ucs-2
    120, 135,   // gb18030.2000-1    -> gb18030
    143, 135,   // gb18030.2000-0    -> gb18030
    158, 164,   // gbk-0             -> gbk
    168, 164,   // gb2312            -> gbk
    175, 164,   // gb2312.1980-0     -> gbk
    189, 196,   // big5-0            -> big5
    201, 208,   // euc-kr            -> euckr
    214, 221,   // euc-jp            -> eucjp
    227, 221,   // jisx0201.1976-0   -> eucjp
    243, 221,   // jisx0208.1983-0   -> eucjp
    259, 221,   // jisx0208.1990-0   -> eucjp
    275, 221,   // jisx0208.1997-0   -> eucjp
    291, 221,   // jisx0212.1990-0   -> eucjp
    307, 221,   // jisx0213.2000-1   -> eucjp
    323, 221,   // jisx0213.2000-2   -> eucjp
    339, 349,   // shift_jis         -> sjis
    354, 349,   // shift-jis         -> sjis
    349, 349,   // sjis              -> sjis
    364, 376,   // iso-2022-jp       -> jis7
    381, 392,   // windows850        -> ibm850
    399, 410,   // windows866        -> ibm866
    417, 392,   // windows-850       -> ibm850
    429, 410,   // windows-866       -> ibm866
    441, 450,   // cp-10000          -> apple roman
    462, 474,   // thai-tis620       -> iso 8859-11
    486, 498,   // windows-874       -> ibm874
    505, 498,   // windows874        -> ibm874
    516, 498,   // cp-874            -> ibm874
    523, 208,   // ksc5601.1987-0    -> euckr
    538, 208,   // ks_c_5601-1987    -> euckr
    553, 450,   // mac-roman         -> apple roman
    563, 450,   // macintosh         -> apple roman
    573, 450,   // mac               -> apple roman
    577, 364,   // csiso2022jp       -> iso-2022-jp
    -1          // end marker
};
407 
408 /*
409  * some last resort hints in case the charmap file couldn't be found.
410  * This gives at least a partial conversion and helps making things readable.
411  *
412  * the name used as input here is already converted to the more canonical
413  * name as defined in the aliases array.
414  *
415  * Input data:
416 cp1250
417 iso-8859-2
418 koi8-r
419 iso-8859-5
420 koi8-u
421 koi8-r
422 pt 154
423 windows-1251
424 paratype-154
425 windows-1251
426 pt-154
427 windows-1251
428  */
429 /* Notes:
430  * - KDE has always used "CP 1251" as the best fallback for PT 154. As Qt no longer offers PT 154, code page 1251 is used as the fallback.
431  */
/*
 * String pool for the last-resort conversion hints.  Every entry is
 * NUL-terminated; conversion_hints_indices refers to entries by the byte
 * offset shown in front of each one.
 */
static const char conversion_hints_string[] =
    /*  0 */ "cp1250\0"
    /*  7 */ "iso-8859-2\0"
    /* 18 */ "koi8-r\0"
    /* 25 */ "iso-8859-5\0"
    /* 36 */ "koi8-u\0"
    /* 43 */ "pt 154\0"
    /* 50 */ "windows-1251\0"
    /* 63 */ "paratype-154\0"
    /* 76 */ "pt-154\0"
    /* 83 */ "\0";
443 
/*
 * Flat list of (requested name offset, fallback name offset) pairs into
 * conversion_hints_string, terminated by a single -1.
 */
static const int conversion_hints_indices[] = {
     0,  7,   // cp1250       -> iso-8859-2
    18, 25,   // koi8-r       -> iso-8859-5
    36, 18,   // koi8-u       -> koi8-r
    43, 50,   // pt 154       -> windows-1251
    63, 50,   // paratype-154 -> windows-1251
    76, 50,   // pt-154       -> windows-1251
    -1        // end marker
};
448 
449 // search an array of items index/data, find first matching index
450 // and return data, or return 0
451 static inline
452 const char *kcharsets_array_search(const char *start, const int *indices, const char *entry)
453 {
454  for (int i = 0; indices[i] != -1; i += 2)
455  if (qstrcmp(start + indices[i], entry) == 0)
456  return start + indices[i + 1];
457  return 0;
458 }
459 
460 
461 class KCharsetsPrivate
462 {
463 public:
464  KCharsetsPrivate(KCharsets* _kc)
465  {
466  kc = _kc;
467  codecForNameDict.reserve( 43 );
468  }
469  // Hash for the encoding names (sensitive case)
470  QHash<QByteArray,QTextCodec*> codecForNameDict;
471  KCharsets* kc;
472 
473  //Cache list so QStrings can be implicitly shared
474  QList<QStringList> encodingsByScript;
475 };
476 
477 // --------------------------------------------------------------------------
478 
479 KCharsets::KCharsets()
480  :d(new KCharsetsPrivate(this))
481 {
482 }
483 
484 KCharsets::~KCharsets()
485 {
486  delete d;
487 }
488 
489 QChar KCharsets::fromEntity(const QString &str)
490 {
491  QChar res = QChar::Null;
492 
493  if ( str.isEmpty() )
494  return QChar::Null;
495 
496  int pos = 0;
497  if(str[pos] == QLatin1Char('&')) pos++;
498 
499  // Check for '&#000' or '&#x0000' sequence
500  if (str[pos] == QLatin1Char('#') && str.length()-pos > 1) {
501  bool ok;
502  pos++;
503  if (str[pos] == QLatin1Char('x') || str[pos] == QLatin1Char('X')) {
504  pos++;
505  // '&#x0000', hexadecimal character reference
506  const QString tmp( str.mid( pos ) );
507  res = tmp.toInt(&ok, 16);
508  } else {
509  // '&#0000', decimal character reference
510  const QString tmp( str.mid( pos ) );
511  res = tmp.toInt(&ok, 10);
512  }
513  if ( ok )
514  return res;
515  else
516  return QChar::Null;
517  }
518 
519  const QByteArray raw ( str.toLatin1() );
520  const entity *e = kde_findEntity( raw, raw.length() );
521 
522  if(!e)
523  {
524  //kDebug( 0 ) << "unknown entity " << str <<", len = " << str.length();
525  return QChar::Null;
526  }
527  //kDebug() << "got entity " << str << " = " << e->code;
528 
529  return QChar(e->code);
530 }
531 
532 QChar KCharsets::fromEntity(const QString &str, int &len)
533 {
534  // entities are never longer than 8 chars... we start from
535  // that length and work backwards...
536  len = 8;
537  while(len > 0)
538  {
539  QString tmp = str.left(len);
540  QChar res = fromEntity(tmp);
541  if( res != QChar::Null ) return res;
542  len--;
543  }
544  return QChar::Null;
545 }
546 
547 
548 QString KCharsets::toEntity(const QChar &ch)
549 {
550  QString ent;
551  ent.sprintf("&#0x%x;", ch.unicode());
552  return ent;
553 }
554 
555 QString KCharsets::resolveEntities( const QString &input )
556 {
557  QString text = input;
558  const QChar *p = text.unicode();
559  const QChar *end = p + text.length();
560  const QChar *ampersand = 0;
561  bool scanForSemicolon = false;
562 
563  for ( ; p < end; ++p ) {
564  const QChar ch = *p;
565 
566  if ( ch == QLatin1Char('&') ) {
567  ampersand = p;
568  scanForSemicolon = true;
569  continue;
570  }
571 
572  if ( ch != QLatin1Char(';') || scanForSemicolon == false )
573  continue;
574 
575  assert( ampersand );
576 
577  scanForSemicolon = false;
578 
579  const QChar *entityBegin = ampersand + 1;
580 
581  const uint entityLength = p - entityBegin;
582  if ( entityLength == 0 )
583  continue;
584 
585  const QChar entityValue = KCharsets::fromEntity( QString( entityBegin, entityLength ) );
586  if ( entityValue.isNull() )
587  continue;
588 
589  const uint ampersandPos = ampersand - text.unicode();
590 
591  text[ (int)ampersandPos ] = entityValue;
592  text.remove( ampersandPos + 1, entityLength + 1 );
593  p = text.unicode() + ampersandPos;
594  end = text.unicode() + text.length();
595  ampersand = 0;
596  }
597 
598  return text;
599 }
600 
601 QStringList KCharsets::availableEncodingNames() const
602 {
603  QStringList available;
604  for ( const int *p = language_for_encoding_indices; *p != -1; p += 2)
605  available.append( QString::fromUtf8( language_for_encoding_string + *p ) );
606  available.sort();
607  return available;
608 }
609 
610 #ifndef KDE_NO_DEPRECATED
611 QString KCharsets::languageForEncoding( const QString &encoding ) const
612 {
613  const char* lang = kcharsets_array_search( (const char*)language_for_encoding_string,
614  language_for_encoding_indices,
615  encoding.toUtf8().constData() );
616  if ( lang )
617  return i18nc( "@item Text character set", lang );
618  else
619  return i18nc( "@item Text character set", "Other" );
620 }
621 #endif
622 
623 QString KCharsets::descriptionForEncoding( const QString& encoding ) const
624 {
625  const char* lang = kcharsets_array_search( language_for_encoding_string,
626  language_for_encoding_indices,
627  encoding.toUtf8() );
628  if ( lang )
629  return i18nc( "@item %1 character set, %2 encoding", "%1 ( %2 )",
630  i18nc( "@item Text character set", lang ), encoding );
631  else
632  return i18nc( "@item", "Other encoding (%1)", encoding );
633 }
634 
635 QString KCharsets::encodingForName( const QString &descriptiveName ) const
636 {
637  const int left = descriptiveName.lastIndexOf( QLatin1Char('(') );
638 
639  if (left<0) // No parenthesis, so assume it is a normal encoding name
640  return descriptiveName.trimmed();
641 
642  QString name(descriptiveName.mid(left+1));
643 
644  const int right = name.lastIndexOf( QLatin1Char(')') );
645 
646  if (right<0)
647  return name;
648 
649  return name.left(right).trimmed();
650 }
651 
652 QStringList KCharsets::descriptiveEncodingNames() const
653 {
654  QStringList encodings;
655  for ( const int *p = language_for_encoding_indices; *p != -1; p += 2) {
656  const QString name = QString::fromUtf8( language_for_encoding_string + p[0] );
657  const QString description = i18nc( "@item Text character set", language_for_encoding_string + p[1] );
658  encodings.append( i18nc( "@item Text encoding: %1 character set, %2 encoding", "%1 ( %2 )",
659  description, name ) );
660  }
661  encodings.sort();
662  return encodings;
663 }
664 
665 QList<QStringList> KCharsets::encodingsByScript() const
666 {
667  if (!d->encodingsByScript.isEmpty())
668  return d->encodingsByScript;
669  int i;
670  for ( const int *p = language_for_encoding_indices; *p != -1; p += 2) {
671  const QString name = QString::fromUtf8( language_for_encoding_string + p[0] );
672  const QString description = i18nc("@item Text character set", language_for_encoding_string + p[1] );
673 
674  for (i=0; i<d->encodingsByScript.size(); ++i) {
675  if (d->encodingsByScript.at(i).at(0) == description) {
676  d->encodingsByScript[i].append(name);
677  break;
678  }
679  }
680 
681  if (i==d->encodingsByScript.size()) {
682  d->encodingsByScript.append(QStringList() << description << name);
683  }
684 
685  }
686  return d->encodingsByScript;
687 }
688 
689 QTextCodec* KCharsets::codecForName(const QString &n) const
690 {
691  if ( n == QLatin1String("gb2312") || n == QLatin1String("gbk") )
692  return QTextCodec::codecForName( "gb18030" );
693  const QByteArray name( n.toLatin1() );
694  QTextCodec* codec = codecForNameOrNull( name );
695  if ( codec )
696  return codec;
697  else
698  return QTextCodec::codecForName( "iso-8859-1" );
699 }
700 
701 QTextCodec* KCharsets::codecForName(const QString &n, bool &ok) const
702 {
703  if (n == QLatin1String("gb2312") || n == QLatin1String("gbk")) {
704  ok = true;
705  return QTextCodec::codecForName( "gb18030" );
706  }
707  const QByteArray name( n.toLatin1() );
708  QTextCodec* codec = codecForNameOrNull( name );
709  if ( codec )
710  {
711  ok = true;
712  return codec;
713  }
714  else
715  {
716  ok = false;
717  return QTextCodec::codecForName( "iso-8859-1" );
718  }
719 }
720 
721 QTextCodec *KCharsets::codecForNameOrNull( const QByteArray& n ) const
722 {
723  QTextCodec* codec = 0;
724 
725  if (n.isEmpty()) {
726  // No name, assume locale (KDE's, not Qt's)
727  const QByteArray locale = "->locale<-";
728  if ( d->codecForNameDict.contains( locale ) )
729  return d->codecForNameDict.value( locale );
730  codec = KGlobal::locale()->codecForEncoding();
731  d->codecForNameDict.insert("->locale<-", codec);
732  return codec;
733  }
734  // For a non-empty name, lookup the "dictionnary", in a case-sensitive way.
735  else if ( d->codecForNameDict.contains( n ) ) {
736  return d->codecForNameDict.value( n );
737  }
738 
739  // If the name is not in the hash table, call directly QTextCoded::codecForName.
740  // We assume that QTextCodec is smarter and more maintained than this code.
741  codec = QTextCodec::codecForName( n );
742  if ( codec ) {
743  d->codecForNameDict.insert( n, codec );
744  return codec;
745  }
746 
747  // We have had no luck with QTextCodec::codecForName, so we must now process the name, so that QTextCodec::codecForName could work with it.
748 
749  QByteArray name = n.toLower();
750  bool changed = false;
751  if (name.endsWith("_charset")) { // krazy:exclude=strings
752  name.chop( 8 );
753  changed = true;
754  }
755  if ( name.startsWith( "x-" ) ) { // krazy:exclude=strings
756  name.remove( 0, 2 ); // remove x- at start
757  changed = true;
758  }
759 
760  if (name.isEmpty()) {
761  // We have no name anymore, therefore the name is invalid.
762  return 0;
763  }
764 
765  // We only need to check changed names.
766  if ( changed ) {
767  codec = QTextCodec::codecForName(name);
768  if (codec) {
769  d->codecForNameDict.insert( n, codec );
770  return codec;
771  }
772  changed = false;
773  }
774 
775  // these codecs are built into Qt, but the name given for the codec is different,
776  // so QTextCodec did not recognize it.
777  QByteArray cname = kcharsets_array_search( builtin_string, builtin_indices, name);
778 
779  if(!cname.isEmpty())
780  codec = QTextCodec::codecForName(cname);
781 
782  if (codec)
783  {
784  d->codecForNameDict.insert( n, codec );
785  return codec;
786  }
787 
788  // this also failed, the last resort is now to take some compatibility charmap
789  // ### TODO: while emergency conversions might be useful at read, it is not sure if they should be done if the application plans to write.
790  cname = kcharsets_array_search( conversion_hints_string, conversion_hints_indices, name );
791 
792  if (!cname.isEmpty()) {
793  codec = QTextCodec::codecForName(cname);
794  if (codec) {
795  d->codecForNameDict.insert( n, codec );
796  return codec;
797  }
798  }
799 
800  // we could not assign a codec, therefore return NULL
801  return 0;
802 }
KCharsets::languageForEncoding
QString languageForEncoding(const QString &encoding) const
Returns the language the encoding is used for.
Definition: kcharsets.cpp:611
KCharsets::codecForName
QTextCodec * codecForName(const QString &name) const
Provided for compatibility.
Definition: kcharsets.cpp:689
builtin_string
static const char builtin_string[]
Definition: kcharsets.cpp:334
kcharsets.h
KCharsets::descriptiveEncodingNames
QStringList descriptiveEncodingNames() const
Lists the available encoding names together with a more descriptive language.
Definition: kcharsets.cpp:652
kdebug.h
KCharsets
Charset font and encoder/decoder handling.
Definition: kcharsets.h:46
kfilterdev.h
I18N_NOOP2
#define I18N_NOOP2(comment, x)
If the string is too ambiguous to be translated well to a non-english language, use this instead of I...
Definition: klocalizedstring.h:72
kconfig.h
KCharsets::KCharsets
KCharsets()
Protected constructor.
Definition: kcharsets.cpp:479
KCharsets::~KCharsets
virtual ~KCharsets()
Destructor.
Definition: kcharsets.cpp:484
language_for_encoding_indices
static const int language_for_encoding_indices[]
Definition: kcharsets.cpp:219
QString
QHash< QByteArray, QTextCodec * >
klocale.h
i18nc
QString i18nc(const char *ctxt, const char *text)
Returns a localized version of a string and a context.
Definition: klocalizedstring.h:797
KCharsets::encodingsByScript
QList< QStringList > encodingsByScript() const
Lists the available encoding names grouped by script (or language that uses them).
Definition: kcharsets.cpp:665
builtin_indices
static const int builtin_indices[]
Definition: kcharsets.cpp:394
kglobal.h
KCharsets::availableEncodingNames
QStringList availableEncodingNames() const
Lists all available encodings as names.
Definition: kcharsets.cpp:601
KCharsets::descriptionForEncoding
QString descriptionForEncoding(const QString &encoding) const
Returns a long description for an encoding name.
Definition: kcharsets.cpp:623
KLocale::codecForEncoding
QTextCodec * codecForEncoding() const
Returns the user's preferred encoding.
Definition: klocale.cpp:650
QStringList
KCharsets::toEntity
static QString toEntity(const QChar &ch)
Converts a QChar to an entity.
Definition: kcharsets.cpp:548
KCharsets::encodingForName
QString encodingForName(const QString &descriptiveName) const
Returns the encoding for a string obtained with descriptiveEncodingNames().
Definition: kcharsets.cpp:635
conversion_hints_indices
static const int conversion_hints_indices[]
Definition: kcharsets.cpp:444
KGlobal::locale
KLocale * locale()
Returns the global locale object.
Definition: kglobal.cpp:169
KCharsets::resolveEntities
static QString resolveEntities(const QString &text)
Scans the given string for entities (like &amp;) and resolves them using fromEntity.
Definition: kcharsets.cpp:555
KCharsets::fromEntity
static QChar fromEntity(const QString &str)
Converts an entity to a character.
Definition: kcharsets.cpp:489
kcharsets_array_search
static const char * kcharsets_array_search(const char *start, const int *indices, const char *entry)
Definition: kcharsets.cpp:452
language_for_encoding_string
static const char language_for_encoding_string[]
Definition: kcharsets.cpp:153
conversion_hints_string
static const char conversion_hints_string[]
Definition: kcharsets.cpp:432
QList< QStringList >
This file is part of the KDE documentation.
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 22:47:08 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDECore

Skip menu "KDECore"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs API Reference

Skip menu "kdelibs API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  • kjsembed
  •   WTF
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Nepomuk-Core
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal